In [4]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import time

In [5]:
def fetch_data(symbol, start_date, end_date):
    try:
        data = yf.download(symbol, start=start_date, end=end_date)
        return data
    except Exception as e:
        print(f"Error fetching data: {e}")
        return None


In [6]:
def preprocess_data(data):
    # Calculate additional features
    data['Returns'] = data['Close'].pct_change()
    data['MA5'] = data['Close'].rolling(window=5).mean()
    data['MA20'] = data['Close'].rolling(window=20).mean()
    data['Volatility'] = data['Returns'].rolling(window=20).std()
    
    # Drop NaN values
    data = data.dropna()
    
    # Create features and target
    features = ['Open', 'High', 'Low', 'Volume', 'Returns', 'MA5', 'MA20', 'Volatility']
    X = data[features]
    y = data['Close']
    
    return X, y


In [7]:
def train_model(X, y):
    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Scale the features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    # Train a Random Forest model
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train_scaled, y_train)
    
    # Make predictions
    y_pred = model.predict(X_test_scaled)
    
    # Calculate error
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    
    return model, scaler, rmse


In [10]:
symbol = "PSX"  # Adjust this symbol if needed
start_date = "2020-01-01"
end_date = "2023-12-31"


In [11]:
data = fetch_data(symbol, start_date, end_date)
if data is not None:
    print("Data fetched successfully")
else:
    print("Failed to fetch data")


[*********************100%***********************]  1 of 1 completed

Data fetched successfully





In [12]:
if data is not None:
    X, y = preprocess_data(data)
    print(f"Data preprocessed. Number of samples: {len(X)}")
else:
    print("Data not available for preprocessing.")

Data preprocessed. Number of samples: 986


In [13]:
if data is not None:
    model, scaler, rmse = train_model(X, y)
    print(f"Model trained. RMSE: {rmse}")
else:
    print("No data available for training.")


Model trained. RMSE: 0.942521024833002


In [15]:
new_start_date = "2024-01-01"
new_end_date = "2024-02-28"
new_data = fetch_data(symbol, new_start_date, new_end_date)

if new_data is not None:
    print("New data fetched successfully for prediction")
else:
    print("Failed to fetch new data for prediction")

[*********************100%***********************]  1 of 1 completed

New data fetched successfully for prediction





In [19]:
def make_prediction(model, scaler, input_data):
    # Ensure input_data is a DataFrame with the same structure as the training data
    input_df = pd.DataFrame([input_data], columns=['Open', 'High', 'Low', 'Volume', 'Returns', 'MA5', 'MA20', 'Volatility'])
    
    # Scale the input data using the same scaler as used during training
    input_scaled = scaler.transform(input_df)
    
    # Make the prediction
    prediction = model.predict(input_scaled)
    
    return prediction[0]


In [20]:
# Example input data
example_input = {
    'Open': 43000,          # Replace with actual values
    'High': 43500,
    'Low': 42800,
    'Volume': 100000,
    'Returns': 0.002,       # Example percentage change
    'MA5': 43200,           # 5-day moving average
    'MA20': 43150,          # 20-day moving average
    'Volatility': 0.01      # 20-day standard deviation of returns
}

# Check if model and scaler are available
if 'model' in locals() and 'scaler' in locals():
    # Make prediction
    prediction = make_prediction(model, scaler, example_input)
    print(f"Predicted Close Price: {prediction}")
else:
    print("Model or scaler is not available. Please train the model first.")


Predicted Close Price: 134.4006999206543


In [21]:
# Fetch live data from Pakistan Stock Exchange
data = yf.download("PSX", period="1d", interval="1m")
data.to_csv('historical_data.csv')

[*********************100%***********************]  1 of 1 completed


data = pd.read_csv("historical_data.csv")
features = ['Datetime','Close']
X = data.drop(features, axis=1)
y = data["Close"]
model = RandomForestRegressor()
model.fit(X, y)