load


In [1]:
import pandas as pd
import numpy as np
import os
import pickle
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

def load_data():
    df = pd.read_csv('idx.csv', parse_dates=['Date'])
    df.sort_values(['Ticker', 'Date'], inplace=True)
    df.dropna(inplace=True)
    df.reset_index(drop=True, inplace=True)
    return df

define function for training model, using linear regression and MSE (mean squared error)

In [2]:

def train_model_for_ticker(ticker, df):
    ticker_data = df[df['Ticker'] == ticker].copy()
    ticker_data.sort_values('Date', inplace=True)
    ticker_data.reset_index(drop=True, inplace=True)
    
    if len(ticker_data) < 10:
        return None, None

    for lag in range(1, 6):
        ticker_data[f'lag_{lag}'] = ticker_data['Close'].shift(lag)
    ticker_data.dropna(inplace=True)
    
    X = ticker_data[[f'lag_{lag}' for lag in range(1, 6)]]
    y = ticker_data['Close']
    
    if len(X) < 10:
        return None, None

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )
    
    model = LinearRegression()
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    
    return model, mse


In [3]:

def get_last_5_closing_prices(ticker, df):
    ticker_data = df[df['Ticker'] == ticker].copy()
    ticker_data.sort_values('Date', inplace=True)
    return ticker_data['Close'].iloc[-5:].values


define function for next closing price

In [4]:

def predict_next_price(ticker, df, models):
    if ticker not in models:
        return None, "No trained model available for this ticker."
        
    mse = models[ticker]['mse']
    last_5_prices = get_last_5_closing_prices(ticker, df)
    
    if len(last_5_prices) != 5:
        return None, "Not enough data to make a prediction."
        
    model = models[ticker]['model']
    # Create DataFrame with proper feature names
    input_features = pd.DataFrame(
        [last_5_prices], 
        columns=[f'lag_{i}' for i in range(1, 6)]
    )
    predicted_price = model.predict(input_features)[0]
    
    return predicted_price, mse


Test on GOTO ticker

In [5]:
if __name__ == "__main__":
    df = load_data()
    test_ticker = 'GOTO'
    
    # Load or train models
    models_file = 'models.pkl'
    if os.path.exists(models_file):
        with open(models_file, 'rb') as f:
            models = pickle.load(f)
    else:
        models = {}
        for ticker in df['Ticker'].unique():
            model, mse = train_model_for_ticker(ticker, df)
            if model:
                models[ticker] = {'model': model, 'mse': mse}
        with open(models_file, 'wb') as f:
            pickle.dump(models, f)
            
    # Make prediction
    predicted_price, mse = predict_next_price(test_ticker, df, models)
    if predicted_price is not None:
        print(f"MSE: {mse:.4f}")
        print(f"Last 5 prices: {get_last_5_closing_prices(test_ticker, df)}")
        print(f"Predicted next price: {predicted_price:.2f}")


MSE: 19.2703
Last 5 prices: [67. 65. 66. 66. 66.]
Predicted next price: 67.87


In [8]:
# augment the dataset with predicted close prices

import pandas as pd
import numpy as np

# assuming df is already loaded and preprocessed
# assuming models is a dictionary containing trained models for each ticker

# list to store prediction entries
predictions = []

# iterate over each ticker with a trained model
for ticker in models.keys():
    model = models[ticker]['model']
    # get data for the current ticker
    ticker_data = df[df['Ticker'] == ticker].copy()
    ticker_data.sort_values('Date', inplace=True)
    
    # get the last 5 closing prices and dates
    last_5_prices = ticker_data['Close'].iloc[-5:].values
    last_5_dates = ticker_data['Date'].iloc[-5:].values
    
    if len(last_5_prices) == 5:
        # reshape input features for prediction
        input_features = last_5_prices.reshape(1, -1)
        # predict the next closing price
        predicted_price = model.predict(input_features)[0]
        
        # calculate the next business day
        last_date = last_5_dates[-1]
        next_date = last_date + pd.offsets.BDay()
        
        # append the last 5 actual data points to predictions
        for date, price in zip(last_5_dates, last_5_prices):
            predictions.append({
                'Ticker': ticker,
                'Date': pd.to_datetime(date).date(),
                'Close': price,
                'Predicted_Close': np.nan
            })
        
        # append the predicted next closing price
        predictions.append({
            'Ticker': ticker,
            'Date': pd.to_datetime(next_date).date(),
            'Close': np.nan,
            'Predicted_Close': predicted_price
        })

# create a dataframe from the predictions list
predictions_df = pd.DataFrame(predictions)

# save the augmented dataset to a csv file
predictions_df.to_csv('idx_with_predictions.csv', index=False)

