In [5]:
import pandas as pd
import numpy as np
import os
import pickle
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [6]:
# Load dataset
df = pd.read_csv('idx.csv', parse_dates=['Date'])

# Sort
df.sort_values(['Ticker', 'Date'], inplace=True)

# missing values
df.dropna(inplace=True)

# Reset
df.reset_index(drop=True, inplace=True)

# Print
print('Data loaded successfully.')
print(f'Total records: {len(df)}')
print('Sample data:')
df.head()

Data loaded successfully.
Total records: 2619041
Sample data:


Unnamed: 0,Date,Ticker,Open,High,Low,Close,Adj Close,Volume
0,2001-04-05,AALI,571.710632,571.710632,547.889343,547.889343,248.808121,57721.0
1,2001-04-06,AALI,547.889343,547.889343,547.889343,547.889343,248.808121,83433.0
2,2001-04-09,AALI,547.889343,547.889343,547.889343,547.889343,248.808121,10494.0
3,2001-04-10,AALI,547.889343,547.889343,547.889343,547.889343,248.808121,352625.0
4,2001-04-11,AALI,547.889343,547.889343,547.889343,547.889343,248.808121,59295.0


In [10]:
def train_model_for_ticker(ticker, df):
    ticker_data = df[df['Ticker'] == ticker].copy()
    ticker_data.sort_values('Date', inplace=True)
    ticker_data.reset_index(drop=True, inplace=True)
    
    # Check if there's enough data
    if len(ticker_data) < 10:
        return None, None

    # Create lag features
    for lag in range(1, 6):
        ticker_data[f'lag_{lag}'] = ticker_data['Close'].shift(lag)

    # Drop rows with NaN values
    ticker_data.dropna(inplace=True)
    
    # Features and target
    X = ticker_data[[f'lag_{lag}' for lag in range(1, 6)]]
    y = ticker_data['Close']

    # If less than 10 data after dropna, it will not count
    if len(X) < 10:
        return None, None

    # Split training and test sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )

    # Train Linear Regression model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Calculate MSE on the test set
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)

    return model, mse

In [8]:
def get_last_5_closing_prices(ticker, df):
    ticker_data = df[df['Ticker'] == ticker].copy()
    ticker_data.sort_values('Date', inplace=True)
    return ticker_data['Close'].iloc[-5:].values

In [9]:
# File to store the models
models_file = 'models.pkl'

# Check if models are already saved
if os.path.exists(models_file):
    with open(models_file, 'rb') as f:
        models = pickle.load(f)
    print('Models loaded from file.')
else:
    # Get unique tickers
    tickers = df['Ticker'].unique()

    # Dictionary to store models and MSE
    models = {}
    for ticker in tickers:
        model, mse = train_model_for_ticker(ticker, df)
        if model:
            models[ticker] = {'model': model, 'mse': mse}
            print(f'Model trained for {ticker}, MSE: {mse:.4f}')
        else:
            print(f'Insufficient data to train model for {ticker}.')
    
    # Save models to a file
    with open(models_file, 'wb') as f:
        pickle.dump(models, f)
    print('Models saved to file.')

Model trained for AALI, MSE: 45033.5029
Model trained for ABBA, MSE: 97.0269
Model trained for ABDA, MSE: 13851.7374
Model trained for ABMM, MSE: 8981.6788
Model trained for ACES, MSE: 620.7367
Model trained for ACRO, MSE: 11.8087
Model trained for ACST, MSE: 24.2007
Model trained for ADCP, MSE: 0.5640
Model trained for ADES, MSE: 29448.9677
Model trained for ADHI, MSE: 603.3303
Model trained for ADMF, MSE: 20311.5384
Model trained for ADMG, MSE: 24.7557
Model trained for ADMR, MSE: 1585.1430
Model trained for ADRO, MSE: 4620.1014
Model trained for AEGS, MSE: 12.2448
Model trained for AGAR, MSE: 16.1595
Model trained for AGII, MSE: 1373.7862
Model trained for AGRO, MSE: 2134.1315
Model trained for AGRS, MSE: 4.2137
Model trained for AHAP, MSE: 14.2690
Model trained for AIMS, MSE: 396.1722
Model trained for AISA, MSE: 42.2029
Model trained for AKKU, MSE: 0.1214
Model trained for AKPI, MSE: 911.5030
Model trained for AKRA, MSE: 750.4242
Model trained for AKSI, MSE: 578.3440
Model trained

test GOTO ticker

In [11]:
test_ticker = 'GOTO'

# Checking ticker has trained model or not
if test_ticker in models:
    mse = models[test_ticker]['mse']
    print(f'Model Mean Squared Error for {test_ticker}: {mse:.4f}')
    
    # Get last 5 closing prices
    last_5_prices = get_last_5_closing_prices(test_ticker, df)
    print(f'Last 5 closing prices for {test_ticker}: {last_5_prices}')
    
    # Ensure there are enough data points
    if len(last_5_prices) == 5:
        # Predict next closing price
        model = models[test_ticker]['model']
        input_features = last_5_prices.reshape(1, -1)
        predicted_price = model.predict(input_features)[0]
        print(f'Predicted next closing price for {test_ticker}: {predicted_price:.2f}')
    else:
        print('Not enough data to make a prediction for this ticker.')
else:
    print(f'No trained model available for {test_ticker}.')

Model Mean Squared Error for GOTO: 19.2703
Last 5 closing prices for GOTO: [67. 65. 66. 66. 66.]
Predicted next closing price for GOTO: 67.87


