# Econophysics and Sociophysics
Authors:

- Rofhiwa (Ralph) Matumba
- Faith Mabushe
- Enos Nemukula
- Philemon Ralukake

## Model training

In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
import plotly.graph_objects as go

### Data loading and visualisation

In this example, we read in historical daily stock prices for an example company.

In [2]:
# Base name of the CSV file under ../data; also reused in the chart titles below.
filename = 'ABG.JO'
csv_path = '../data/' + filename + '.csv'

# Load the whole file into a DataFrame with pandas' default parsing options.
data = pd.read_csv(csv_path)

Before using this data to make predictions, we display the last five entries, together with the column names, so that we understand the structure of the data. At the time this dataset was downloaded, the latest close was on 1 December 2023.

In [3]:
# Show the five most recent rows to inspect the column layout.
data.tail(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
6135,2023-11-27,17727.0,17785.0,17330.0,17515.0,17515.0,2061279
6136,2023-11-28,17385.0,17664.0,17358.0,17522.0,17522.0,2097483
6137,2023-11-29,17500.0,17516.0,17133.0,17295.0,17295.0,3337846
6138,2023-11-30,17250.0,17533.0,17204.0,17376.0,17376.0,4495273
6139,2023-12-01,17527.0,17531.0,17215.0,17302.0,17302.0,1644414


To train the models, we will predict each day's values from those of the previous trading day. So we will add lag columns that contain the relevant stock prices (and volume) from the day before. We will also drop the 'Adj Close' column because it is the same as the 'Close' column for the most part. Before preprocessing, we first plot the raw prices as a candlestick chart.

In [4]:
# Build the candlestick trace from the raw open/high/low/close columns,
# with the Date column on the x-axis.
price_trace = go.Candlestick(
    x=data['Date'],
    open=data['Open'],
    high=data['High'],
    low=data['Low'],
    close=data['Close'],
    name=f'{filename} Stock Chart',
)

# Wrap the single trace in a figure and render it.
stonks = go.Figure(price_trace)
stonks.show()

### Data preprocessing

In [5]:
# Columns whose previous-row value becomes a "<name>_L" lag feature.
lag_sources = ["Open", "High", "Low", "Close", "Volume"]
lag_columns = [f"{name}_L" for name in lag_sources]

# Guard so that re-running this cell is a no-op: without it, a second run would
# raise KeyError on the already-dropped 'Adj Close' column and, after re-shifting,
# dropna() would discard one more leading row on every execution.
if not set(lag_columns).issubset(data.columns):
    # shift(1) moves each value down one row, so row i holds the value of row i-1.
    data = data.assign(**{f"{name}_L": data[name].shift(1) for name in lag_sources})

    # Drop the 'Adj Close' column (tolerating its absence), then drop every ROW
    # that contains a NaN -- this includes the first row, which has no previous row.
    data = data.drop(columns="Adj Close", errors="ignore")
    data = data.dropna()

In [6]:
# Show the five most recent rows again, now including the lag columns.
data.tail(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Open_L,High_L,Low_L,Close_L,Volume_L
6135,2023-11-27,17727.0,17785.0,17330.0,17515.0,2061279,17500.0,17769.0,17222.0,17728.0,3259300.0
6136,2023-11-28,17385.0,17664.0,17358.0,17522.0,2097483,17727.0,17785.0,17330.0,17515.0,2061279.0
6137,2023-11-29,17500.0,17516.0,17133.0,17295.0,3337846,17385.0,17664.0,17358.0,17522.0,2097483.0
6138,2023-11-30,17250.0,17533.0,17204.0,17376.0,4495273,17500.0,17516.0,17133.0,17295.0,3337846.0
6139,2023-12-01,17527.0,17531.0,17215.0,17302.0,1644414,17250.0,17533.0,17204.0,17376.0,4495273.0


### Feature and target selection

In [7]:
# Same-row values the models should predict.
targets = ["Open", "High", "Low", "Close", "Volume"]

# Model inputs: the "_L" lag column matching each target.
features = [f"{name}_L" for name in targets]

# Input matrix and target frame selected from the preprocessed data.
X, y = data[features], data[targets]

### Train-test split

In [8]:
def train_test_split(X, y, test_size):
    """Split ``X`` and ``y`` positionally into a leading train part and a trailing test part.

    No shuffling is performed. The split index is
    ``int(len(y) - test_size * len(y))``; everything before it is the training
    part and everything from it onwards is the test part.

    Parameters
    ----------
    X, y
        Sliceable containers of equal length (anything supporting ``len`` and
        ``obj[a:b]`` slicing, such as DataFrames or lists).
    test_size : float
        Fraction of the rows to hold out for testing; must lie in ``[0, 1]``.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)`` -- in that order.

    Raises
    ------
    ValueError
        If ``test_size`` is outside ``[0, 1]`` or ``X`` and ``y`` differ in length.
    """
    # A fraction above 1 would yield a negative index, and a negative slice bound
    # counts from the end -- producing a plausible-looking but wrong split.
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be between 0 and 1, got {test_size!r}")
    if len(X) != len(y):
        raise ValueError(
            f"X and y must have the same length, got {len(X)} and {len(y)}"
        )

    ind = int(len(y) - test_size * len(y))

    X_train, y_train = X[:ind], y[:ind]
    X_test, y_test = X[ind:], y[ind:]

    return X_train, y_train, X_test, y_test

In [9]:
# Fraction of rows held out for testing; the split is positional (no shuffling),
# so the test part is the trailing rows of X and y.
test_size = 0.2
(X_train, y_train,
 X_test, y_test) = train_test_split(X, y, test_size=test_size)

### Model fitting

In [10]:
# Fitted estimators and test-set mean squared errors, keyed by target column name.
models = {}
mse_results = {}
# Test-set predictions per target column, collected here and framed after the loop.
pred_by_target = {}

# Train one independent SVR per target column.
# NOTE(review): SVR() is fitted here with default hyper-parameters on unscaled
# inputs; SVR is generally sensitive to feature scale, so consider standardising
# the features (and possibly the targets) -- confirm against the reported MSE.
for column in y.columns:
    model = SVR()
    model.fit(X_train, y_train[column])

    # Predict the held-out rows for this target.
    predictions = model.predict(X_test)
    pred_by_target[column] = predictions

    # Keep the fitted model and its test-set error.
    models[column] = model
    mse_results[column] = mean_squared_error(y_test[column], predictions)

# Print the MSE for each target variable
for column, mse in mse_results.items():
    print(f'Mean Squared Error - {column}: {mse}')

# One column per target, in the order of `targets`. The frame reuses y_test's
# index (instead of a fresh 0..n-1 range) so that predictions align row-for-row
# with the actual values in label-based operations such as `pred - y_test`.
pred = pd.DataFrame(pred_by_target, index=y_test.index, columns=targets)

## Model testing

In [None]:
# Dates of the held-out rows only. `pred` and `y_test` contain just the test
# rows, so using the full data['Date'] column as the x-axis would pair those
# candles with the earliest dates in the dataset rather than the test period.
test_dates = data.loc[y_test.index, 'Date']

# Predicted candles for the test period.
stonks = go.Figure(go.Candlestick(x=test_dates,
                                   open=pred['Open'],
                                   high=pred['High'],
                                   low=pred['Low'],
                                   close=pred['Close'],
                                   name=f'{filename} Predicted Stock Chart'))

# Actual candles for the same dates, overlaid for comparison.
stonks.add_trace(go.Candlestick(x=test_dates,
                                   open=y_test['Open'],
                                   high=y_test['High'],
                                   low=y_test['Low'],
                                   close=y_test['Close'],
                                   name=f'{filename} Stock Chart'))

stonks.show()