# Econophysics and Sociophysics
Authors:

- Rofhiwa (Ralph) Matumba
- Faith Mabushe
- Enos Nemukula
- Philemon Ralukake

## Model training

In [162]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression
import plotly.graph_objects as go

### Data loading and visualisation

In this example, we read in the historical daily stock prices of an example company (ticker symbol `AAPL`).

In [163]:
# Ticker symbol; it doubles as the stem of the CSV file name and as the chart label.
filename = 'AAPL'
csv_path = f'../data/{filename}.csv'
data = pd.read_csv(csv_path)

Before using this data to make predictions, we display the last five entries together with the column names so that we understand the structure of the data. At the time this dataset was downloaded, the latest close was on the 1st of December 2023.

In [164]:
# Peek at the five most recent rows to check the column layout.
data.tail(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
10829,2023-11-27,189.919998,190.669998,188.899994,189.789993,189.789993,40552600
10830,2023-11-28,189.779999,191.080002,189.399994,190.399994,190.399994,38415400
10831,2023-11-29,190.899994,192.089996,188.970001,189.369995,189.369995,43014200
10832,2023-11-30,189.839996,190.320007,188.190002,189.949997,189.949997,48794400
10833,2023-12-01,190.330002,191.559998,189.229996,191.240005,191.240005,45679300


To train the model, we will predict each day's values from those of the previous trading day. So we will add lag columns that contain the relevant stock prices (and volume) from the day before. We will also drop the 'Adj Close' column because it is the same as the 'Close' column for the most part.

In [165]:
# Candlestick trace of the daily open/high/low/close prices as loaded from the CSV.
price_trace = go.Candlestick(
    x=data['Date'],
    open=data['Open'],
    high=data['High'],
    low=data['Low'],
    close=data['Close'],
    name=f'{filename} Stock Chart',
)

stonks = go.Figure(data=price_trace)
stonks.show()

### Data preprocessing

In [166]:
# For every quantity we want to predict, add a "<name>_L" column holding the
# value from the previous row (shift(1) moves each value down by one row).
lagged_columns = ["Open", "High", "Low", "Close", "Volume"]
for column in lagged_columns:
    data[f"{column}_L"] = data[column].shift(1)

# Drop the 'Adj Close' column, then drop every row that contains a NaN
# (the first row has no previous row, so its lag values are NaN).
data = data.drop(columns=["Adj Close"]).dropna()

In [167]:
# Show the five most recent rows again, now including the lag columns.
data.tail(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Open_L,High_L,Low_L,Close_L,Volume_L
10829,2023-11-27,189.919998,190.669998,188.899994,189.789993,40552600,190.869995,190.899994,189.25,189.970001,24048300.0
10830,2023-11-28,189.779999,191.080002,189.399994,190.399994,38415400,189.919998,190.669998,188.899994,189.789993,40552600.0
10831,2023-11-29,190.899994,192.089996,188.970001,189.369995,43014200,189.779999,191.080002,189.399994,190.399994,38415400.0
10832,2023-11-30,189.839996,190.320007,188.190002,189.949997,48794400,190.899994,192.089996,188.970001,189.369995,43014200.0
10833,2023-12-01,190.330002,191.559998,189.229996,191.240005,45679300,189.839996,190.320007,188.190002,189.949997,48794400.0


### Feature and target selection

In [168]:
# Targets are the current row's values; features are the matching "_L" lag columns.
targets = ["Open", "High", "Low", "Close", "Volume"]
features = [f"{target}_L" for target in targets]

X = data.loc[:, features]
y = data.loc[:, targets]

### Train-test split

In [169]:
def train_test_split(X, y, test_size):
    """Split X and y, in order, into a leading train part and a trailing test part.

    No shuffling is performed: the first ``len(y) - test_size * len(y)``
    samples (truncated to an integer) form the training set and the
    remaining samples form the test set.

    Note that the return order is ``X_train, y_train, X_test, y_test``,
    which differs from scikit-learn's function of the same name.

    Parameters
    ----------
    X, y : sliceable sequences of equal length
        Anything that supports ``len()`` and ``obj[:k]`` / ``obj[k:]``
        slicing, e.g. lists or pandas DataFrames.
    test_size : float
        Fraction of the samples, between 0 and 1 inclusive, placed in the
        test set.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)``.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` have different lengths, or if ``test_size`` is
        outside ``[0, 1]``.
    """
    n_samples = len(y)
    if len(X) != n_samples:
        raise ValueError(
            f"X and y must have the same length, got {len(X)} and {n_samples}"
        )
    # Outside [0, 1] the split index goes negative or past the end, which
    # slicing would silently accept and turn into overlapping or empty splits.
    if not 0 <= test_size <= 1:
        raise ValueError(
            f"test_size must be a fraction between 0 and 1, got {test_size!r}"
        )

    ind = int(n_samples - test_size * n_samples)

    X_train, y_train = X[:ind], y[:ind]
    X_test, y_test = X[ind:], y[ind:]

    return X_train, y_train, X_test, y_test

In [170]:
# Fraction of the rows (taken from the end of the data) held out for testing.
test_size = 0.2
splits = train_test_split(X, y, test_size=test_size)
X_train, y_train, X_test, y_test = splits

### Model fitting

In [171]:
# Train the model
model = LinearRegression()
model.fit(X_train, y_train)

## Model testing

In [172]:
# Predict the held-out rows and label the output columns with the target names.
pred = pd.DataFrame(model.predict(X_test), columns=targets)

In [173]:
# Dates of the held-out rows only. Using the full data['Date'] column here
# would pair the test-period candles with the earliest dates in the file,
# because pred and y_test cover only the tail of the data.
test_dates = data.loc[y_test.index, 'Date']

# Predicted candles get their own colours so they can be told apart from the
# actual candles drawn on the same axes.
stonks = go.Figure(go.Candlestick(x=test_dates,
                                   open=pred['Open'],
                                   high=pred['High'],
                                   low=pred['Low'],
                                   close=pred['Close'],
                                   increasing_line_color='royalblue',
                                   decreasing_line_color='orange',
                                   name=f'{filename} Predicted Stock Chart'))

# Actual prices for the same dates, in plotly's default candlestick colours.
stonks.add_trace(go.Candlestick(x=test_dates,
                                 open=y_test['Open'],
                                 high=y_test['High'],
                                 low=y_test['Low'],
                                 close=y_test['Close'],
                                 name=f'{filename} Stock Chart'))

stonks.show()