# 7FNCE043W Artificial Intelligence and Machine Learning in Financial Services

In [None]:
#importing the necessary libraries

import pandas as pd # mathematical analysis library
import numpy as np # number and array manipulation
import matplotlib.pyplot as plt #  for plotting
import statistics as stats #Stats library
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import seaborn as sns
import yfinance as yf
from ta.volume import MFIIndicator
import ta
from ta.momentum import roc  # Techinical Analysis library for feature generation
from ta import add_all_ta_features as ta
from sklearn.preprocessing import MinMaxScaler, StandardScaler
#from ta.volatility import bollinger_bands

In [None]:
# Download parameters for the price history used throughout the notebook.
ticker = "BCS"
start = "2014-03-04"
end = "2024-03-06"

# auto_adjust=False keeps a separate 'Adj Close' column next to the raw OHLC
# columns. Later cells read df['Adj Close'], and recent yfinance releases
# default to auto_adjust=True, which drops that column.
df = yf.download(ticker, start=start, end=end, auto_adjust=False)

# Recent yfinance releases return two-level (field, ticker) columns even for a
# single ticker. Flatten them to the field names so that df['Close'] etc. are
# plain Series, which the indicator functions below expect.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

In [None]:
# Convenience aliases for the individual price/volume columns of df.

# Bound to the 'Open' column like its siblings below (it previously pointed at
# the whole DataFrame).
# NOTE: the name shadows the builtin open() for the rest of the session; it is
# kept unchanged in case other cells already refer to it.
open = df['Open']
high = df['High']
low = df['Low']
close = df['Close']
adjclose = df['Adj Close']
volume = df['Volume']

In [None]:
# Number of non-missing adjusted-close observations (one per downloaded row
# that has a value).
total = df['Adj Close'].count()

# Summary statistics for every column; kept as the last expression so the
# notebook actually renders the table.
df.describe()

In [None]:
# Line chart of the adjusted close over the whole sample.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(adjclose, color='red')
ax.set_ylabel('Price $')
ax.set_xlabel('Date')
ax.grid()



In [None]:
# Simple day-over-day returns of the close, annualised by scaling the mean
# daily return by 252 (presumably trading days per year) and rounding to
# five decimals.
returns = df['Close'].pct_change()
mean_daily_return = returns.mean()
annualised_returns = np.round(mean_daily_return * 252, 5)
print(f'annualised returns: {annualised_returns}')

##  Features For Feature Selection


##### Feature 1: Daily Returns — the percentage change in price from the previous day's closing price.

In [None]:
# Bind `ta` to the package again: the import cell's
# `from ta import add_all_ta_features as ta` leaves the name pointing at a
# function, so `ta.others` / `ta.volume` / ... would not resolve without this.
import ta

# Daily return of the close, with missing values filled (fillna=True).
daily_return_indicator = ta.others.DailyReturnIndicator(close=close, fillna=True)
dprice_pct = daily_return_indicator.daily_return()
df['drr'] = dprice_pct

# Display the new column.
df['drr']

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) of the daily-return series.
dprice_pct.describe()

In [None]:
# Daily returns as a faint line, with the area between the series and zero
# shaded green where the return is positive and red where it is negative.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(dprice_pct, color='blue', linewidth=0.5, alpha=0.1)

positive_part = dprice_pct.where(dprice_pct > 0)
negative_part = dprice_pct.where(dprice_pct < 0)
ax.fill_between(dprice_pct.index, 0, positive_part, facecolor='green', interpolate=True, label='Positive gains')
ax.fill_between(dprice_pct.index, 0, negative_part, facecolor='red', interpolate=True, label='Negative gains')

ax.grid()
plt.xticks(rotation=45)
plt.legend()

##### Feature 2: Rate of change (Momentum)

In [None]:
# Rate of change (momentum) of the close over a 20-observation window, with
# missing values filled.
# NOTE(review): the other indicators in this notebook use window=252; confirm
# that 20 is the intended look-back here.
df['ROC'] = roc(close=df['Close'], window=20, fillna=True)

In [None]:
# Rate-of-change series with positive and negative stretches shaded.
fig, ax = plt.subplots(figsize=(20, 5))
roc_series = df['ROC']
ax.plot(roc_series, linewidth=0.2)
ax.fill_between(roc_series.index, 0, roc_series.where(roc_series > 0), facecolor='green', interpolate=True, label='Positive gains', linewidth=2)
ax.fill_between(roc_series.index, 0, roc_series.where(roc_series < 0), facecolor='red', interpolate=True, label='Negative gains')
ax.grid()
ax.set_ylabel('% change')
ax.set_xlabel('Date')
ax.tick_params(axis='x', labelrotation=45)

# The fills carry labels, so draw the legend that shows them.
ax.legend()

# tight_layout() adjusts the margins once, at call time, so it goes last in
# order to account for the axis labels and the rotated tick labels.
fig.tight_layout()

##### Feature 3: Money Flow Index

In [None]:

# Money Flow Index over a 252-observation window, with missing values filled.
mfi_indicator = MFIIndicator(
    high=high,
    low=low,
    close=close,
    volume=volume,
    window=252,
    fillna=True,
)
mfi_values = mfi_indicator.money_flow_index()
df['mfi'] = mfi_values

In [None]:
# Summary statistics of the money-flow-index column.
df['mfi'].describe()

In [None]:
# Money Flow Index over time.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['mfi'], linewidth=1, alpha=1)
ax.grid()
ax.set_title('money flow index')
ax.set_ylabel('money flow index')
ax.set_xlabel('Date')
ax.tick_params(axis='x', labelrotation=45)

# tight_layout() adjusts the margins once, at call time, so it runs after the
# title, axis labels and rotated tick labels have been set.
fig.tight_layout()

#### Feature 4: Relative Strength Index

In [None]:
# Relative Strength Index of the close over a 252-observation window, with
# missing values filled.
rsi_indicator = ta.momentum.RSIIndicator(close=close, window=252, fillna=True)
df['rsi'] = rsi_indicator.rsi()

In [None]:
# Summary statistics of the RSI column.
df['rsi'].describe()

In [None]:
# Relative Strength Index over time.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['rsi'], linewidth=1, alpha=1)
ax.grid()
ax.set_title('relative strength index')
# RSI is an oscillator value, not a percentage change, so label it as such.
ax.set_ylabel('RSI')
ax.set_xlabel('Date')
ax.tick_params(axis='x', labelrotation=45)

# tight_layout() adjusts the margins once, at call time, so it runs after the
# title, axis labels and rotated tick labels have been set.
fig.tight_layout()

#### Feature 5: Bollinger Band Width

In [None]:
# Bollinger Bands on the close (252-observation window, 2 standard deviations,
# missing values filled); the band-width series is stored as a feature.
bb = ta.volatility.BollingerBands(
    close=close,
    window=252,
    window_dev=2,
    fillna=True,
)
df['bollbandbwidth'] = bandwidth_values = bb.bollinger_wband()

In [None]:
# Summary statistics of the Bollinger band-width column.
df['bollbandbwidth'].describe()

In [None]:
# Bollinger band width over time.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['bollbandbwidth'], linewidth=1, alpha=1)
ax.grid()
ax.set_title('bollinger band width')
ax.set_ylabel('Band Width')
ax.set_xlabel('Date')
ax.tick_params(axis='x', labelrotation=45)

# tight_layout() adjusts the margins once, at call time, so it runs after the
# title, axis labels and rotated tick labels have been set.
fig.tight_layout()

#### Feature 6: Average True Range

In [None]:
# Average True Range from high/low/close over a 252-observation window, with
# missing values filled.
atr = ta.volatility.AverageTrueRange(
    high=high,
    low=low,
    close=close,
    window=252,
    fillna=True,
)
df['atr'] = atr_values = atr.average_true_range()

In [None]:
# Summary statistics of the average-true-range column.
df['atr'].describe()

In [None]:
# Average True Range over time.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['atr'], linewidth=1, alpha=1)
ax.grid()
ax.set_title('Avg True Range')
# ATR is derived from price ranges, not a percentage change, so label it as such.
ax.set_ylabel('Average true range')
ax.set_xlabel('Date')
ax.tick_params(axis='x', labelrotation=45)

# tight_layout() adjusts the margins once, at call time, so it runs after the
# title, axis labels and rotated tick labels have been set.
fig.tight_layout()

#### Feature 7: On-Balance Volume

In [None]:
# On-Balance Volume from close and volume, with missing values filled.
obv = ta.volume.OnBalanceVolumeIndicator(close=close, volume=volume, fillna=True)
df['obv'] = obv_values = obv.on_balance_volume()

In [None]:
# On-Balance Volume over time, shaded green above zero and red below.
fig, ax = plt.subplots(figsize=(10, 5))
obv_series = df['obv']
ax.plot(obv_series, linewidth=1, alpha=0.1)
ax.fill_between(obv_series.index, 0, obv_series.where(obv_series > 0), facecolor='green', interpolate=True, label='Positive gains', linewidth=2)
ax.fill_between(obv_series.index, 0, obv_series.where(obv_series < 0), facecolor='red', interpolate=True, label='Negative gains')
ax.grid()
ax.set_title('On-Balance Volume')
# OBV is a running volume total, not a percentage change, so label it as such.
ax.set_ylabel('On-balance volume')
ax.set_xlabel('Date')
ax.tick_params(axis='x', labelrotation=45)

# The fills carry labels, so draw the legend that shows them.
ax.legend()

# tight_layout() adjusts the margins once, at call time, so it runs after
# everything else has been drawn and labelled.
fig.tight_layout()


#### Feature 8: Daily Log Returns 

In [None]:
# Daily log return of the close, with missing values filled.
log_return_indicator = ta.others.DailyLogReturnIndicator(close=close, fillna=True)
dlr_values = log_return_indicator.daily_log_return()
df['dlr'] = dlr_values

In [None]:
# Daily log returns as a faint line, shaded green above zero and red below.
fig, ax = plt.subplots(figsize=(10, 5))
dlr_series = df['dlr']
ax.plot(dlr_series, linewidth=1, alpha=0.1)
ax.grid()
ax.set_title('Daily Log Returns')
ax.fill_between(dlr_series.index, 0, dlr_series.where(dlr_series > 0), facecolor='green', interpolate=True, label='Positive gains', linewidth=2)
ax.fill_between(dlr_series.index, 0, dlr_series.where(dlr_series < 0), facecolor='red', interpolate=True, label='Negative gains')

ax.set_ylabel('')
ax.set_xlabel('Date')
ax.tick_params(axis='x', labelrotation=45)

# The fills carry labels, so draw the legend that shows them.
ax.legend()

# tight_layout() adjusts the margins once, at call time, so it runs after
# everything else has been drawn and labelled.
fig.tight_layout()

## Data Normalisation

In [None]:
# Collect the target ('adjclose') and the engineered indicators into a single
# frame with short column names. The mapping is
# {name in df_features: source column in df}, and its order is the column order.
feature_sources = {
    'adjclose': 'Adj Close',
    'roc': 'ROC',
    'atr': 'atr',
    'obv': 'obv',
    'rsi': 'rsi',
    'bollbw': 'bollbandbwidth',
    'dlr': 'dlr',
    'mfi': 'mfi',
    'daily_returns': 'drr',
}
df_features = pd.DataFrame(
    {short_name: df[source] for short_name, source in feature_sources.items()}
)


In [None]:
# Standardise the predictors and fit a linear regression on the scaled data.
#
# The train/test split and the model are created here so that this cell runs
# on its own, in notebook order. 'adjclose' is the target, so it is removed
# from the predictors; leaving it in would hand the model the answer directly.
features = df_features.drop(columns=['adjclose'])
target = df_features['adjclose']
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.15, random_state=42
)

# Fit the scaler on the training rows only and reuse that transform for the
# test rows, so no test-set statistics enter the training data.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the linear regression model on the scaled predictors.
model = LinearRegression()
model.fit(X_train_scaled, y_train)

# Predict on the scaled test rows and report the mean squared error.
y_pred_scaled = model.predict(X_test_scaled)
mse_scaled = mean_squared_error(y_test, y_pred_scaled)
print(f"Scaled Mean Squared Error: {mse_scaled}")

# Display the (unscaled) feature frame.
df_features


## Model Selection

#### Linear Regression Model

In [None]:
# Display the feature frame that feeds the model below.
df_features

In [None]:
# Linear regression of the adjusted close on the engineered indicators.
#
# The target column is excluded from the predictors: with 'adjclose' on both
# sides the model just copies it, and the reported error says nothing about
# the indicators.
X = df_features.drop(columns=['adjclose'])
y = df_features['adjclose']

# Hold out 15% of the rows for testing; random_state makes the split repeatable.
# NOTE(review): train_test_split shuffles rows by default, so test days are
# interleaved with training days. For time-ordered data consider shuffle=False.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

model = LinearRegression()

# Train the model using the training set
model.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# One fitted coefficient per predictor column, in the column order of X.
print("Coefficient:", model.coef_)
