<a href="https://colab.research.google.com/github/Mik1810/AI-assignment/blob/main/finance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install ta -q # Technical Analysis library

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for ta (setup.py) ... [?25l[?25hdone


In [8]:
!pip install yfinance -q # Yahoo Finance library

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [22]:
!pip install catboost -q # Catboost

In [9]:
# Importing Libraries

# Data Handling
import pandas as pd
import numpy as np

# Data Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go
import plotly.subplots as sp
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=True)

# Financial Data Analysis
import yfinance as yf
import ta
import quantstats as qs

# Machine Learning
from sklearn.metrics import confusion_matrix, roc_auc_score, auc, roc_curve

# Classification ML algorithms
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier

# Feature Selection
from sklearn.feature_selection import SelectKBest, f_classif

# Oversampling minority class for imbalanced datasets
from imblearn.over_sampling import SMOTE

# Feature Importance
from sklearn.inspection import permutation_importance

# Hiding warnings
import warnings
warnings.filterwarnings("ignore")

In [17]:
# Weekly bars for ticker 'LE=F' up to the given end date.
# auto_adjust=False is passed explicitly because the rest of the notebook reads
# the 'Adj Close' column, which yfinance leaves out when prices are auto-adjusted.
lef = yf.download('LE=F', end = '2023-05-29',
                  interval = '1wk', # 1-week timeframe
                  auto_adjust = False)

# Some yfinance releases return (field, ticker) MultiIndex columns even for a
# single ticker; keep only the field level so lef['Open'] etc. are plain Series.
if isinstance(lef.columns, pd.MultiIndex):
    lef.columns = lef.columns.get_level_values(0)

[*********************100%%**********************]  1 of 1 completed


In [21]:
# Candles for the whole download; the adjusted close is used as the close price
candlestick = go.Candlestick(
    x = lef.index,
    open = lef['Open'],
    high = lef['High'],
    low = lef['Low'],
    close = lef['Adj Close'],
    increasing = {'line': {'color': 'skyblue'}},
    decreasing = {'line': {'color': 'darkblue'}},
)

# Centered, two-line chart title
chart_title = {
    'text': '<b>Live Cattle Futures</b> <br><sup>(March,2002 - May, 2023)</sup>',
    'x': 0.5,
    'xanchor': 'center',
    'yanchor': 'top',
    'font': {'size': 25},
}
black_ticks = {'color': 'black'}

# Defining layout
layout = go.Layout(
    title = chart_title,
    template = 'ggplot2',
    plot_bgcolor = 'white',
    height = 650,
    width = 1000,
    margin = {'t': 80, 'l': 50, 'r': 50, 'b': 50},
    yaxis = {'title': 'Price (USD)'},
    xaxis = {'title': 'Date'},
    xaxis_rangeslider_visible = True,
    yaxis_gridcolor = 'lightgray',
    yaxis_tickfont = black_ticks,
    xaxis_tickfont = black_ticks,
)

fig = go.Figure(data = [candlestick], layout = layout)
fig.show(renderer="colab")

In [22]:
# Splitting data into training & testing sets by calendar year:
# bars dated up to and including TRAIN_END_YEAR train the models, later bars test them.
# .copy() gives each split its own data, so the feature columns assigned further
# down are written to independent frames instead of to slices of `lef`.
TRAIN_END_YEAR = 2012
train = lef[lef.index.year <= TRAIN_END_YEAR].copy()
test = lef[lef.index.year > TRAIN_END_YEAR].copy()

In [23]:
def new_features(df):
    """Return a copy of ``df`` extended with engineered price and volume features.

    Parameters
    ----------
    df : pandas.DataFrame
        Price bars with ``Open``, ``High``, ``Low``, ``Adj Close`` and ``Volume``
        columns. Assumed to be ordered oldest to newest, since the lag and
        rolling features look at preceding rows.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the original columns plus the engineered ones, with
        +/-inf replaced by 0 and every row that contains a NaN removed. The
        frame passed in is not modified.
    """
    # Work on a private copy. Callers pass boolean-mask slices of the raw
    # download; assigning columns onto such a slice is what pandas flags with
    # SettingWithCopyWarning, and it would also alter the caller's frame.
    df = df.copy()

    close = df['Adj Close']
    bar_range = df['High'] - df['Low']

    # Features related to price behavior
    df['high_low_ratio'] = df['High'] / df['Low']
    # NOTE: despite the column name this is close / open, not open / close.
    df['open_adjclose_ratio'] = close / df['Open']
    df['candle_to_wick_ratio'] = (close - df['Open']) / bar_range
    body_prices = df[['Open', 'Adj Close']]
    upper_wick_size = df['High'] - body_prices.max(axis = 1)
    lower_wick_size = body_prices.min(axis = 1) - df['Low']
    df['upper_to_lower_wick_ratio'] = upper_wick_size / lower_wick_size

    # Laggings (all lag columns first, then all ratios, to keep the column order)
    lags = range(1, 6)
    for k in lags:
        df[f'lag{k}'] = close.shift(k)

    # Laggings ratio
    for k in lags:
        df[f'close_to_lag{k}_ratio'] = close / df[f'lag{k}']

    # Exponential Moving Average
    df['ema8'] = ta.trend.ema_indicator(close, window = 8)

    # Volatility features: rolling standard deviation of row-over-row returns.
    # The windows count rows, so "days" in the names means bars of the input's frequency.
    pct_returns = close.pct_change()
    df['9_days_volatility'] = pct_returns.rolling(window = 9).std()
    df['20_days_volatility'] = pct_returns.rolling(window = 20).std()
    df['9_to_20_day_vol_ratio'] = df['9_days_volatility'] / df['20_days_volatility']

    # Technical indicators
    df['rsi'] = ta.momentum.RSIIndicator(close).rsi()
    df['rsi_overbought'] = (df['rsi'] >= 70).astype(int)
    df['rsi_oversold'] = (df['rsi'] <= 30).astype(int)
    df['cci'] = ta.trend.cci(df['High'], df['Low'], close, window=10, constant=0.015)
    df['obv'] = ta.volume.OnBalanceVolumeIndicator(close=close, volume=df['Volume']).on_balance_volume()
    obv_change = df['obv'].diff()
    price_change = close.diff()
    # NOTE(review): in both columns the OBV window is twice the price window
    # (10 vs 5, 20 vs 10) although the names mention a single horizon.
    # Kept as-is so the features are unchanged; confirm this is intended.
    df['obv_divergence_5_days'] = obv_change.rolling(10).sum() - price_change.rolling(5).sum()
    df['obv_divergence_10_days'] = obv_change.rolling(20).sum() - price_change.rolling(10).sum()
    df['ADX'] = ta.trend.ADXIndicator(df['High'], df['Low'], close, window = 10).adx()
    df['ADI'] = ta.volume.AccDistIndexIndicator(df['High'], df['Low'], close, df['Volume']).acc_dist_index()

    # Row-over-row returns in percent, rounded to two decimals
    df['returns'] = np.round(pct_returns * 100, 2)

    # Ratio between the closing price and the 8-period exponential moving average
    df['close_to_ema8_ratio'] = close / df['ema8']

    # Replacing infinite values (from divisions by zero) by zeros
    df = df.replace([np.inf, -np.inf], 0)

    # Removing rows with NaN values (indicator warm-up rows and 0/0 ratios)
    return df.dropna()

In [24]:
# Applying new_features function to both datasets.
# The names are rebound to the results, so re-running this cell would feed the
# already-processed frames back in; re-run the split cell first if needed.
train, test = new_features(train), new_features(test)

In [25]:
train # Displaying results: the training frame after new_features (long output is shown truncated)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,high_low_ratio,open_adjclose_ratio,candle_to_wick_ratio,upper_to_lower_wick_ratio,...,rsi_overbought,rsi_oversold,cci,obv,obv_divergence_5_days,obv_divergence_10_days,ADX,ADI,returns,close_to_ema8_ratio
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2002-07-22,65.800003,65.800003,64.224998,64.900002,64.900002,0,1.024523,0.986322,-0.571428,0.000000,...,0,0,71.148622,0,-1.175003,-3.150002,53.089992,0.000000,-1.22,1.006925
2002-07-29,64.925003,65.000000,64.449997,64.699997,64.699997,0,1.008534,0.996534,-0.409100,0.299988,...,0,0,51.315128,0,-1.299995,-2.624996,51.599305,0.000000,-0.31,1.002970
2002-08-05,64.324997,65.224998,64.175003,65.224998,65.224998,0,1.016361,1.013991,0.857148,0.000000,...,0,0,49.847205,0,-0.549995,-4.750000,50.463234,0.000000,0.81,1.008619
2002-08-26,64.750000,65.175003,64.500000,64.724998,64.724998,0,1.010465,0.999614,-0.037039,1.888915,...,0,0,-20.358467,0,0.175003,-1.000000,44.688827,0.000000,-0.50,0.996962
2002-09-16,69.775002,69.824997,69.699997,69.699997,69.699997,0,1.001793,0.998925,-0.600037,0.000000,...,0,0,141.617928,0,-3.724998,-4.299995,38.466499,0.000000,-0.78,1.035066
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2012-12-03,126.625000,127.000000,125.500000,125.875000,125.875000,54137,1.011952,0.994077,-0.500000,1.000000,...,0,0,27.782588,-485103,212394.550003,494765.199997,20.364995,-842575.437873,-0.67,0.998634
2012-12-10,125.974998,127.400002,125.175003,126.900002,126.900002,63411,1.017775,1.007343,0.415732,0.625004,...,0,0,35.008746,-421692,224042.849998,492254.150002,20.851077,-807663.783260,0.81,1.005254
2012-12-17,127.425003,129.725006,127.400002,129.274994,129.274994,26724,1.018250,1.014518,0.795693,17.999390,...,0,0,157.649873,-394968,215978.875008,454451.625008,22.755490,-791284.817706,1.87,1.018620
2012-12-24,129.000000,129.600006,128.399994,129.399994,129.399994,7372,1.009346,1.003101,0.333325,0.333350,...,0,0,125.563012,-387596,208484.550003,407931.900009,24.469461,-786370.276024,0.10,1.015182


In [28]:
# Candles for the training frame; the adjusted close is used as the close price
candlestick = go.Candlestick(
    x = train.index,
    open = train['Open'],
    high = train['High'],
    low = train['Low'],
    close = train['Adj Close'],
    increasing = {'line': {'color': 'skyblue'}},
    decreasing = {'line': {'color': 'darkblue'}},
    name = 'Candles',
)

# Setting moving average on plot
ema8 = go.Scatter(
    x = train.index,
    y = train['ema8'],
    mode = 'lines',
    line = {'color': 'orange', 'width': 2},
    name = 'EMA - 8 Weeks',
)

# Centered, two-line chart title
chart_title = {
    'text': '<b>Live Cattle Futures</b> <br><sup>(July,2002 - December, 2012)</sup>',
    'x': 0.5,
    'xanchor': 'center',
    'yanchor': 'top',
    'font': {'size': 25},
}
black_ticks = {'color': 'black'}

# Defining the overall layout
layout = go.Layout(
    title = chart_title,
    template = 'ggplot2',
    plot_bgcolor = 'white',
    height = 650,
    width = 1000,
    margin = {'t': 80, 'l': 50, 'r': 50, 'b': 50},
    yaxis = {'title': 'Price (USD)'},
    xaxis = {'title': 'Date'},
    xaxis_rangeslider_visible = True,
    yaxis_gridcolor = 'lightgray',
    yaxis_tickfont = black_ticks,
    xaxis_tickfont = black_ticks,
)

fig = go.Figure(data = [candlestick, ema8], layout = layout)
fig.show(renderer="colab")

In [26]:
# Boxplot of the close / EMA-8 ratio over the training frame, with every point drawn
box_title = {
    'text': '<b>Close-to-Exponential Moving Average 8 Ratio - Boxplot</b>',
    'x': 0.5,
    'xanchor': 'center',
    'yanchor': 'top',
    'font': {'size': 25},
}

fig = px.box(train, x = 'close_to_ema8_ratio', points = 'all')

# Layout
fig.update_traces(boxpoints = 'all', marker_color = 'lightsteelblue')
fig.update_layout(
    title = box_title,
    template = 'plotly_white',
    height = 350,
    width = 1000,
    margin = {'t': 100},
)

fig.show(renderer="colab")

In [29]:
# Creating target variable
def target(df, threshold = 0.998):
    """Return a copy of ``df`` with a next-row binary ``target`` column.

    Each row is first labelled 0 when its ``close_to_ema8_ratio`` is less than
    or equal to ``threshold`` and 1 otherwise. The labels are then shifted up
    by one row, so every row carries the label of the row that follows it.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``close_to_ema8_ratio`` column.
    threshold : float, default 0.998
        Ratio at or below which a row is labelled 0.

    Returns
    -------
    pandas.DataFrame
        A new frame with the ``target`` column added (as floats, 0.0 / 1.0) and
        all rows containing NaN removed; this always drops the last row, which
        has no following row to take a label from. The frame passed in is not
        modified.
    """
    # Copy first so the caller's frame does not silently gain a 'target' column.
    df = df.copy()
    df['target'] = np.where(df['close_to_ema8_ratio'] <= threshold, 0, 1)
    # shift(-1) aligns each row with the NEXT row's label (the value to predict).
    df['target'] = df['target'].shift(-1)
    return df.dropna()

In [30]:
# Applying the target function to both datasets.
# The names are rebound to the results, so every re-run of this cell shifts the
# labels again and drops one more trailing row.
train, test = target(train), target(test)

In [31]:
train  # Displaying the training frame, which now ends with the target column

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,high_low_ratio,open_adjclose_ratio,candle_to_wick_ratio,upper_to_lower_wick_ratio,...,rsi_oversold,cci,obv,obv_divergence_5_days,obv_divergence_10_days,ADX,ADI,returns,close_to_ema8_ratio,target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2002-07-22,65.800003,65.800003,64.224998,64.900002,64.900002,0,1.024523,0.986322,-0.571428,0.000000,...,0,71.148622,0,-1.175003,-3.150002,53.089992,0.000000,-1.22,1.006925,1.0
2002-07-29,64.925003,65.000000,64.449997,64.699997,64.699997,0,1.008534,0.996534,-0.409100,0.299988,...,0,51.315128,0,-1.299995,-2.624996,51.599305,0.000000,-0.31,1.002970,1.0
2002-08-05,64.324997,65.224998,64.175003,65.224998,65.224998,0,1.016361,1.013991,0.857148,0.000000,...,0,49.847205,0,-0.549995,-4.750000,50.463234,0.000000,0.81,1.008619,0.0
2002-08-26,64.750000,65.175003,64.500000,64.724998,64.724998,0,1.010465,0.999614,-0.037039,1.888915,...,0,-20.358467,0,0.175003,-1.000000,44.688827,0.000000,-0.50,0.996962,1.0
2002-09-16,69.775002,69.824997,69.699997,69.699997,69.699997,0,1.001793,0.998925,-0.600037,0.000000,...,0,141.617928,0,-3.724998,-4.299995,38.466499,0.000000,-0.78,1.035066,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2012-11-26,128.949997,129.024994,126.550003,126.724998,126.724998,58863,1.019557,0.982745,-0.898993,0.428565,...,0,111.606654,-430966,197239.775002,659422.800003,20.151208,-815506.937873,-1.73,1.004985,1.0
2012-12-03,126.625000,127.000000,125.500000,125.875000,125.875000,54137,1.011952,0.994077,-0.500000,1.000000,...,0,27.782588,-485103,212394.550003,494765.199997,20.364995,-842575.437873,-0.67,0.998634,1.0
2012-12-10,125.974998,127.400002,125.175003,126.900002,126.900002,63411,1.017775,1.007343,0.415732,0.625004,...,0,35.008746,-421692,224042.849998,492254.150002,20.851077,-807663.783260,0.81,1.005254,1.0
2012-12-17,127.425003,129.725006,127.400002,129.274994,129.274994,26724,1.018250,1.014518,0.795693,17.999390,...,0,157.649873,-394968,215978.875008,454451.625008,22.755490,-791284.817706,1.87,1.018620,1.0


In [32]:
# Donut chart of how often each target class occurs in the training frame
pie_title = {
    'text': '<b>Distribution of Classes in the Target Variable</b>',
    'x': 0.5,
    'xanchor': 'center',
    'yanchor': 'top',
    'font': {'size': 25},
}

fig = px.pie(train, names = 'target', hole = .75)

# Layout
fig.update_traces(marker = {'colors': ['red', 'blue']})
fig.update_layout(title = pie_title, margin = {'t': 100})

fig.show(renderer="colab")