In [10]:
import pandas as pd 
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt 
import matplotlib.dates as dates

import os 
import random
from contextlib import contextmanager
from time import time
from tqdm import tqdm

import plotly.express as px 
import plotly.graph_objects as go 
from plotly.subplots import make_subplots

import lightgbm as lgbm
import category_encoders as ce 
from sklearn.metrics import classification_report, log_loss, accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

In [17]:
def to_categorical(y, num_classes):
    """One-hot encode integer class labels.

    Args:
        y: Integer label or array-like of integer labels; each value is used
            as a row index into a ``num_classes`` x ``num_classes`` identity
            matrix.
        num_classes: Size of the one-hot dimension.

    Returns:
        A ``uint8`` NumPy array with the shape of ``y`` plus one trailing
        axis of length ``num_classes``.
    """
    # Row k of the identity matrix is exactly the one-hot vector for class k,
    # so indexing with the labels yields the encoded result.
    one_hot_rows = np.eye(num_classes, dtype='uint8')
    return one_hot_rows[y]

In [18]:
# Read the coffee price CSV from the local ./input directory into a DataFrame.
df0 = pd.read_csv('./input/coffee.csv')
# Report how many rows were loaded.
print(len(df0))

5671


In [19]:
# reset_index returns a new DataFrame with a fresh 0..N-1 index (the old index
# is dropped rather than kept as a column); `data` is the working frame that
# later cells add feature columns to.
data = df0.reset_index(drop=True)
display(data)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Currency
0,2000-01-03,122.25,124.00,116.10,116.50,6640,USD
1,2000-01-04,116.25,120.50,115.75,116.25,5492,USD
2,2000-01-05,115.00,121.00,115.00,118.60,6165,USD
3,2000-01-06,119.00,121.40,116.50,116.85,5094,USD
4,2000-01-07,117.25,117.75,113.80,114.15,6855,USD
...,...,...,...,...,...,...,...
5666,2022-05-11,204.05,220.45,204.05,219.90,29972,USD
5667,2022-05-12,217.90,218.50,212.55,215.30,18989,USD
5668,2022-05-13,215.90,216.85,209.65,212.90,19974,USD
5669,2022-05-16,224.00,224.00,223.50,225.80,5,USD


In [20]:
# Derive calendar features from the Date column.
#
# Date is parsed first and the parts are taken through the vectorised `.dt`
# accessor. Slicing the raw strings and then overwriting Date with datetimes
# in the same cell made the cell fail on a second run (a Timestamp cannot be
# sliced like a string); pd.to_datetime is a no-op on an already-parsed
# column, so this version can be re-run safely.
data['Date'] = pd.to_datetime(data['Date'])
# Cast to int64 so the dtype matches what int(...) parsing produced,
# regardless of the integer width `.dt` returns in the installed pandas.
data['Year'] = data['Date'].dt.year.astype('int64')
data['Month'] = data['Date'].dt.month.astype('int64')
# 'YYYY-MM' string key, e.g. '2000-01'.
data['Year-Month'] = data['Date'].dt.strftime('%Y-%m')
display(data)


Unnamed: 0,Date,Open,High,Low,Close,Volume,Currency,Year,Month,Year-Month
0,2000-01-03,122.25,124.00,116.10,116.50,6640,USD,2000,1,2000-01
1,2000-01-04,116.25,120.50,115.75,116.25,5492,USD,2000,1,2000-01
2,2000-01-05,115.00,121.00,115.00,118.60,6165,USD,2000,1,2000-01
3,2000-01-06,119.00,121.40,116.50,116.85,5094,USD,2000,1,2000-01
4,2000-01-07,117.25,117.75,113.80,114.15,6855,USD,2000,1,2000-01
...,...,...,...,...,...,...,...,...,...,...
5666,2022-05-11,204.05,220.45,204.05,219.90,29972,USD,2022,5,2022-05
5667,2022-05-12,217.90,218.50,212.55,215.30,18989,USD,2022,5,2022-05
5668,2022-05-13,215.90,216.85,209.65,212.90,19974,USD,2022,5,2022-05
5669,2022-05-16,224.00,224.00,223.50,225.80,5,USD,2022,5,2022-05


In [21]:
# --- Previous row's close, and the gap between it and the current open ---
prev_close = data['Close'].shift(1)
data['Close shift1'] = prev_close
data['Open - Close shift1'] = data['Open'] - prev_close

# --- Trailing 5-row extremes of High/Low and the spread between them ---
range_window = 5
data['High 5day max'] = data['High'].rolling(window=range_window).max()
data['Low 5day min'] = data['Low'].rolling(window=range_window).min()
data['5-day profit max'] = data['High 5day max'] - data['Low 5day min']

# --- Simple moving averages of Close, one column per window length ---
ma_windows = (3, 4, 5, 8, 10, 25, 75)
for ma_window in ma_windows:
    data[f'MA{ma_window}'] = data['Close'].rolling(window=ma_window).mean()

# --- Slope of each moving average: change over the last 5 rows, per row ---
# Done in a second pass so all MA columns precede all slope columns.
slope_lag = 5
for ma_window in ma_windows:
    ma_col = f'MA{ma_window}'
    data[f'{ma_col} slope'] = (data[ma_col] - data[ma_col].shift(slope_lag)) / slope_lag

# --- MA25 as a percentage of its own value 252 rows earlier ---
# NOTE(review): 252 is presumably the number of trading days in a year — confirm.
data['Annual Growth Rate'] = data['MA25'] * 100 / data['MA25'].shift(252)

In [22]:
# Preview the first rows; the rolling/shifted feature columns are NaN here
# because their look-back windows are not yet filled.
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Currency,Year,Month,Year-Month,...,MA25,MA75,MA3 slope,MA4 slope,MA5 slope,MA8 slope,MA10 slope,MA25 slope,MA75 slope,Annual Growth Rate
0,2000-01-03,122.25,124.0,116.1,116.5,6640,USD,2000,1,2000-01,...,,,,,,,,,,
1,2000-01-04,116.25,120.5,115.75,116.25,5492,USD,2000,1,2000-01,...,,,,,,,,,,
2,2000-01-05,115.0,121.0,115.0,118.6,6165,USD,2000,1,2000-01,...,,,,,,,,,,
3,2000-01-06,119.0,121.4,116.5,116.85,5094,USD,2000,1,2000-01,...,,,,,,,,,,
4,2000-01-07,117.25,117.75,113.8,114.15,6855,USD,2000,1,2000-01,...,,,,,,,,,,


In [23]:
# Overlay the daily Close, High and Low series on a single shared y-axis.
fig = make_subplots(specs=[[{'secondary_y': False}]])
for price_col in ('Close', 'High', 'Low'):
    fig.add_trace(
        go.Scatter(x=data['Date'], y=data[price_col], name=price_col),
        secondary_y=False,
    )
fig.update_layout(
    autosize=False,
    width=700,
    height=400,
    title_text='Daily change',
)
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text='USD', secondary_y=False)
fig.show()


In [24]:
# Compare the 25-row and 75-row moving averages of Close on one axis.
fig = make_subplots(specs=[[{"secondary_y": False}]])
for ma_col in ('MA25', 'MA75'):
    fig.add_trace(
        go.Scatter(x=data['Date'], y=data[ma_col], name=ma_col),
        secondary_y=False,
    )
fig.update_layout(
    autosize=False,
    width=700,
    height=400,
    title_text="Moving Average",
)
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="USD", secondary_y=False)
fig.show()