# Library

In [34]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier

from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

import warnings
warnings.filterwarnings('ignore')

# Initiate Data

In [35]:
stockEnergy_List = ["BP","E","XOM","BKR","BP"]

In [36]:
# buat dataframe kosong
df_concat = pd.DataFrame()

for stock in stockEnergy_List:
  path = "/content/"+stock+"data.csv"
  df = pd.read_csv(path)
  df['open-close'] = df['Open'] - df['Close']
  df['low-high'] = df['Low'] - df['High']
  df['target'] = np.where(df['Close'].shift(-1) > df['Close'], 1, 0)

  df['Date'] = pd.to_datetime(df['Date'], utc=True)
  df['day'] = df['Date'].dt.day
  df['month'] = df['Date'].dt.month
  df['year'] = df['Date'].dt.year
  df['is_quarter_end'] = np.where(df['month']%3==0,1,0)

  # gabungkan dataframe setiap saham ke dataframe utama
  df_concat = pd.concat([df_concat, df], ignore_index=True)

In [52]:
df_concat.to_csv('stockEnergydata.csv')

In [37]:
df_concat

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,open-close,low-high,target,day,month,year,is_quarter_end
0,2019-01-02 05:00:00+00:00,29.451459,30.449681,29.404298,30.331779,5537100,0.0,0.0,-0.880320,-1.045383,1,2,1,2019,0
1,2019-01-03 05:00:00+00:00,30.622606,30.701208,30.245326,30.504707,7137300,0.0,0.0,0.117898,-0.455882,1,3,1,2019,0
2,2019-01-04 05:00:00+00:00,31.102065,31.502927,30.984164,31.463625,9336700,0.0,0.0,-0.361560,-0.518764,1,4,1,2019,0
3,2019-01-07 05:00:00+00:00,31.204251,31.683713,31.109932,31.565811,6114300,0.0,0.0,-0.361560,-0.573781,0,7,1,2019,0
4,2019-01-08 05:00:00+00:00,31.581526,31.644405,31.306427,31.463625,5654900,0.0,0.0,0.117901,-0.337978,1,8,1,2019,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5275,2023-03-07 05:00:00+00:00,40.110001,40.180000,39.580002,39.799999,6987600,0.0,0.0,0.310001,-0.599998,0,7,3,2023,1
5276,2023-03-08 05:00:00+00:00,39.740002,40.110001,39.340000,39.660000,6250300,0.0,0.0,0.080002,-0.770000,0,8,3,2023,1
5277,2023-03-09 05:00:00+00:00,39.740002,40.049999,39.240002,39.340000,5999900,0.0,0.0,0.400002,-0.809998,0,9,3,2023,1
5278,2023-03-10 05:00:00+00:00,39.500000,39.860001,38.910000,39.009998,7627300,0.0,0.0,0.490002,-0.950001,0,10,3,2023,1


In [38]:
features = df_concat[['open-close', 'low-high', 'is_quarter_end']]
target = df_concat['target']

scaler = StandardScaler()
features = scaler.fit_transform(features)

x_train, x_test, y_train, y_test = train_test_split(features, target, test_size=0.3, random_state=43)
print(x_train.shape, x_test.shape)

(3696, 3) (1584, 3)


#Modeling

##Logistic Regression

In [39]:
LRModel = LogisticRegression()
LRModel.fit(x_train, y_train)

In [40]:
yval_predict = cross_val_predict(estimator = LRModel, X = x_train, y = y_train, cv = 10)

In [41]:
print(classification_report(y_train, yval_predict))

              precision    recall  f1-score   support

           0       0.51      0.73      0.60      1858
           1       0.51      0.28      0.36      1838

    accuracy                           0.51      3696
   macro avg       0.51      0.51      0.48      3696
weighted avg       0.51      0.51      0.48      3696



In [42]:
ytest_predict = LRModel.predict(x_test)
print("Classification Report: \n",classification_report(y_test, ytest_predict))

Classification Report: 
               precision    recall  f1-score   support

           0       0.49      0.77      0.60       764
           1       0.56      0.27      0.36       820

    accuracy                           0.51      1584
   macro avg       0.53      0.52      0.48      1584
weighted avg       0.53      0.51      0.48      1584



##SVM

In [43]:
SVMModel = SVC(kernel='poly', probability=True)
SVMModel.fit(x_train, y_train)

In [44]:
yval_predict = cross_val_predict(estimator = SVMModel, X = x_train, y = y_train, cv = 10)

In [45]:
print(classification_report(y_train, yval_predict))

              precision    recall  f1-score   support

           0       0.50      0.80      0.61      1858
           1       0.47      0.18      0.26      1838

    accuracy                           0.49      3696
   macro avg       0.48      0.49      0.44      3696
weighted avg       0.48      0.49      0.44      3696



In [46]:
ytest_predict = SVMModel.predict(x_test)
print("Classification Report: \n",classification_report(y_test, ytest_predict))

Classification Report: 
               precision    recall  f1-score   support

           0       0.49      0.90      0.63       764
           1       0.55      0.12      0.19       820

    accuracy                           0.49      1584
   macro avg       0.52      0.51      0.41      1584
weighted avg       0.52      0.49      0.40      1584



##Random Forest

In [47]:
RFModel = RandomForestClassifier()
RFModel.fit(x_train, y_train)

In [48]:
yval_predict = cross_val_predict(estimator = RFModel, X = x_train, y = y_train, cv = 10)

In [49]:
print(classification_report(y_train, yval_predict))

              precision    recall  f1-score   support

           0       0.63      0.64      0.63      1858
           1       0.63      0.62      0.63      1838

    accuracy                           0.63      3696
   macro avg       0.63      0.63      0.63      3696
weighted avg       0.63      0.63      0.63      3696



In [50]:
ytest_predict = RFModel.predict(x_test)
print("Classification Report: \n",classification_report(y_test, ytest_predict))

Classification Report: 
               precision    recall  f1-score   support

           0       0.62      0.67      0.64       764
           1       0.67      0.62      0.64       820

    accuracy                           0.64      1584
   macro avg       0.64      0.64      0.64      1584
weighted avg       0.65      0.64      0.64      1584



#Save Model

In [51]:
import pickle

filename = 'stockEnergyPredictModel.pkl'

with open(filename, 'wb') as file:
    pickle.dump(RFModel, file)
    file.close()