# Library

In [54]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier

from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

import warnings
warnings.filterwarnings('ignore')

# Initiate Data

In [55]:
stockTech_List = ["AAPL","GOOGL","TSLA","NVDA","META"]

In [56]:
# buat dataframe kosong
df_concat = pd.DataFrame()

for stock in stockTech_List:
  path = "/content/"+stock+"data.csv"
  df = pd.read_csv(path)
  df['open-close'] = df['Open'] - df['Close']
  df['low-high'] = df['Low'] - df['High']
  df['target'] = np.where(df['Close'].shift(-1) > df['Close'], 1, 0)

  df['Date'] = pd.to_datetime(df['Date'], utc=True)
  df['day'] = df['Date'].dt.day
  df['month'] = df['Date'].dt.month
  df['year'] = df['Date'].dt.year
  df['is_quarter_end'] = np.where(df['month']%3==0,1,0)

  # gabungkan dataframe setiap saham ke dataframe utama
  df_concat = pd.concat([df_concat, df], ignore_index=True)

In [75]:
df_concat.to_csv('stockTechdata.csv')

In [57]:
df_concat

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,open-close,low-high,target,day,month,year,is_quarter_end
0,2019-01-02 05:00:00+00:00,37.317041,38.271110,37.158029,38.047047,148158800,0.0,0.0,-0.730006,-1.113081,0,2,1,2019,0
1,2019-01-03 05:00:00+00:00,34.688534,35.107747,34.211502,34.257278,365248800,0.0,0.0,0.431256,-0.896245,1,3,1,2019,0
2,2019-01-04 05:00:00+00:00,34.821047,35.789570,34.645172,35.719700,234428400,0.0,0.0,-0.898653,-1.144399,0,4,1,2019,0
3,2019-01-07 05:00:00+00:00,35.825704,35.857026,35.151111,35.640190,219111200,0.0,0.0,0.185514,-0.705915,1,7,1,2019,0
4,2019-01-08 05:00:00+00:00,36.032904,36.577399,35.782343,36.319607,164101200,0.0,0.0,-0.286703,-0.795057,1,8,1,2019,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5306,2023-03-20 04:00:00+00:00,198.479996,199.360001,193.639999,197.809998,25186300,0.0,0.0,0.669998,-5.720001,1,20,3,2023,1
5307,2023-03-21 04:00:00+00:00,203.199997,203.550003,197.949997,202.160004,31827000,0.0,0.0,1.039993,-5.600006,0,21,3,2023,1
5308,2023-03-22 04:00:00+00:00,202.500000,207.369995,199.669998,199.809998,28477800,0.0,0.0,2.690002,-7.699997,1,22,3,2023,1
5309,2023-03-23 04:00:00+00:00,202.839996,207.880005,202.149994,204.279999,27389700,0.0,0.0,-1.440002,-5.730011,1,23,3,2023,1


In [58]:
df_concat.isnull().sum()

Date              0
Open              0
High              0
Low               0
Close             0
Volume            0
Dividends         0
Stock Splits      0
open-close        0
low-high          0
target            0
day               0
month             0
year              0
is_quarter_end    0
dtype: int64

In [59]:
"""df_concat['RSI'].fillna(df_concat['RSI'].mean(), inplace=True)
df_concat['bb_bbm'].fillna(df_concat['bb_bbm'].mean(), inplace=True)
df_concat['bb_bbh'].fillna(df_concat['bb_bbh'].mean(), inplace=True)
df_concat['bb_bbl'].fillna(df_concat['bb_bbl'].mean(), inplace=True)
df_concat['volatility'].fillna(df_concat['volatility'].mean(), inplace=True)
df_concat['trend'].fillna(df_concat['trend'].mean(), inplace=True)
df_concat['MA5'].fillna(df_concat['MA10'].mean(), inplace=True)
df_concat['MA10'].fillna(df_concat['MA10'].mean(), inplace=True)"""

"df_concat['RSI'].fillna(df_concat['RSI'].mean(), inplace=True)\ndf_concat['bb_bbm'].fillna(df_concat['bb_bbm'].mean(), inplace=True)\ndf_concat['bb_bbh'].fillna(df_concat['bb_bbh'].mean(), inplace=True)\ndf_concat['bb_bbl'].fillna(df_concat['bb_bbl'].mean(), inplace=True)\ndf_concat['volatility'].fillna(df_concat['volatility'].mean(), inplace=True)\ndf_concat['trend'].fillna(df_concat['trend'].mean(), inplace=True)\ndf_concat['MA5'].fillna(df_concat['MA10'].mean(), inplace=True)\ndf_concat['MA10'].fillna(df_concat['MA10'].mean(), inplace=True)"

In [60]:
#df_concat.isnull().sum()

In [61]:
features = df_concat[['open-close', 'low-high', 'is_quarter_end']]
target = df_concat['target']

scaler = StandardScaler()
features = scaler.fit_transform(features)

x_train, x_test, y_train, y_test = train_test_split(features, target, test_size=0.3, random_state=43)
print(x_train.shape, x_test.shape)

(3717, 3) (1594, 3)


#Modeling

##Logistic Regression

In [62]:
LRModel = LogisticRegression()
LRModel.fit(x_train, y_train)

In [63]:
yval_predict = cross_val_predict(estimator = LRModel, X = x_train, y = y_train, cv = 10)

In [64]:
print(classification_report(y_train, yval_predict))

              precision    recall  f1-score   support

           0       0.52      0.08      0.13      1753
           1       0.53      0.94      0.68      1964

    accuracy                           0.53      3717
   macro avg       0.53      0.51      0.41      3717
weighted avg       0.53      0.53      0.42      3717



In [65]:
ytest_predict = LRModel.predict(x_test)
print("Classification Report: \n",classification_report(y_test, ytest_predict))

Classification Report: 
               precision    recall  f1-score   support

           0       0.50      0.07      0.12       732
           1       0.54      0.94      0.69       862

    accuracy                           0.54      1594
   macro avg       0.52      0.51      0.41      1594
weighted avg       0.52      0.54      0.43      1594



##SVM

In [66]:
SVMModel = SVC(kernel='poly', probability=True)
SVMModel.fit(x_train, y_train)

In [67]:
yval_predict = cross_val_predict(estimator = SVMModel, X = x_train, y = y_train, cv = 10)

In [68]:
print(classification_report(y_train, yval_predict))

              precision    recall  f1-score   support

           0       0.40      0.04      0.07      1753
           1       0.53      0.95      0.68      1964

    accuracy                           0.52      3717
   macro avg       0.46      0.49      0.37      3717
weighted avg       0.47      0.52      0.39      3717



In [69]:
ytest_predict = SVMModel.predict(x_test)
print("Classification Report: \n",classification_report(y_test, ytest_predict))

Classification Report: 
               precision    recall  f1-score   support

           0       0.45      0.03      0.06       732
           1       0.54      0.97      0.69       862

    accuracy                           0.54      1594
   macro avg       0.50      0.50      0.38      1594
weighted avg       0.50      0.54      0.40      1594



##Random Forest

In [70]:
RFModel = RandomForestClassifier()
RFModel.fit(x_train, y_train)

In [71]:
yval_predict = cross_val_predict(estimator = RFModel, X = x_train, y = y_train, cv = 10)

In [72]:
print(classification_report(y_train, yval_predict))

              precision    recall  f1-score   support

           0       0.47      0.47      0.47      1753
           1       0.53      0.53      0.53      1964

    accuracy                           0.50      3717
   macro avg       0.50      0.50      0.50      3717
weighted avg       0.50      0.50      0.50      3717



In [73]:
ytest_predict = RFModel.predict(x_test)
print("Classification Report: \n",classification_report(y_test, ytest_predict))

Classification Report: 
               precision    recall  f1-score   support

           0       0.43      0.43      0.43       732
           1       0.52      0.52      0.52       862

    accuracy                           0.48      1594
   macro avg       0.47      0.47      0.47      1594
weighted avg       0.48      0.48      0.48      1594



#Save Model

In [74]:
import pickle

filename = 'stockTechPredictModel.pkl'

with open(filename, 'wb') as file:
    pickle.dump(RFModel, file)
    file.close()