In [103]:
import yfinance as yf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn import metrics
import warnings
import numpy as np
from joblib import dump
warnings.filterwarnings('ignore')
import os


In [19]:
stock_list=pd.read_csv('stocks.txt')
stock_list.head()

Unnamed: 0,Stocks
0,ABT
1,ABBV
2,ABMD
3,ACN
4,ATVI


In [96]:
def load_data(stock):
    ticker = yf.Ticker(ticker_symbol)
    data = ticker.history(period="max")
    data=data.reset_index()
    data['Date']=data['Date'].astype(str)
    data['Date']= data['Date'].str.split(expand=True)[0]
     # Split the Date column into year, month, and day
    splitted = data['Date'].astype(str).str.split('-', expand=True)
    
    # Add the splitted columns back to the DataFrame
    data['Year'] = splitted[0].astype(int)
    data['Month'] = splitted[1].astype(int)
    data['Day'] = splitted[2].astype(int)
    data=data.drop(['Date','Dividends','Stock Splits'],axis=1)

    data['is_quarter_end'] = np.where(data['Month']%3==0,1,0)

    data['open-close'] = data['Open'] - data['Close']
    data['high-low'] = data['High'] - data['Low']
    data['target'] = np.where(data['Close'].shift(-1) > data['Close'], 1, 0)
    
    print(data.columns.tolist())
    return data
    

In [97]:
data=load_data(stock_list['Stocks'][0])
data.head()


['Open', 'High', 'Low', 'Close', 'Volume', 'Year', 'Month', 'Day', 'is_quarter_end', 'open-close', 'high-low', 'target']


Unnamed: 0,Open,High,Low,Close,Volume,Year,Month,Day,is_quarter_end,open-close,high-low,target
0,1.266667,1.666667,1.169333,1.592667,281494500,2010,6,29,1,-0.326,0.497334,0
1,1.719333,2.028,1.553333,1.588667,257806500,2010,6,30,1,0.130666,0.474667,0
2,1.666667,1.728,1.351333,1.464,123282000,2010,7,1,0,0.202667,0.376667,0
3,1.533333,1.54,1.247333,1.28,77097000,2010,7,2,0,0.253333,0.292667,0
4,1.333333,1.333333,1.055333,1.074,103003500,2010,7,6,0,0.259333,0.278,0


In [110]:
def train_model(stock_list):
    for stock in stock_list['Stocks']:
        data=load_data(stock)
        features=data[['open-close','high-low','is_quarter_end']]
        target=data['target']
        scaler=StandardScaler()
        scaler = scaler.fit_transform(features)
        X_train,X_test,Y_train,Y_test=train_test_split(features,target,test_size=0.1,random_state=2022)
        #print(X_train.shape,X_test.shape)
        models = [LogisticRegression(), SVC(
        kernel='poly', probability=True), XGBClassifier()]
        directory_name="./models/"+stock+"/" #directory for saving model
        if not os.path.exists(directory_name):
            os.mkdir(directory_name)
        LR="./models/"+stock+"/"+stock+"_LR.joblib"
        SV="./models/"+stock+"/"+stock+"_SVC.joblib"
        XGB="./models/"+stock+"/"+stock+"_XGB.joblib"
        modelsave=[LR,SV,XGB]
        for i in range(3):
            models[i].fit(X_train, Y_train)
            
            print(f'{stock} {models[i]} : ')
            print('Training Accuracy : ', metrics.roc_auc_score(Y_train, models[i].predict_proba(X_train)[:,1]))
            print('Validation Accuracy : ', metrics.roc_auc_score(Y_test, models[i].predict_proba(X_test)[:,1]))
            dump(models[i],modelsave[i])
            
        
        


In [112]:
train_model(stock_list)

['Open', 'High', 'Low', 'Close', 'Volume', 'Year', 'Month', 'Day', 'is_quarter_end', 'open-close', 'high-low', 'target']
ABT LogisticRegression() : 
Training Accuracy :  0.5231284266655976
Validation Accuracy :  0.5055897993715253
ABT SVC(kernel='poly', probability=True) : 
Training Accuracy :  0.49854719958660065
Validation Accuracy :  0.4900290065264684
ABT XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
         