In [1]:
import yfinance as yf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn import metrics
import warnings
import numpy as np
from joblib import dump
warnings.filterwarnings('ignore')
import os


In [2]:
stock_list=pd.read_csv('stocks.txt')
stock_list.head()

Unnamed: 0,Stocks
0,ABT
1,ACN
2,ADBE
3,AMD
4,AAP


In [3]:
def load_data(stock):
    ticker = yf.Ticker(stock)
    data = ticker.history(period="max")
    data=data.reset_index()
    data['Date']=data['Date'].astype(str)
    data['Date']= data['Date'].str.split(expand=True)[0]
     # Split the Date column into year, month, and day
    splitted = data['Date'].astype(str).str.split('-', expand=True)
    
    # Add the splitted columns back to the DataFrame
    data['Year'] = splitted[0].astype(int)
    data['Month'] = splitted[1].astype(int)
    data['Day'] = splitted[2].astype(int)
    data=data.drop(['Date','Dividends','Stock Splits'],axis=1)

    data['is_quarter_end'] = np.where(data['Month']%3==0,1,0)

    data['open-close'] = data['Open'] - data['Close']
    data['high-low'] = data['High'] - data['Low']
    data['target'] = np.where(data['Close'].shift(-1) > data['Close'], 1, 0)
    
    print(data.columns.tolist())
    return data
    

In [4]:
data=load_data(stock_list['Stocks'][0])
data.head()


['Open', 'High', 'Low', 'Close', 'Volume', 'Year', 'Month', 'Day', 'is_quarter_end', 'open-close', 'high-low', 'target']


Unnamed: 0,Open,High,Low,Close,Volume,Year,Month,Day,is_quarter_end,open-close,high-low,target
0,0.0,0.188146,0.182991,0.184279,7513463,1980,3,17,1,-0.184279,0.005155,1
1,0.0,0.188146,0.181702,0.185568,5303621,1980,3,18,1,-0.185568,0.006444,1
2,0.0,0.189434,0.184924,0.188146,2523497,1980,3,19,1,-0.188146,0.00451,0
3,0.0,0.18879,0.183635,0.183635,4654925,1980,3,20,1,-0.183635,0.005155,1
4,0.0,0.186212,0.183635,0.185568,1333034,1980,3,21,1,-0.185568,0.002577,0


In [5]:
def train_model(stock_list):
    for stock in stock_list['Stocks']:
        data=load_data(stock)
        if not data.empty :
            
            
            features=data[['open-close','high-low','is_quarter_end']]
            target=data['target']
            scaler=StandardScaler()
            scaler = scaler.fit_transform(features)
            X_train,X_test,Y_train,Y_test=train_test_split(features,target,test_size=0.1,random_state=2022)
            #print(X_train.shape,X_test.shape)
            models = [LogisticRegression(), SVC(
            kernel='poly', probability=True), XGBClassifier()]
            directory_name="./models/"+stock+"/" #directory for saving model
            if not os.path.exists(directory_name):
                os.mkdir(directory_name)
            LR="./models/"+stock+"/"+stock+"_LR.joblib"
            SV="./models/"+stock+"/"+stock+"_SVC.joblib"
            XGB="./models/"+stock+"/"+stock+"_XGB.joblib"
            SP="./models/"+stock+"/Scaler.joblib"
            csv="./models/"+stock+"/"+stock+".csv"
            data.to_csv(csv,index=False)
            modelsave=[LR,SV,XGB]
            dump(scaler,SP)
            print(stock)
            for i in range(3):
                models[i].fit(X_train, Y_train)
                
                print(f'{stock} {models[i]} : ')
                print('Training Accuracy : ', metrics.roc_auc_score(Y_train, models[i].predict_proba(X_train)[:,1]))
                print('Validation Accuracy : ', metrics.roc_auc_score(Y_test, models[i].predict_proba(X_test)[:,1]))
                dump(models[i],modelsave[i])
            
        
        


In [6]:
train_model(stock_list)

['Open', 'High', 'Low', 'Close', 'Volume', 'Year', 'Month', 'Day', 'is_quarter_end', 'open-close', 'high-low', 'target']
ABT
ABT LogisticRegression() : 
Training Accuracy :  0.5215107857197542
Validation Accuracy :  0.5255657584424708
ABT SVC(kernel='poly', probability=True) : 
Training Accuracy :  0.5028313754163821
Validation Accuracy :  0.49951703547593956
ABT XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
     

KeyboardInterrupt: 

In [None]:
for stock in stock_list['SYMBOL']:
    ticker=yf.Ticker(f"{stock}.NS")
    data=ticker.history(period='max')
    print(stock)


In [None]:
stock_list=pd.read_csv('NSE.csv')
stock_list.head()