In [66]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.svm import SVC
from joblib import dump,load
import os
from sklearn.metrics import accuracy_score, classification_report
def fetch_data(ticker):
    stock = yf.Ticker(ticker)
    data = stock.history(period="max")  # Fetch 5 years of data
    data.reset_index(inplace=True)
    return data


In [67]:
stock_list=pd.read_csv('nasdaq-listed.csv')

In [68]:

def preprocess_data(data):
    # Feature engineering: Create relevant columns
    data['open-close'] = data['Open'] - data['Close']
    data['high-low'] = data['High'] - data['Low']
    data['price-change'] = data['Close'].pct_change()
    data['is_quarter_end'] = np.where((data['Date'].dt.month % 3 == 0) & (data['Date'].dt.day > 23), 1, 0)
    data['SMA_10'] = data['Close'].rolling(window=10).mean()
    data['SMA_50'] = data['Close'].rolling(window=50).mean()
    data['SMA_200'] = data['Close'].rolling(window=200).mean()
    data['EMA_10'] = data['Close'].ewm(span=10, adjust=False).mean()
    data['EMA_50'] = data['Close'].ewm(span=50, adjust=False).mean()
    data['EMA_200'] = data['Close'].ewm(span=200, adjust=False).mean()
    # Drop unnecessary columns
    data = data.drop(['Dividends', 'Stock Splits'], axis=1, errors='ignore')
    data.dropna(inplace=True)  # Handle missing values
    
    return data




In [69]:
accuracy_list=pd.read_csv("Accuracy_Data_NASDAQ.csv")

print(accuracy_list)

    STOCK   Accuracy Symbol
0     NaN  55.911330   AACG
1     NaN  54.248366   AADI
2     NaN  53.197674   AADR
3     NaN  49.354839    AAL
4     NaN  67.598017   AAME
5     NaN  49.715370   AAOI
6     NaN  52.986023   AAON
7     NaN  51.851852   AAPB
8     NaN  55.555556   AAPD
9     NaN  50.527281   AAPL
10    NaN  55.911330   AACG
11    NaN  54.248366   AADI
12    NaN  53.197674   AADR
13    NaN  49.354839    AAL
14    NaN  67.598017   AAME
15    NaN  49.715370   AAOI
16    NaN  52.986023   AAON
17    NaN  51.851852   AAPB
18    NaN  55.555556   AAPD
19    NaN  50.527281   AAPL
20    NaN  48.148148   AAPU
21    NaN  52.993631   AAXJ
22    NaN  56.403941   ABAT
23    NaN  46.951220   ABCL
24    NaN  66.666667   ABCS
25    NaN  63.565891   ABEO
26    NaN  66.101695    ABL
27    NaN  56.250000  ABLLL
28    NaN  56.250000  ABLLL
29    NaN  44.444444   ABLV
30    NaN  59.756098   ABNB
31    NaN  49.264706   ABOS
32    NaN  72.549020    ABP
33    NaN  50.000000   ABSI
34    NaN  64.600000

In [None]:
for index, row in stock_list.iloc[50:].iterrows():
    stock=row['Stocks']
    data=fetch_data(stock)
    if (not data.empty) & (len(data)>11):
        data=preprocess_data(data)
        model_dir=f"./models/{stock}/"
        if not os.path.exists(model_dir):
                    os.mkdir(model_dir)
        accuracy_list.to_csv("Accuracy_Data_NASDAQ.csv",index=False)
        # Define features (X) and target (y)
        features = data[['open-close', 'high-low', 'Volume', 'is_quarter_end','SMA_10','SMA_50',"SMA_200",'EMA_10','EMA_50',"EMA_200"]]
        target = np.where(data['Close'].shift(-1) > data['Close'], 1, 0)  # 1 foSr price increase, 0 otherwise
        if features.shape[0] > 0 and target.shape[0] > 0:
   
            X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
            
            # Standardize features
            scaler = StandardScaler()
            X_train = scaler.fit_transform(X_train)
            X_test = scaler.transform(X_test)
            # Initialize and train the model
            model = SVC(kernel='poly', probability=True, random_state=42)  
            model.fit(X_train, y_train)
            
            # Save the scaler and model for reuse
            
            dump(scaler, f"{model_dir}{stock}_scaler.joblib")
            dump(model, f"{model_dir}{stock}_predictor.joblib")
            y_pred = model.predict(X_test)
            
            # Evaluate performance
            print(stock," ",index)
            print("Accuracy:", accuracy_score(y_test, y_pred))
            print("Classification Report:\n", classification_report(y_test, y_pred))
            stock_accuracy=pd.DataFrame({"Symbol":[row['Stocks']],"Accuracy":[accuracy_score(y_test,y_pred)*100]})
            accuracy_list=pd.concat([accuracy_list,stock_accuracy])
            print("Accuracy List Length :",len(accuracy_list))
        

ACNT   50
Accuracy: 0.598017124831005
Classification Report:
               precision    recall  f1-score   support

           0       0.60      0.99      0.75      1326
           1       0.53      0.01      0.02       893

    accuracy                           0.60      2219
   macro avg       0.57      0.50      0.38      2219
weighted avg       0.57      0.60      0.45      2219

Accuracy List Length : 79


ACONW: Period 'max' is invalid, must be one of ['1d', '5d']


ACON   52
Accuracy: 0.5416666666666666
Classification Report:
               precision    recall  f1-score   support

           0       0.54      0.98      0.70        52
           1       0.50      0.02      0.04        44

    accuracy                           0.54        96
   macro avg       0.52      0.50      0.37        96
weighted avg       0.52      0.54      0.40        96

Accuracy List Length : 80
ACRS   54
Accuracy: 0.5458823529411765
Classification Report:
               precision    recall  f1-score   support

           0       0.56      0.90      0.69       242
           1       0.36      0.07      0.12       183

    accuracy                           0.55       425
   macro avg       0.46      0.49      0.41       425
weighted avg       0.48      0.55      0.45       425

Accuracy List Length : 81
ACRV   55
Accuracy: 0.4925373134328358
Classification Report:
               precision    recall  f1-score   support

           0       0.48      0.75      0.59       

ADNWW: Period 'max' is invalid, must be one of ['1d', '5d']


In [None]:
accuracy_list.to_csv("Accuracy_Data_NASDAQ.csv",index=False)