In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.svm import SVC
from joblib import dump,load
import os
from sklearn.metrics import accuracy_score, classification_report
def fetch_data(ticker):
    stock = yf.Ticker(ticker)
    data = stock.history(period="max")  # Fetch 5 years of data
    data.reset_index(inplace=True)
    return data


In [2]:
stock_list=pd.read_csv('nasdaq-listed.csv')

In [3]:

def preprocess_data(data):
    # Feature engineering: Create relevant columns
    data['open-close'] = data['Open'] - data['Close']
    data['high-low'] = data['High'] - data['Low']
    data['price-change'] = data['Close'].pct_change()
    data['is_quarter_end'] = np.where((data['Date'].dt.month % 3 == 0) & (data['Date'].dt.day > 23), 1, 0)
    data['SMA_10'] = data['Close'].rolling(window=10).mean()
    data['SMA_50'] = data['Close'].rolling(window=50).mean()
    data['SMA_200'] = data['Close'].rolling(window=200).mean()
    data['EMA_10'] = data['Close'].ewm(span=10, adjust=False).mean()
    data['EMA_50'] = data['Close'].ewm(span=50, adjust=False).mean()
    data['EMA_200'] = data['Close'].ewm(span=200, adjust=False).mean()
    # Drop unnecessary columns
    data = data.drop(['Dividends', 'Stock Splits'], axis=1, errors='ignore')
    data.dropna(inplace=True)  # Handle missing values
    
    return data




In [4]:
accuracy_list=pd.read_csv("Accuracy_Data_NASDAQ.csv")

print(accuracy_list)

     STOCK   Accuracy Symbol
0      NaN  55.911330   AACG
1      NaN  54.248366   AADI
2      NaN  53.197674   AADR
3      NaN  49.354839    AAL
4      NaN  67.598017   AAME
..     ...        ...    ...
772    NaN  52.376600    CHI
773    NaN  50.394265   CHKP
774    NaN  64.775414   CHMG
775    NaN  63.358779   CHNR
776    NaN  63.358779   CHNR

[777 rows x 3 columns]


In [None]:
for index, row in stock_list.iloc[934:].iterrows():
    stock=row['Stocks']
    data=fetch_data(stock)
    if (not data.empty) & (len(data)>11):
        data=preprocess_data(data)
        model_dir=f"./models/{stock}/"
        if not os.path.exists(model_dir):
                    os.mkdir(model_dir)
        accuracy_list.to_csv("Accuracy_Data_NASDAQ.csv",index=False)
        # Define features (X) and target (y)
        features = data[['open-close', 'high-low', 'Volume', 'is_quarter_end','SMA_10','SMA_50',"SMA_200",'EMA_10','EMA_50',"EMA_200"]]
        target = np.where(data['Close'].shift(-1) > data['Close'], 1, 0)  # 1 foSr price increase, 0 otherwise
        if features.shape[0] > 0 and target.shape[0] > 0:
   
            X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
            
            # Standardize features
            scaler = StandardScaler()
            X_train = scaler.fit_transform(X_train)
            X_test = scaler.transform(X_test)
            # Initialize and train the model
            model = SVC(kernel='poly', probability=True, random_state=42)  
            model.fit(X_train, y_train)
            
            # Save the scaler and model for reuse
            
            dump(scaler, f"{model_dir}{stock}_scaler.joblib")
            dump(model, f"{model_dir}{stock}_predictor.joblib")
            y_pred = model.predict(X_test)
            
            # Evaluate performance
            print(stock," ",index)
            print("Accuracy:", accuracy_score(y_test, y_pred))
            print("Classification Report:\n", classification_report(y_test, y_pred))
            stock_accuracy=pd.DataFrame({"Symbol":[row['Stocks']],"Accuracy":[accuracy_score(y_test,y_pred)*100]})
            accuracy_list=pd.concat([accuracy_list,stock_accuracy])
            print("Accuracy List Length :",len(accuracy_list))
        

$CLOD: possibly delisted; no price data found  (1d 1926-01-24 -> 2024-12-30)


CLOU   935
Accuracy: 0.5362903225806451
Classification Report:
               precision    recall  f1-score   support

           0       0.44      0.11      0.17       112
           1       0.55      0.89      0.68       136

    accuracy                           0.54       248
   macro avg       0.50      0.50      0.43       248
weighted avg       0.50      0.54      0.45       248

Accuracy List Length : 820
CLOV   936
Accuracy: 0.531578947368421
Classification Report:
               precision    recall  f1-score   support

           0       0.53      0.99      0.69       102
           1       0.00      0.00      0.00        88

    accuracy                           0.53       190
   macro avg       0.27      0.50      0.35       190
weighted avg       0.29      0.53      0.37       190

Accuracy List Length : 821
CLPS   937
Accuracy: 0.5494880546075085
Classification Report:
               precision    recall  f1-score   support

           0       0.56      0.97      0.71   

CLRCR: Period 'max' is invalid, must be one of ['1d', '5d']


CLRCU   942
Accuracy: 0.968421052631579
Classification Report:
               precision    recall  f1-score   support

           0       0.97      1.00      0.98        92
           1       0.00      0.00      0.00         3

    accuracy                           0.97        95
   macro avg       0.48      0.50      0.49        95
weighted avg       0.94      0.97      0.95        95

Accuracy List Length : 826


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
CLRCW: Period 'max' is invalid, must be one of ['1d', '5d']


CLRO   944
Accuracy: 0.6234428086070215
Classification Report:
               precision    recall  f1-score   support

           0       0.62      1.00      0.77      1100
           1       1.00      0.00      0.00       666

    accuracy                           0.62      1766
   macro avg       0.81      0.50      0.39      1766
weighted avg       0.77      0.62      0.48      1766

Accuracy List Length : 827
CLSD   945
Accuracy: 0.5382653061224489
Classification Report:
               precision    recall  f1-score   support

           0       0.54      0.92      0.69       213
           1       0.47      0.08      0.13       179

    accuracy                           0.54       392
   macro avg       0.51      0.50      0.41       392
weighted avg       0.51      0.54      0.43       392

Accuracy List Length : 828
CLSK   946
Accuracy: 0.6016260162601627
Classification Report:
               precision    recall  f1-score   support

           0       0.60      0.99      0.75  

CLSKW: Period 'max' is invalid, must be one of ['1d', '5d']


CLSM   948
Accuracy: 0.562962962962963
Classification Report:
               precision    recall  f1-score   support

           0       0.68      0.30      0.42        70
           1       0.53      0.85      0.65        65

    accuracy                           0.56       135
   macro avg       0.60      0.57      0.53       135
weighted avg       0.61      0.56      0.53       135

Accuracy List Length : 830
CLST   949
Accuracy: 0.7049180327868853
Classification Report:
               precision    recall  f1-score   support

           0       0.72      0.93      0.81        83
           1       0.60      0.23      0.33        39

    accuracy                           0.70       122
   macro avg       0.66      0.58      0.57       122
weighted avg       0.68      0.70      0.66       122

Accuracy List Length : 831
CLWT   950
Accuracy: 0.625
Classification Report:
               precision    recall  f1-score   support

           0       0.63      1.00      0.77       853
     

CMPOW: Period 'max' is invalid, must be one of ['1d', '5d']


CMPR   962
Accuracy: 0.4951560818083961
Classification Report:
               precision    recall  f1-score   support

           0       0.50      0.05      0.09       469
           1       0.49      0.95      0.65       460

    accuracy                           0.50       929
   macro avg       0.50      0.50      0.37       929
weighted avg       0.50      0.50      0.37       929

Accuracy List Length : 843
CMPS   963
Accuracy: 0.5511363636363636
Classification Report:
               precision    recall  f1-score   support

           0       0.57      0.91      0.70       100
           1       0.40      0.08      0.13        76

    accuracy                           0.55       176
   macro avg       0.48      0.49      0.41       176
weighted avg       0.49      0.55      0.45       176

Accuracy List Length : 844
CMPX   964
Accuracy: 0.5234899328859061
Classification Report:
               precision    recall  f1-score   support

           0       0.55      0.83      0.66  

In [None]:
accuracy_list.to_csv("Accuracy_Data_NASDAQ.csv",index=False)