In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.svm import SVC
from joblib import dump,load
import os
from sklearn.metrics import accuracy_score, classification_report
def fetch_data(ticker):
    stock = yf.Ticker(ticker)
    data = stock.history(period="max")  # Fetch 5 years of data
    data.reset_index(inplace=True)
    return data


In [2]:
stock_list=pd.read_csv('nasdaq-listed.csv')

In [3]:

def preprocess_data(data):
    # Feature engineering: Create relevant columns
    data['open-close'] = data['Open'] - data['Close']
    data['high-low'] = data['High'] - data['Low']
    data['price-change'] = data['Close'].pct_change()
    data['is_quarter_end'] = np.where((data['Date'].dt.month % 3 == 0) & (data['Date'].dt.day > 23), 1, 0)
    data['SMA_10'] = data['Close'].rolling(window=10).mean()
    data['SMA_50'] = data['Close'].rolling(window=50).mean()
    data['SMA_200'] = data['Close'].rolling(window=200).mean()
    data['EMA_10'] = data['Close'].ewm(span=10, adjust=False).mean()
    data['EMA_50'] = data['Close'].ewm(span=50, adjust=False).mean()
    data['EMA_200'] = data['Close'].ewm(span=200, adjust=False).mean()
    # Drop unnecessary columns
    data = data.drop(['Dividends', 'Stock Splits'], axis=1, errors='ignore')
    data.dropna(inplace=True)  # Handle missing values
    
    return data




In [4]:
accuracy_list=pd.read_csv("Accuracy_Data_NASDAQ.csv")

print(accuracy_list)

     STOCK   Accuracy Symbol
0      NaN  55.911330   AACG
1      NaN  54.248366   AADI
2      NaN  53.197674   AADR
3      NaN  49.354839    AAL
4      NaN  67.598017   AAME
..     ...        ...    ...
772    NaN  52.376600    CHI
773    NaN  50.394265   CHKP
774    NaN  64.775414   CHMG
775    NaN  63.358779   CHNR
776    NaN  63.358779   CHNR

[777 rows x 3 columns]


In [None]:
for index, row in stock_list.iloc[1293:].iterrows():
    stock=row['Stocks']
    data=fetch_data(stock)
    if (not data.empty) & (len(data)>11):
        data=preprocess_data(data)
        model_dir=f"./models/{stock}/"
        if not os.path.exists(model_dir):
                    os.mkdir(model_dir)
        accuracy_list.to_csv("Accuracy_Data_NASDAQ.csv",index=False)
        # Define features (X) and target (y)
        features = data[['open-close', 'high-low', 'Volume', 'is_quarter_end','SMA_10','SMA_50',"SMA_200",'EMA_10','EMA_50',"EMA_200"]]
        target = np.where(data['Close'].shift(-1) > data['Close'], 1, 0)  # 1 foSr price increase, 0 otherwise
        if features.shape[0] > 0 and target.shape[0] > 0:
   
            X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
            
            # Standardize features
            scaler = StandardScaler()
            X_train = scaler.fit_transform(X_train)
            X_test = scaler.transform(X_test)
            # Initialize and train the model
            model = SVC(kernel='poly', probability=True, random_state=42)  
            model.fit(X_train, y_train)
            
            # Save the scaler and model for reuse
            
            dump(scaler, f"{model_dir}{stock}_scaler.joblib")
            dump(model, f"{model_dir}{stock}_predictor.joblib")
            y_pred = model.predict(X_test)
            
            # Evaluate performance
            print(stock," ",index)
            print("Accuracy:", accuracy_score(y_test, y_pred))
            print("Classification Report:\n", classification_report(y_test, y_pred))
            stock_accuracy=pd.DataFrame({"Symbol":[row['Stocks']],"Accuracy":[accuracy_score(y_test,y_pred)*100]})
            accuracy_list=pd.concat([accuracy_list,stock_accuracy])
            print("Accuracy List Length :",len(accuracy_list))
        

DYFI   1293
Accuracy: 0.2
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         5
           1       0.29      0.40      0.33         5

    accuracy                           0.20        10
   macro avg       0.14      0.20      0.17        10
weighted avg       0.14      0.20      0.17        10

Accuracy List Length : 1121
DYN   1294
Accuracy: 0.5681818181818182
Classification Report:
               precision    recall  f1-score   support

           0       0.58      0.94      0.72       102
           1       0.40      0.05      0.10        74

    accuracy                           0.57       176
   macro avg       0.49      0.50      0.41       176
weighted avg       0.50      0.57      0.46       176

Accuracy List Length : 1122
DYNI   1295
Accuracy: 0.29411764705882354
Classification Report:
               precision    recall  f1-score   support

           0       0.29      0.67      0.40         6
 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
ECDAW: Period 'max' is invalid, must be one of ['1d', '5d']


ECOR   1309
Accuracy: 0.5397923875432526
Classification Report:
               precision    recall  f1-score   support

           0       0.55      0.93      0.69       160
           1       0.39      0.05      0.10       129

    accuracy                           0.54       289
   macro avg       0.47      0.49      0.39       289
weighted avg       0.48      0.54      0.43       289

Accuracy List Length : 1135
ECOW   1310
Accuracy: 0.5346938775510204
Classification Report:
               precision    recall  f1-score   support

           0       0.54      0.93      0.68       132
           1       0.47      0.07      0.12       113

    accuracy                           0.53       245
   macro avg       0.51      0.50      0.40       245
weighted avg       0.51      0.53      0.42       245

Accuracy List Length : 1136
ECPG   1311
Accuracy: 0.5534995977473853
Classification Report:
               precision    recall  f1-score   support

           0       0.55      0.96      0

ECXWW: Period 'max' is invalid, must be one of ['1d', '5d']


EDAP   1314
Accuracy: 0.5701492537313433
Classification Report:
               precision    recall  f1-score   support

           0       0.57      0.99      0.72       767
           1       0.40      0.01      0.02       573

    accuracy                           0.57      1340
   macro avg       0.49      0.50      0.37      1340
weighted avg       0.50      0.57      0.42      1340

Accuracy List Length : 1139
EDBL   1315
Accuracy: 0.6382978723404256
Classification Report:
               precision    recall  f1-score   support

           0       0.65      0.95      0.77        60
           1       0.50      0.09      0.15        34

    accuracy                           0.64        94
   macro avg       0.57      0.52      0.46        94
weighted avg       0.59      0.64      0.55        94

Accuracy List Length : 1140


EDBLW: Period 'max' is invalid, must be one of ['1d', '5d']


EDIT   1317
Accuracy: 0.5232273838630807
Classification Report:
               precision    recall  f1-score   support

           0       0.52      0.93      0.67       214
           1       0.50      0.07      0.13       195

    accuracy                           0.52       409
   macro avg       0.51      0.50      0.40       409
weighted avg       0.51      0.52      0.41       409

Accuracy List Length : 1141
EDOC   1318
Accuracy: 0.5245901639344263
Classification Report:
               precision    recall  f1-score   support

           0       0.52      0.96      0.67        94
           1       0.60      0.07      0.12        89

    accuracy                           0.52       183
   macro avg       0.56      0.51      0.40       183
weighted avg       0.56      0.52      0.41       183

Accuracy List Length : 1142
EDRY   1319
Accuracy: 0.5616438356164384
Classification Report:
               precision    recall  f1-score   support

           0       0.57      0.95      0

In [None]:
accuracy_list.to_csv("Accuracy_Data_NASDAQ.csv",index=False)