In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.svm import SVC
from joblib import dump,load
import os
from sklearn.metrics import accuracy_score, classification_report
def fetch_data(ticker):
    stock = yf.Ticker(ticker)
    data = stock.history(period="max")  # Fetch 5 years of data
    data.reset_index(inplace=True)
    return data


In [2]:
stock_list=pd.read_csv('nasdaq-listed.csv')

In [3]:

def preprocess_data(data):
    # Feature engineering: Create relevant columns
    data['open-close'] = data['Open'] - data['Close']
    data['high-low'] = data['High'] - data['Low']
    data['price-change'] = data['Close'].pct_change()
    data['is_quarter_end'] = np.where((data['Date'].dt.month % 3 == 0) & (data['Date'].dt.day > 23), 1, 0)
    data['SMA_10'] = data['Close'].rolling(window=10).mean()
    data['SMA_50'] = data['Close'].rolling(window=50).mean()
    data['SMA_200'] = data['Close'].rolling(window=200).mean()
    data['EMA_10'] = data['Close'].ewm(span=10, adjust=False).mean()
    data['EMA_50'] = data['Close'].ewm(span=50, adjust=False).mean()
    data['EMA_200'] = data['Close'].ewm(span=200, adjust=False).mean()
    # Drop unnecessary columns
    data = data.drop(['Dividends', 'Stock Splits'], axis=1, errors='ignore')
    data.dropna(inplace=True)  # Handle missing values
    
    return data




In [4]:
accuracy_list=pd.read_csv("Accuracy_Data_NASDAQ.csv")

print(accuracy_list)

      STOCK   Accuracy Symbol
0       NaN  55.911330   AACG
1       NaN  54.248366   AADI
2       NaN  53.197674   AADR
3       NaN  49.354839    AAL
4       NaN  67.598017   AAME
...     ...        ...    ...
3759    NaN  62.280306   USEG
3760    NaN  46.875000   USFI
3761    NaN  58.695652   USGO
3762    NaN  53.810624   USIG
3763    NaN  70.919176   USIO

[3764 rows x 3 columns]


In [None]:
for index, row in stock_list.iloc[4428:].iterrows():
    stock=row['Stocks']
    data=fetch_data(stock)
    if (not data.empty) & (len(data)>11):
        data=preprocess_data(data)
        model_dir=f"./models/{stock}/"
        if not os.path.exists(model_dir):
                    os.mkdir(model_dir)
        accuracy_list.to_csv("Accuracy_Data_NASDAQ.csv",index=False)
        # Define features (X) and target (y)
        features = data[['open-close', 'high-low', 'Volume', 'is_quarter_end','SMA_10','SMA_50',"SMA_200",'EMA_10','EMA_50',"EMA_200"]]
        target = np.where(data['Close'].shift(-1) > data['Close'], 1, 0)  # 1 foSr price increase, 0 otherwise
        if features.shape[0] > 0 and target.shape[0] > 0:
   
            X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
            
            # Standardize features
            scaler = StandardScaler()
            X_train = scaler.fit_transform(X_train)
            X_test = scaler.transform(X_test)
            # Initialize and train the model
            model = SVC(kernel='poly', probability=True, random_state=42)  
            model.fit(X_train, y_train)
            
            # Save the scaler and model for reuse
            
            dump(scaler, f"{model_dir}{stock}_scaler.joblib")
            dump(model, f"{model_dir}{stock}_predictor.joblib")
            y_pred = model.predict(X_test)
            
            # Evaluate performance
            print(stock," ",index)
            print("Accuracy:", accuracy_score(y_test, y_pred))
            print("Classification Report:\n", classification_report(y_test, y_pred))
            stock_accuracy=pd.DataFrame({"Symbol":[row['Stocks']],"Accuracy":[accuracy_score(y_test,y_pred)*100]})
            accuracy_list=pd.concat([accuracy_list,stock_accuracy])
            print("Accuracy List Length :",len(accuracy_list))
        

USTB   4429
Accuracy: 0.5527950310559007
Classification Report:
               precision    recall  f1-score   support

           0       0.50      0.02      0.04       144
           1       0.55      0.98      0.71       178

    accuracy                           0.55       322
   macro avg       0.53      0.50      0.37       322
weighted avg       0.53      0.55      0.41       322

Accuracy List Length : 3768
USVM   4430
Accuracy: 0.48757763975155277
Classification Report:
               precision    recall  f1-score   support

           0       0.18      0.01      0.02       158
           1       0.50      0.95      0.65       164

    accuracy                           0.49       322
   macro avg       0.34      0.48      0.34       322
weighted avg       0.34      0.49      0.34       322

Accuracy List Length : 3769
USVN   4431
Accuracy: 0.4
Classification Report:
               precision    recall  f1-score   support

           0       0.50      0.03      0.06        30


VACHW: Period 'max' is invalid, must be one of ['1d', '5d']


VALN   4447
Accuracy: 0.5655172413793104
Classification Report:
               precision    recall  f1-score   support

           0       0.57      0.93      0.70        81
           1       0.54      0.11      0.18        64

    accuracy                           0.57       145
   macro avg       0.55      0.52      0.44       145
weighted avg       0.56      0.57      0.47       145

Accuracy List Length : 3783
VALU   4448
Accuracy: 0.5975728155339806
Classification Report:
               precision    recall  f1-score   support

           0       0.60      0.98      0.75      1232
           1       0.49      0.02      0.04       828

    accuracy                           0.60      2060
   macro avg       0.54      0.50      0.39      2060
weighted avg       0.55      0.60      0.46      2060

Accuracy List Length : 3784
VANI   4449
Accuracy: 0.5808510638297872
Classification Report:
               precision    recall  f1-score   support

           0       0.58      1.00      0

VCICW: Period 'max' is invalid, must be one of ['1d', '5d']


VCIG   4458
Accuracy: 0.574468085106383
Classification Report:
               precision    recall  f1-score   support

           0       0.57      1.00      0.73        27
           1       0.00      0.00      0.00        20

    accuracy                           0.57        47
   macro avg       0.29      0.50      0.36        47
weighted avg       0.33      0.57      0.42        47

Accuracy List Length : 3791


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


VCIT   4459
Accuracy: 0.5270457697642164
Classification Report:
               precision    recall  f1-score   support

           0       0.29      0.01      0.03       334
           1       0.53      0.97      0.69       387

    accuracy                           0.53       721
   macro avg       0.41      0.49      0.36       721
weighted avg       0.42      0.53      0.38       721

Accuracy List Length : 3792
VCLT   4460
Accuracy: 0.5256588072122053
Classification Report:
               precision    recall  f1-score   support

           0       0.23      0.02      0.03       330
           1       0.54      0.96      0.69       391

    accuracy                           0.53       721
   macro avg       0.38      0.49      0.36       721
weighted avg       0.39      0.53      0.39       721

Accuracy List Length : 3793
VCNX   4461
Accuracy: 0.6007067137809188
Classification Report:
               precision    recall  f1-score   support

           0       0.60      0.99      0

VEEAW: Period 'max' is invalid, must be one of ['1d', '5d']


VEEE   4470
Accuracy: 0.5895522388059702
Classification Report:
               precision    recall  f1-score   support

           0       0.59      0.95      0.73        78
           1       0.56      0.09      0.15        56

    accuracy                           0.59       134
   macro avg       0.57      0.52      0.44       134
weighted avg       0.58      0.59      0.49       134

Accuracy List Length : 3802
VEON   4471
Accuracy: 0.5152616279069767
Classification Report:
               precision    recall  f1-score   support

           0       0.52      0.90      0.66       722
           1       0.45      0.09      0.15       654

    accuracy                           0.52      1376
   macro avg       0.49      0.50      0.40      1376
weighted avg       0.49      0.52      0.42      1376

Accuracy List Length : 3803
VERA   4472
Accuracy: 0.5416666666666666
Classification Report:
               precision    recall  f1-score   support

           0       0.55      0.38      0

VGASW: Period 'max' is invalid, must be one of ['1d', '5d']


VGIT   4485
Accuracy: 0.492371705963939
Classification Report:
               precision    recall  f1-score   support

           0       0.49      0.83      0.62       357
           1       0.49      0.16      0.25       364

    accuracy                           0.49       721
   macro avg       0.49      0.50      0.43       721
weighted avg       0.49      0.49      0.43       721

Accuracy List Length : 3816
VGLT   4486
Accuracy: 0.5321229050279329
Classification Report:
               precision    recall  f1-score   support

           0       0.48      0.08      0.14       333
           1       0.54      0.92      0.68       383

    accuracy                           0.53       716
   macro avg       0.51      0.50      0.41       716
weighted avg       0.51      0.53      0.43       716

Accuracy List Length : 3817
VGSH   4487
Accuracy: 0.5409153952843273
Classification Report:
               precision    recall  f1-score   support

           0       0.54      0.97      0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
VMCAW: Period 'max' is invalid, must be one of ['1d', '5d']


VMD   4516
Accuracy: 0.5301724137931034
Classification Report:
               precision    recall  f1-score   support

           0       0.55      0.71      0.62       125
           1       0.49      0.32      0.38       107

    accuracy                           0.53       232
   macro avg       0.52      0.51      0.50       232
weighted avg       0.52      0.53      0.51       232

Accuracy List Length : 3845
VMEO   4517
Accuracy: 0.5384615384615384
Classification Report:
               precision    recall  f1-score   support

           0       0.56      0.82      0.67        80
           1       0.44      0.17      0.25        63

    accuracy                           0.54       143
   macro avg       0.50      0.50      0.46       143
weighted avg       0.51      0.54      0.48       143

Accuracy List Length : 3846
VMOT   4518
Accuracy: 0.45375722543352603
Classification Report:
               precision    recall  f1-score   support

           0       0.55      0.03      0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


VRA   4531
Accuracy: 0.5007407407407407
Classification Report:
               precision    recall  f1-score   support

           0       0.51      0.86      0.64       350
           1       0.43      0.12      0.18       325

    accuracy                           0.50       675
   macro avg       0.47      0.49      0.41       675
weighted avg       0.47      0.50      0.42       675

Accuracy List Length : 3860
VRAR   4532
Accuracy: 0.6204379562043796
Classification Report:
               precision    recall  f1-score   support

           0       0.65      0.91      0.76        90
           1       0.27      0.06      0.10        47

    accuracy                           0.62       137
   macro avg       0.46      0.49      0.43       137
weighted avg       0.52      0.62      0.53       137

Accuracy List Length : 3861
VRAX   4533
Accuracy: 0.5476190476190477
Classification Report:
               precision    recall  f1-score   support

           0       0.55      0.91      0.

VRMEW: Period 'max' is invalid, must be one of ['1d', '5d']


VRNA   4541
Accuracy: 0.5446685878962536
Classification Report:
               precision    recall  f1-score   support

           0       0.55      0.96      0.70       192
           1       0.36      0.03      0.05       155

    accuracy                           0.54       347
   macro avg       0.46      0.49      0.37       347
weighted avg       0.47      0.54      0.41       347

Accuracy List Length : 3869
VRNS   4542
Accuracy: 0.5128205128205128
Classification Report:
               precision    recall  f1-score   support

           0       0.60      0.05      0.09       251
           1       0.51      0.97      0.67       256

    accuracy                           0.51       507
   macro avg       0.55      0.51      0.38       507
weighted avg       0.55      0.51      0.38       507

Accuracy List Length : 3870
VRNT   4543
Accuracy: 0.5327272727272727
Classification Report:
               precision    recall  f1-score   support

           0       0.53      0.93      0

VSEEW: Period 'max' is invalid, must be one of ['1d', '5d']


VSME   4555
Accuracy: 0.7083333333333334
Classification Report:
               precision    recall  f1-score   support

           0       0.71      1.00      0.83        17
           1       0.00      0.00      0.00         7

    accuracy                           0.71        24
   macro avg       0.35      0.50      0.41        24
weighted avg       0.50      0.71      0.59        24

Accuracy List Length : 3882


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


VSMV   4556
Accuracy: 0.5457227138643068
Classification Report:
               precision    recall  f1-score   support

           0       0.33      0.07      0.11       144
           1       0.57      0.90      0.69       195

    accuracy                           0.55       339
   macro avg       0.45      0.48      0.40       339
weighted avg       0.47      0.55      0.45       339

Accuracy List Length : 3883
VSSYW   4557
Accuracy: 0.725
Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.95      0.84       121
           1       0.14      0.03      0.04        39

    accuracy                           0.72       160
   macro avg       0.45      0.49      0.44       160
weighted avg       0.60      0.72      0.65       160

Accuracy List Length : 3884
VSTA   4558
Accuracy: 0.6120218579234973
Classification Report:
               precision    recall  f1-score   support

           0       0.61      0.99      0.76       11

VSTEW: Period 'max' is invalid, must be one of ['1d', '5d']


VSTM   4561
Accuracy: 0.5482815057283142
Classification Report:
               precision    recall  f1-score   support

           0       0.56      0.97      0.71       342
           1       0.18      0.01      0.01       269

    accuracy                           0.55       611
   macro avg       0.37      0.49      0.36       611
weighted avg       0.39      0.55      0.40       611

Accuracy List Length : 3887
VTC   4562
Accuracy: 0.5625
Classification Report:
               precision    recall  f1-score   support

           0       0.33      0.07      0.11       131
           1       0.58      0.90      0.71       189

    accuracy                           0.56       320
   macro avg       0.46      0.49      0.41       320
weighted avg       0.48      0.56      0.47       320

Accuracy List Length : 3888
VTGN   4563
Accuracy: 0.6464174454828661
Classification Report:
               precision    recall  f1-score   support

           0       0.66      0.96      0.78       419

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
accuracy_list.to_csv("Accuracy_Data_NASDAQ.csv",index=False)