In [38]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.svm import SVC
from joblib import dump,load
import os
from sklearn.metrics import accuracy_score, classification_report
def fetch_data(ticker):
    stock = yf.Ticker(ticker)
    data = stock.history(period="max")  # Fetch 5 years of data
    data.reset_index(inplace=True)
    return data


In [39]:
stock_list=pd.read_csv('stocks.txt')

In [40]:

def preprocess_data(data):
    # Feature engineering: Create relevant columns
    data['open-close'] = data['Open'] - data['Close']
    data['high-low'] = data['High'] - data['Low']
    data['price-change'] = data['Close'].pct_change()
    data['is_quarter_end'] = np.where((data['Date'].dt.month % 3 == 0) & (data['Date'].dt.day > 23), 1, 0)
    data['SMA_10'] = data['Close'].rolling(window=10).mean()
    data['SMA_50'] = data['Close'].rolling(window=50).mean()
    data['SMA_200'] = data['Close'].rolling(window=200).mean()
    data['EMA_10'] = data['Close'].ewm(span=10, adjust=False).mean()
    data['EMA_50'] = data['Close'].ewm(span=50, adjust=False).mean()
    data['EMA_200'] = data['Close'].ewm(span=200, adjust=False).mean()
    # Drop unnecessary columns
    data = data.drop(['Dividends', 'Stock Splits'], axis=1, errors='ignore')
    data.dropna(inplace=True)  # Handle missing values
    
    return data




In [41]:
#accuracy_list=pd.read_csv("Accuracy_Data.csv")

#print(accuracy_list)

In [None]:
for index, row in stock_list.iloc[:10].iterrows():
    stock=row['Stocks']
    data=fetch_data(stock)
    data=preprocess_data(data)
    model_dir=f"./test/{stock}/"
    if not os.path.exists(model_dir):
                os.mkdir(model_dir)
    #accuracy_list.to_csv("Accuracy_Data.csv",index=False)
    # Define features (X) and target (y)
    features = data[['open-close', 'high-low', 'Volume', 'is_quarter_end','SMA_10','SMA_50',"SMA_200",'EMA_10','EMA_50',"EMA_200"]]
    target = np.where(data['Close'].shift(-1) > data['Close'], 1, 0)  # 1 foSr price increase, 0 otherwise
    if not data.empty:
        X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
        
        # Standardize features
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        # Initialize and train the model
        model = SVC(kernel='poly', probability=True, random_state=42)  
        model.fit(X_train, y_train)
        
        # Save the scaler and model for reuse
        
        dump(scaler, f"{model_dir}{stock}_scaler.joblib")
        dump(model, f"{model_dir}{stock}_predictor.joblib")
        y_pred = model.predict(X_test)
        
        # Evaluate performance
        print(stock," ",index)
        print("Accuracy:", accuracy_score(y_test, y_pred))
        print("Classification Report:\n", classification_report(y_test, y_pred))
        stock_accuracy=pd.DataFrame({"Symbol":[row['Stocks']],"Accuracy":[accuracy_score(y_test,y_pred)*100]})
        #accuracy_list=pd.concat([accuracy_list,stock_accuracy])
        #print("Accuracy List Length :",len(accuracy_list))
        

ABT   0
Accuracy: 0.508787742226228
Classification Report:
               precision    recall  f1-score   support

           0       0.51      0.91      0.65      1131
           1       0.50      0.09      0.16      1088

    accuracy                           0.51      2219
   macro avg       0.50      0.50      0.41      2219
weighted avg       0.50      0.51      0.41      2219

ACN   1
Accuracy: 0.5289473684210526
Classification Report:
               precision    recall  f1-score   support

           0       0.58      0.02      0.04       540
           1       0.53      0.99      0.69       600

    accuracy                           0.53      1140
   macro avg       0.55      0.50      0.36      1140
weighted avg       0.55      0.53      0.38      1140

ADBE   2
Accuracy: 0.49973614775725594
Classification Report:
               precision    recall  f1-score   support

           0       0.50      0.92      0.65       957
           1       0.47      0.08      0.13       938

In [None]:
#accuracy_list.to_csv("Accuracy_Data.csv",index=False)