In [18]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.svm import SVC
from joblib import dump,load
import os
from sklearn.metrics import accuracy_score, classification_report
def fetch_data(ticker):
    stock = yf.Ticker(ticker)
    data = stock.history(period="max")  # Fetch 5 years of data
    data.reset_index(inplace=True)
    return data


In [19]:
stock_list=pd.read_csv('NSE.csv')

In [20]:

def preprocess_data(data):
    # Feature engineering: Create relevant columns
    data['open-close'] = data['Open'] - data['Close']
    data['high-low'] = data['High'] - data['Low']
    data['price-change'] = data['Close'].pct_change()
    data['is_quarter_end'] = np.where(data['Date'].dt.month % 3 == 0, 1, 0)
    data['SMA_10'] = data['Close'].rolling(window=10).mean()
    data['SMA_50'] = data['Close'].rolling(window=50).mean()
    data['SMA_200'] = data['Close'].rolling(window=200).mean()
    data['EMA_10'] = data['Close'].ewm(span=10, adjust=False).mean()
    data['EMA_50'] = data['Close'].ewm(span=50, adjust=False).mean()
    data['EMA_200'] = data['Close'].ewm(span=200, adjust=False).mean()
    # Drop unnecessary columns
    data = data.drop(['Dividends', 'Stock Splits'], axis=1, errors='ignore')
    data.dropna(inplace=True)  # Handle missing values
    
    return data




In [None]:
for index, row in stock_list.iloc[:].iterrows():
    stock=row['SYMBOL']+".NS"
    data=fetch_data(stock)
    data=preprocess_data(data)
    model_dir=f"./modelsNS/{stock}/"
    if not os.path.exists(model_dir):
                os.mkdir(model_dir)
    # Define features (X) and target (y)
    features = data[['open-close', 'high-low', 'Volume', 'is_quarter_end','SMA_10','SMA_50',"SMA_200",'EMA_10','EMA_50',"EMA_200"]]
    target = np.where(data['Close'].shift(-1) > data['Close'], 1, 0)  # 1 foSr price increase, 0 otherwise
    if not data.empty:
        X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
        
        # Standardize features
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        # Initialize and train the model
        model = SVC(kernel='poly', probability=True, random_state=42)  
        model.fit(X_train, y_train)
        
        # Save the scaler and model for reuse
        
        dump(scaler, f"{model_dir}{stock}_scaler.joblib")
        dump(model, f"{model_dir}{stock}_predictor.joblib")
        y_pred = model.predict(X_test)
        
        # Evaluate performance
        print(stock)
        print("Accuracy:", accuracy_score(y_test, y_pred))
        print("Classification Report:\n", classification_report(y_test, y_pred))

20MICRONS.NS
Accuracy: 0.5473684210526316
Classification Report:
               precision    recall  f1-score   support

           0       0.55      0.98      0.71       423
           1       0.18      0.01      0.01       337

    accuracy                           0.55       760
   macro avg       0.37      0.49      0.36       760
weighted avg       0.39      0.55      0.40       760

21STCENMGM.NS
Accuracy: 0.5230263157894737
Classification Report:
               precision    recall  f1-score   support

           0       0.51      0.97      0.67       152
           1       0.71      0.08      0.14       152

    accuracy                           0.52       304
   macro avg       0.61      0.52      0.41       304
weighted avg       0.61      0.52      0.41       304

360ONE.NS
Accuracy: 0.43243243243243246
Classification Report:
               precision    recall  f1-score   support

           0       0.44      0.91      0.59       101
           1       0.31      0.03      0

In [None]:


# Make predictions



In [None]:

# Load saved model and scaler
scaler1 = load('scaler.joblib')
model1 = load('stock_price_predictor.joblib')

# Example new data
new_data = [[5.0, -2.0, 1e7, 0,77.53,77.1,76.1,78.05,77.29,76.49]]  # Replace with actual feature values
new_data_scaled = scaler1.transform(new_data)
prediction = model1.predict(new_data_scaled)
probability = model1.predict_proba(new_data_scaled)

print("Prediction:", "Up" if prediction[0] == 1 else "Down")
print("Probability of Price Increase:", probability[0][1])
