In [56]:
# data preparation
# setting the target variable

import yfinance as yf
import pandas as pd
import numpy as np
from talib import abstract

def add_bollinger_band_features(data):
    # Calculate Bollinger Bands
    upper_band, middle_band, lower_band = abstract.BBANDS(data['Close'])

    # Calculate deviation from the middle band
    data['BB_Deviation'] = (data['Close'] - middle_band) / middle_band

    # Create binary indicator for inside/outside bands
    data['Inside_BB'] = np.where((data['Close'] >= lower_band) & (data['Close'] <= upper_band), 1, 0)

    # Calculate distance from the nearest band
    data['Distance_from_Lower'] = (data['Close'] - lower_band) / middle_band
    data['Distance_from_Upper'] = (data['Close'] - upper_band) / middle_band

    return data


def add_alpha_factors(data):
    # Calculate Moving Averages
    data['SMA_10'] = abstract.SMA(data.Close, timeperiod=10)
    data['SMA_50'] = abstract.SMA(data.Close, timeperiod=50)

    # Calculate RSI
    data['RSI'] = abstract.RSI(data.Close)

    # Calculate MACD
    data['MACD'], _, _ = abstract.MACD(data.Close)

    # Calculate Bollinger Bands
    upper_band, middle_band, lower_band = abstract.BBANDS(data.Close)
    data['Upper_Band'] = upper_band
    data['Middle_Band'] = middle_band
    data['Lower_Band'] = lower_band

    # Calculate ATR
    data['ATR'] = abstract.ATR(data.High,data.Low,data.Close)

    # Calculate OBV
    data['OBV'] = abstract.OBV(data.Close,data.Volume)

    # Calculate VWAP
    #data['VWAP'] = abstract.VWAP(data)

    # Calculate custom indicator (e.g., SMA cross)
    data['SMA_Cross'] = np.where(data['SMA_10'] > data['SMA_50'], 1, 0)
    
    # Calculate the Volume spike detector
    data['10_day_avg_volume'] = data['Volume'].rolling(window=10).mean()
    data['Volume_Spike'] = (data['Volume'] > 2*data["10_day_avg_volume"]).astype(int)
    
    # Calculate the percentage of average volume
    data["VolumeRatio"]  = data["Volume"]/data["10_day_avg_volume"]
    
    data['Bullish_Engulfing'] = talib.CDLENGULFING(data['Open'], data['High'], data['Low'], data['Close'])
    data['Bearish_Engulfing'] = talib.CDLENGULFING(data['Open'], data['High'], data['Low'], data['Close'])
    
    # Stoch RSI - parameters have to be tuned
    slowk, slowd = talib.STOCH(data.High, data.Low, data.Close, fastk_period=5, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0)
    data["slowk"]= slowk
    data["slowd"]=slowd
    # ... Add more alpha factors here ...
    data = add_bollinger_band_features(data)

    return data


# setting the target variables

def forward_returns(no_of_days, data):
    data['Forward_Returns'] = data['Close'].shift(no_of_days) / data['Close'] - 1
    return data

def binary_target_with_percentage(no_of_days, data, percentage_threshold):
    data = forward_returns(no_of_days, data)  # Calculate forward returns

    # Create binary target variable: 1 for returns exceeding percentage threshold, 0 otherwise
    data['Binary_Target_Percentage'] = data['Forward_Returns'].apply(lambda x: 1 if x > percentage_threshold else 0)
    return data

# Example function to create volatility-based target variable
def volatility_based_target(no_of_days, data):
    data = forward_returns(no_of_days, data)  # Calculate forward returns

    # Calculate standard deviation of returns for the specified number of days
    data['Volatility'] = data['Close'].pct_change().rolling(window=no_of_days).std()

    # Create binary target variable: 1 for returns exceeding volatility threshold, 0 otherwise
    volatility_threshold = 2.0  # Example threshold for high volatility
    data['Volatility_Based_Target'] = data['Forward_Returns'].apply(lambda x: 1 if abs(x) > volatility_threshold * data['Volatility'].iloc[-1] else 0)
    return data

In [57]:
from load_data import get_individual_data
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import mean_squared_error, accuracy_score, classification_report, roc_auc_score, precision_score
import yfinance as yf
import datetime


In [58]:
len(sdata)

2147

In [115]:
ticker = "aegischem" + ".NS"  # Replace with the desired stock ticker
start_date = '2015-01-01'
end_date = datetime.date.today()
stock_data = yf.download(ticker, start=start_date, end=end_date)

# Add alpha factors to the stock data
stock_data_with_alpha = add_alpha_factors(stock_data)

# Display the updated stock data
data = stock_data_with_alpha

# creating the target variable
data = forward_returns(10,data)
data = binary_target_with_percentage(10,data,0.05)


# making th price stationary
data = data.dropna()
SClose = np.diff(data["Close"])
data = data[1:]
data["SClose"] = SClose 

# training the model


# Features (alpha factors)
#features = ['Alpha_Factor_1', 'Alpha_Factor_2', ...]  # Replace with actual alpha factor columns

# Target variables
target_forward_returns = 'Forward_Returns'
target_binary = 'Binary_Target_Percentage'

# Split the data into training and testing sets
X_train, X_test, y_train_forward, y_test_forward, y_train_binary, y_test_binary = train_test_split(
    data[['Close', 'Volume', 'SMA_10',
       'SMA_50', 'RSI', 'MACD', 'Upper_Band', 'Middle_Band', 'Lower_Band',
       'ATR', 'OBV', 'SMA_Cross', 'BB_Deviation', 'Inside_BB',
       'Distance_from_Lower', 'Distance_from_Upper','Bullish_Engulfing','Bearish_Engulfing', 'slowk',"slowd",
         'VolumeRatio','Volume_Spike']],
    data["Forward_Returns"],
    data["Binary_Target_Percentage"],
    test_size=0.2,
    random_state=42
)

# Train Linear Regression for Forward Returns
linear_reg_model = LinearRegression()
linear_reg_model.fit(X_train, y_train_forward)

# Train Logistic Regression for Binary Classification
#         logistic_reg_model = LogisticRegression()
#        logistic_reg_model.fit(X_train, y_train_binary)

# Make predictions
y_pred_forward = linear_reg_model.predict(X_test)
y_pred_binary = np.where(y_pred_forward > 0.05,1,0)

# Evaluate Linear Regression
mse = mean_squared_error(y_test_forward, y_pred_forward)
rmse = mse ** 0.5
r2 = linear_reg_model.score(X_test, y_test_forward)

# Evaluate Logistic Regression
accuracy = accuracy_score(y_test_binary, y_pred_binary)
precision = precision_score(y_test_binary, y_pred_binary)
#         roc_auc = roc_auc_score(y_test_binary, logistic_reg_model.predict_proba(X_test)[:, 1])  # For ROC-AUC

print(accuracy)
print(precision)

[*********************100%***********************]  1 of 1 completed
0.8785714285714286
0.7976190476190477


In [109]:
len(y_pred_binary[y_pred_binary == 1]
)

84