In [97]:
import pandas as pd
import numpy as np


df = pd.read_csv('RELIANCE_15m.csv')

# Feature Engineering Function
def engineer_features(df):
    # Moving Averages
    df['SMA_5'] = df['close'].rolling(window=5).mean()
    df['EMA_5'] = df['close'].ewm(span=5, adjust=False).mean()
    
    # RSI
    delta = df['close'].diff(1)
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df['RSI'] = 100 - (100 / (1 + rs))

    # MACD
    df['MACD'] = df['EMA_5'] - df['close'].ewm(span=26, adjust=False).mean()
    df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()
    
    # Bollinger Bands
    df['BB_Upper'] = df['SMA_5'] + 2 * df['close'].rolling(window=5).std()
    df['BB_Lower'] = df['SMA_5'] - 2 * df['close'].rolling(window=5).std()

    # Price Action Features
    df['High_Low_Spread'] = df['high'] - df['low']
    df['Momentum'] = df['close'].diff(4)  # 4-day momentum
    df['Volatility'] = df['close'].pct_change().rolling(window=5).std()  # Rolling volatility
    
    # Lagged Features
    df['Lagged_Close'] = df['close'].shift(1)
    df['Lagged_Volume'] = df['volume'].shift(1)
    
    return df

df = engineer_features(df)
print(df)


                            date     open     high      low    close  volume  \
0      2015-02-02 09:15:00+05:30   432.90   434.30   431.55   432.70  622129   
1      2015-02-02 09:30:00+05:30   432.80   432.90   431.50   431.55  305896   
2      2015-02-02 09:45:00+05:30   431.55   432.35   431.00   431.75  246096   
3      2015-02-02 10:00:00+05:30   431.75   432.25   430.30   431.50  220553   
4      2015-02-02 10:15:00+05:30   431.50   432.00   431.05   431.50   96235   
...                          ...      ...      ...      ...      ...     ...   
58314  2024-07-19 14:15:00+05:30  3121.55  3122.50  3115.25  3120.00  135538   
58315  2024-07-19 14:30:00+05:30  3120.00  3120.10  3112.50  3117.65  180644   
58316  2024-07-19 14:45:00+05:30  3117.70  3120.55  3107.00  3110.90  309692   
58317  2024-07-19 15:00:00+05:30  3110.45  3113.80  3107.45  3108.80  737736   
58318  2024-07-19 15:15:00+05:30  3109.00  3117.95  3105.60  3116.95  643287   

         SMA_5        EMA_5        RSI 

In [99]:
df.fillna(method='ffill', inplace=True)

  df.fillna(method='ffill', inplace=True)


In [101]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Normalization
scaler = MinMaxScaler()
normalized_features = scaler.fit_transform(df[['SMA_5', 'EMA_5', 'RSI', 'MACD', 
                                                'BB_Upper', 'BB_Lower', 
                                                'High_Low_Spread', 'Momentum', 
                                                'Volatility', 'Lagged_Close', 
                                                'Lagged_Volume']])
df_normal = pd.DataFrame(normalized_features, columns=['SMA_5', 'EMA_5', 'RSI', 'MACD', 
                                                            'BB_Upper', 'BB_Lower', 
                                                            'High_Low_Spread', 'Momentum', 
                                                            'Volatility', 'Lagged_Close', 
                                                            'Lagged_Volume'])


In [103]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# Create target variable (1 if price will go up, 0 if down)
df_normal['Target'] = (df['close'].shift(-1) > df['close']).astype(int)

# Train-Test Split
y = df_normal['Target'].iloc[1:-1]  # Exclude NaN values
X = df_normal.drop(['Target'], axis=1).iloc[1:-1]  # Exclude NaN values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Random Forest Model
model = RandomForestClassifier(n_estimators=500, random_state=42)
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluation
print(classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.52      0.60      0.56      5981
           1       0.50      0.42      0.45      5683

    accuracy                           0.51     11664
   macro avg       0.51      0.51      0.51     11664
weighted avg       0.51      0.51      0.51     11664

Accuracy: 0.5115740740740741


In [105]:
#strategy development

In [107]:
# Simulate a simple trading strategy based on predictions
df['Predicted_Signal'] = model.predict(df_normalized)

# Simple Backtesting Logic
initial_capital = 10000
positions = 1  # You can define your position sizing strategy
portfolio = initial_capital * (1 + df['Predicted_Signal'].shift(1).fillna(0) * df['close'].pct_change())



In [109]:
portfolio.iloc[-1]

10026.215903242408

In [111]:
# Calculate performance metrics
cumulative_returns = portfolio.iloc[-1]/ initial_capital - 1
max_drawdown = (portfolio.cummax() - portfolio).max()

print("Cumulative Returns:", cumulative_returns)
print("Max Drawdown:", max_drawdown)


Cumulative Returns: 0.002621590324240808
Max Drawdown: 1082.8579905149454
