In [44]:
import pandas as pd 
from itertools import combinations
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [45]:
# Load the prepared SUZLON feature CSV.
# The path is a raw string: it contains Windows backslashes (\M, \D, \P, \S, \s)
# that a normal string literal would treat as invalid escape sequences.
data = pd.read_csv(r'D:\Master_Folder\Data Science Course\Projects\StockMarket\stock_data\SUZLON.NS_2023-01-01_to_2024-11-21_ML.csv')

In [46]:
# Parse the 'Date' strings day-first, then swap that column in as the frame's index.
parsed_dates = pd.to_datetime(data['Date'], dayfirst=True)
data.drop(columns='Date', inplace=True)
data.set_index(parsed_dates, inplace=True)


  data['Date'] = pd.to_datetime(data['Date'], dayfirst=True)


In [47]:
# Target column: the 'Close' value of the following row (shifted up by one).
data['Next_1_day_close'] = data['Close'].shift(periods=-1)
# Remove every row that contains a missing value in any column; this includes
# the final row, whose shifted target is NaN.
data.dropna(axis=0, how='any', inplace=True)

In [48]:
# Preview the first five rows of the prepared frame.
data.head(n=5)

Unnamed: 0_level_0,Close,Weekday_angle,Month_angle,Temporal_Features,Next_1_day_close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-03-17,8.0,-1.256637,1.570796,1.366827,7.75
2023-03-20,7.75,0.0,1.570796,1.41181,7.8
2023-03-21,7.8,1.256637,1.570796,1.456793,7.8
2023-03-22,7.8,2.513274,1.570796,1.501776,7.75
2023-03-23,7.75,-2.513274,1.570796,1.321844,7.55


In [49]:
def evaluate_feature_combinations(data, target_col, test_ratio=0.3):
    """Fit a linear regression on every feature subset and rank the results.

    Every column of ``data`` other than ``target_col`` is a candidate feature.
    Rows are split by position, without shuffling: the leading
    ``1 - test_ratio`` share is used for training and the remaining rows for
    testing. For each non-empty combination of candidate features (2**n - 1
    combinations for n features) a ``StandardScaler`` is fitted on the
    training rows only, a ``LinearRegression`` is trained on the scaled
    features, and its predictions on the test rows are scored.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the feature columns and the target column.
    target_col : str
        Name of the column to predict.
    test_ratio : float, default 0.3
        Fraction of trailing rows held out for testing. Must lie strictly
        between 0 and 1.

    Returns
    -------
    list of dict
        One entry per feature subset with the keys ``'Features'`` (list of
        column names), ``'mse'``, ``'R2'`` and ``'MAPE'`` (in percent),
        sorted by ``'R2'`` in descending order.

    Raises
    ------
    ValueError
        If ``test_ratio`` is not strictly between 0 and 1.
    KeyError
        If ``target_col`` is not a column of ``data``.
    """
    # A ratio outside (0, 1) yields an empty split or, above 1, a negative
    # split position that silently mixes up the train/test rows.
    if not 0 < test_ratio < 1:
        raise ValueError(
            f"test_ratio must be strictly between 0 and 1, got {test_ratio!r}"
        )

    features = [col for col in data.columns if col != target_col]

    # Positional split that keeps the rows in their existing order.
    train_size = int(len(data) * (1 - test_ratio))
    train_data = data.iloc[:train_size]
    test_data = data.iloc[train_size:]

    x_train_full = train_data[features]
    y_train = train_data[target_col]
    x_test_full = test_data[features]
    y_test = test_data[target_col]

    results = []
    for r in range(1, len(features) + 1):
        for feature_subset in combinations(features, r):
            feature_subset = list(feature_subset)

            # Fit the scaler on the training rows only, then reuse its
            # statistics for the test rows.
            scaler = StandardScaler()
            x_train = scaler.fit_transform(x_train_full[feature_subset])
            x_test = scaler.transform(x_test_full[feature_subset])

            model = LinearRegression()
            model.fit(x_train, y_train)

            y_pred = model.predict(x_test)

            mse = mean_squared_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)
            # Mean absolute percentage error, expressed in percent.
            mape = ((y_test - y_pred) / y_test).abs().mean() * 100

            results.append({'Features': feature_subset, 'mse': mse, 'R2': r2, 'MAPE': mape})

    # Best-scoring subsets (highest R2) first.
    return sorted(results, key=lambda entry: entry['R2'], reverse=True)

In [50]:
# Score every feature subset against the next-day close (default 30% test split).
results = evaluate_feature_combinations(
    data,
    target_col='Next_1_day_close',
)

In [51]:
# Print the ten best-ranked feature subsets together with their metrics.
top_results = results[:10]
for rank, entry in enumerate(top_results, start=1):
    metrics_line = f"R²: {entry['R2']:.4f}, MAPE: {entry['MAPE']:.2f}%, MSE: {entry['mse']:.4f}\n"
    print(f"Rank {rank}:")
    print(f"Features: {entry['Features']}")
    print(metrics_line)

Rank 1:
Features: ['Close', 'Month_angle']
R²: 0.9733, MAPE: 2.40%, MSE: 3.7773

Rank 2:
Features: ['Close', 'Temporal_Features']
R²: 0.9733, MAPE: 2.40%, MSE: 3.7785

Rank 3:
Features: ['Close']
R²: 0.9733, MAPE: 2.40%, MSE: 3.7843

Rank 4:
Features: ['Close', 'Weekday_angle', 'Month_angle']
R²: 0.9732, MAPE: 2.40%, MSE: 3.7910

Rank 5:
Features: ['Close', 'Weekday_angle', 'Temporal_Features']
R²: 0.9732, MAPE: 2.40%, MSE: 3.7910

Rank 6:
Features: ['Close', 'Month_angle', 'Temporal_Features']
R²: 0.9732, MAPE: 2.40%, MSE: 3.7910

Rank 7:
Features: ['Close', 'Weekday_angle', 'Month_angle', 'Temporal_Features']
R²: 0.9732, MAPE: 2.40%, MSE: 3.7910

Rank 8:
Features: ['Close', 'Weekday_angle']
R²: 0.9732, MAPE: 2.40%, MSE: 3.7981

Rank 9:
Features: ['Temporal_Features']
R²: -9.6439, MAPE: 54.81%, MSE: 1505.8214

Rank 10:
Features: ['Weekday_angle', 'Month_angle']
R²: -9.6448, MAPE: 54.81%, MSE: 1505.9417

