In [1]:
# Import Libraries
import pandas as pd
import numpy as np
import talib
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt


In [2]:
# Import Libraries
import pandas as pd
import numpy as np
import talib
from statsmodels.tsa.arima.model import ARIMA
from arch import arch_model  # For GARCH
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt

# Function to Load and Resample Data
def load_and_resample_data(file_path, interval='15min'):
    """Load OHLCV rows from a CSV file and aggregate them into fixed-width bars.

    The CSV must contain a ``timestamp`` column (parsed with
    ``pd.to_datetime``) and ``open``, ``high``, ``low``, ``close`` and
    ``volume`` columns.

    Args:
        file_path: Location of the CSV file, passed straight to
            ``pd.read_csv``.
        interval: pandas offset alias giving the bar width. Defaults to
            15 minutes. The legacy minute spelling ``'<n>T'`` (for example
            ``'15T'``) is still accepted and translated to ``'<n>min'``.

    Returns:
        A new DataFrame indexed by the resampled timestamp with columns
        ``open`` (first), ``high`` (max), ``low`` (min), ``close`` (last)
        and ``volume`` (sum). Bars containing any missing value, such as
        intervals with no source rows, are dropped.
    """
    # 'T' as the minute alias is deprecated since pandas 2.2; 'min' is
    # understood by old and new releases alike, so normalise to it.
    if (
        isinstance(interval, str)
        and interval.endswith('T')
        and (interval == 'T' or interval[:-1].isdigit())
    ):
        interval = f"{interval[:-1]}min"

    raw = pd.read_csv(file_path)
    raw['timestamp'] = pd.to_datetime(raw['timestamp'])

    bars = raw.set_index('timestamp').resample(interval).agg({
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum'
    })
    # Empty intervals yield NaN prices (and a zero volume); discard them.
    return bars.dropna()

# Add Technical Indicators (RSI, MA, EMA) Using TA-Lib
def add_indicators(data, ma_periods, rsi_period=14):
    """Attach moving-average and RSI columns, computed from ``close``, to ``data``.

    For every entry ``n`` of ``ma_periods`` a ``MA_<n>`` column (``talib.SMA``)
    and an ``EMA_<n>`` column (``talib.EMA``) are written, followed by a single
    ``RSI`` column (``talib.RSI`` with ``timeperiod=rsi_period``).

    Args:
        data: DataFrame with a ``close`` column. It is modified in place.
        ma_periods: Iterable of window lengths for the SMA/EMA columns.
        rsi_period: Window length handed to ``talib.RSI``.

    Returns:
        The same ``data`` object, now carrying the additional columns.
    """
    close = data['close']
    for window in ma_periods:
        # Keep the MA-then-EMA column order for each window.
        for prefix, indicator in (("MA", talib.SMA), ("EMA", talib.EMA)):
            data[f"{prefix}_{window}"] = indicator(close, timeperiod=window)
    data["RSI"] = talib.RSI(close, timeperiod=rsi_period)
    return data

# Fit GARCH Model for Volatility Prediction
def fit_garch(data):
    """Fit a GARCH(1, 1) volatility model to ``data`` and return the fit result.

    Args:
        data: Series handed unchanged to ``arch_model``.

    Returns:
        The object returned by ``arch_model(...).fit(disp="off")``.
    """
    # disp="off" silences the optimiser's progress output.
    return arch_model(data, vol="Garch", p=1, q=1).fit(disp="off")

# Prepare Data for Neural Network
def prepare_nn_data(data, future_steps=15):
    """Scale the features and cut them into sliding windows for a sequence model.

    A fresh ``MinMaxScaler`` is fitted on every row of ``data``. Each sample
    is a block of ``future_steps`` consecutive scaled rows, and its target is
    column 0 of the row immediately after that block.

    Args:
        data: 2-D feature table; column 0 is the quantity to predict.
        future_steps: Number of rows in each input window.

    Returns:
        ``(X, y, scaler)``: ``X`` is the stacked windows, ``y`` the matching
        targets, and ``scaler`` the fitted scaler. When ``data`` has no more
        than ``future_steps`` rows, both arrays are empty.
    """
    scaler = MinMaxScaler()
    scaled = scaler.fit_transform(data)

    window_starts = range(len(scaled) - future_steps)
    windows = [scaled[start:start + future_steps] for start in window_starts]
    # Target: first column of the row right after each window.
    targets = [scaled[start + future_steps, 0] for start in window_starts]

    return np.array(windows), np.array(targets), scaler

# Build and Train LSTM Neural Network
def build_and_train_lstm(X_train, y_train, X_val, y_val, input_shape, epochs=20, batch_size=32):
    """Assemble a stacked LSTM regressor, fit it, and return the trained model.

    Layers, in order: ``LSTM(64)`` returning sequences, ``LSTM(32)``,
    ``Dense(16)`` and a one-unit ``Dense`` output. All but the output layer
    use ReLU. The model is compiled with the Adam optimiser and MSE loss.

    Args:
        X_train, y_train: Training windows and targets.
        X_val, y_val: Data passed to ``fit`` as ``validation_data``.
        input_shape: Shape of one input sample, given to the first LSTM layer.
        epochs: Number of training epochs.
        batch_size: Mini-batch size.

    Returns:
        The fitted ``Sequential`` model.
    """
    network = Sequential([
        LSTM(64, activation='relu', input_shape=input_shape, return_sequences=True),
        LSTM(32, activation='relu', return_sequences=False),
        Dense(16, activation='relu'),
        Dense(1),  # single regression output (the predicted price)
    ])
    network.compile(optimizer='adam', loss='mse')

    network.fit(
        X_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_val, y_val),
    )
    return network

# Evaluate Model Performance
def evaluate_model(model, X_test, y_test, scaler):
    """Score ``model`` on held-out data in original units, plot and print the result.

    Predictions and targets only cover the scaler's first column, so each is
    padded with zero columns up to the feature count (``X_test.shape[2]``)
    before ``scaler.inverse_transform`` is applied; only column 0 of the
    result is kept.

    Args:
        model: Object exposing ``predict(X_test)``.
        X_test: Test windows; the size of axis 2 is read as the feature count.
        y_test: Scaled targets for ``X_test``.
        scaler: Fitted scaler exposing ``inverse_transform``.

    Returns:
        ``(rmse, mae)`` computed on the rescaled values.
    """
    def _rescale_first_column(column, n_rows):
        # Pad with zeros so the matrix has the width the scaler was fitted on.
        filler = np.zeros((n_rows, X_test.shape[2] - 1))
        return scaler.inverse_transform(np.concatenate([column, filler], axis=1))[:, 0]

    raw_predictions = model.predict(X_test)
    predictions = _rescale_first_column(raw_predictions, raw_predictions.shape[0])
    y_test_rescaled = _rescale_first_column(y_test.reshape(-1, 1), y_test.shape[0])

    # Error metrics on the original scale
    rmse = np.sqrt(mean_squared_error(y_test_rescaled, predictions))
    mae = mean_absolute_error(y_test_rescaled, predictions)

    # Overlay actual and predicted series
    plt.figure(figsize=(14, 7))
    for series, label, color in (
        (y_test_rescaled, "Actual Prices", "blue"),
        (predictions, "Predicted Prices", "orange"),
    ):
        plt.plot(series, label=label, color=color)
    plt.title("Actual vs Predicted Prices")
    plt.legend()
    plt.show()

    print(f"RMSE: {rmse:.2f}, MAE: {mae:.2f}")
    return rmse, mae

# Main Workflow
def main(file_path, ma_periods, rsi_period=14, future_steps=15):
    """Run the whole pipeline: load bars, build features, train the LSTM, report error.

    Args:
        file_path: CSV file handed to ``load_and_resample_data``.
        ma_periods: Window lengths for the ``MA_<n>`` / ``EMA_<n>`` features.
        rsi_period: Window length for the RSI feature.
        future_steps: Length of each input window given to
            ``prepare_nn_data``.

    Returns:
        None. RMSE and MAE on the held-out test windows are printed, and
        ``evaluate_model`` draws the actual-vs-predicted plot.
    """
    # Step 1: Load and resample data
    data = load_and_resample_data(file_path)

    # Step 2: Add technical indicators
    data = add_indicators(data, ma_periods, rsi_period)

    # Step 3: Fit GARCH on percentage returns and add its residuals.
    # Fitting on raw price levels would make the residual just "close minus
    # a constant", i.e. a duplicate of the close feature once min-max scaled.
    returns = 100 * data['close'].pct_change(fill_method=None).dropna()
    garch_fit = fit_garch(returns)
    # Assignment aligns on the index; the first bar has no return, so it
    # becomes NaN and is removed by the dropna() below.
    data['GARCH_Residuals'] = garch_fit.resid

    # Drop rows with NaN values after adding indicators
    data = data.dropna()

    # Step 4: Prepare data for NN
    feature_columns = ['close'] + [f"MA_{period}" for period in ma_periods] + \
                      [f"EMA_{period}" for period in ma_periods] + ["RSI", "GARCH_Residuals"]
    X, y, scaler = prepare_nn_data(data[feature_columns], future_steps)

    # Step 5: Chronological train / validation / test split.
    # shuffle=False keeps time order: consecutive windows overlap, so a
    # shuffled split would leak test information into training and would
    # also scramble the actual-vs-predicted plot.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
    # Monitor training on a separate slice so the test set stays untouched
    # until the final evaluation.
    X_train, X_val, y_train, y_val = train_test_split(
        X_train, y_train, test_size=0.1, shuffle=False
    )

    # Step 6: Build and train LSTM model
    lstm_model = build_and_train_lstm(X_train, y_train, X_val, y_val, X_train.shape[1:])

    # Step 7: Evaluate model performance
    rmse, mae = evaluate_model(lstm_model, X_test, y_test, scaler)

    # Summary of results
    print("Model Evaluation Summary:")
    print(f"RMSE: {rmse:.2f}")
    print(f"MAE: {mae:.2f}")

# Define File Path and Parameters
# Input CSV; main() passes it to load_and_resample_data(), which expects
# timestamp/open/high/low/close/volume columns.
file_path = 'data/nifty2015-2025.csv'  # Replace with your file path
# One MA_<n> and one EMA_<n> feature column is created per entry.
ma_periods = [5, 15, 50, 200]  # Moving average periods to test
# Passed to talib.RSI as its timeperiod.
rsi_period = 14
# Number of consecutive rows in each LSTM input window; the training target
# is the 'close' value of the row immediately after the window.
future_steps = 15

# Run the Workflow: load -> indicators -> GARCH -> windowing -> train -> evaluate
main(file_path, ma_periods, rsi_period, future_steps)


Fitting ARIMA model for moving average period: 5


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


ValueError: Found input variables with inconsistent numbers of samples: [930213, 930214]