In [9]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [11]:
# Load and preprocess data
def load_data(file_path):
    data = pd.read_csv(file_path)
    data['timestamp'] = pd.to_datetime(data['timestamp'])
    data.set_index('timestamp', inplace=True)
    return data

# ARIMA model for linear patterns
def fit_arima(data, order=(1, 1, 1)):
    model = ARIMA(data, order=order)
    arima_fit = model.fit()
    return arima_fit

# Prepare data for neural network
def prepare_nn_data(residuals, original_data, future_steps=15):
    residuals = residuals[~np.isnan(residuals)]  # Drop NaN residuals
    X, y = [], []
    for i in range(len(residuals) - future_steps):
        X.append(residuals[i:i + future_steps])
        y.append(original_data[i + future_steps])
    return np.array(X), np.array(y)

# Build LSTM neural network
def build_lstm_model(input_shape):
    model = Sequential()
    model.add(LSTM(64, activation='relu', input_shape=input_shape, return_sequences=False))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1))  # Predict single value (price)
    model.compile(optimizer='adam', loss='mse')
    return model

# Combine ARIMA and LSTM predictions
def predict_hybrid(arima_fit, lstm_model, data, scaler, future_steps=15):
    arima_forecast = arima_fit.forecast(steps=future_steps)[-1]  # ARIMA prediction
    nn_input = data[-future_steps:]  # Last residuals for NN input
    nn_input = scaler.transform(nn_input.reshape(-1, 1)).reshape(1, -1, 1)
    nn_forecast = lstm_model.predict(nn_input)
    return arima_forecast + scaler.inverse_transform(nn_forecast).flatten()[0]

# Main workflow
def main(file_path):
    # Step 1: Load data
    data = load_data(file_path)
    close_prices = data['close']

    # Step 2: Fit ARIMA
    arima_fit = fit_arima(close_prices)

    # Step 3: Extract ARIMA residuals
    residuals = arima_fit.resid

    # Step 4: Scale residuals for NN
    scaler = MinMaxScaler()
    residuals_scaled = scaler.fit_transform(residuals.values.reshape(-1, 1))

    # Step 5: Prepare data for NN
    X, y = prepare_nn_data(residuals_scaled, close_prices.values)

    # Reshape X for LSTM input (samples, timesteps, features)
    X = X.reshape(X.shape[0], X.shape[1], 1)

    # Step 6: Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Step 7: Build and train LSTM model
    lstm_model = build_lstm_model(X_train.shape[1:])
    lstm_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

    # Step 8: Predict using hybrid model
    hybrid_prediction = predict_hybrid(arima_fit, lstm_model, residuals.values, scaler)
    print(f"Hybrid Prediction for 15 minutes later: {hybrid_prediction}")

# Run the script
if __name__ == "__main__":
    file_path = 'data/nifty2015-2025.csv'  # file path
    main(file_path)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  super().__init__(**kwargs)


Epoch 1/20
[1m23260/23260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m248s[0m 11ms/step - loss: 29133644.0000 - val_loss: 27892294.0000
Epoch 2/20
[1m23260/23260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m248s[0m 11ms/step - loss: 25720322.0000 - val_loss: 26714006.0000
Epoch 3/20
[1m23260/23260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m233s[0m 10ms/step - loss: 25640202.0000 - val_loss: 25175744.0000
Epoch 4/20
[1m23260/23260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m262s[0m 11ms/step - loss: 25535236.0000 - val_loss: 25861092.0000
Epoch 5/20
[1m23260/23260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m237s[0m 10ms/step - loss: 25498434.0000 - val_loss: 25187614.0000
Epoch 6/20
[1m23260/23260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m218s[0m 9ms/step - loss: 25490868.0000 - val_loss: 25176562.0000
Epoch 7/20
[1m23260/23260[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m226s[0m 10ms/step - loss: 25429938.0000 - val_loss: 25695130.0000
Epoch 8/20
[1

  return get_prediction_index(
  return get_prediction_index(


KeyError: -1

In [None]:
# import pandas as pd
# import numpy as np
# import talib
# from sklearn.preprocessing import StandardScaler, MinMaxScaler
# from sklearn.metrics import accuracy_score, confusion_matrix
# from statsmodels.tsa.arima.model import ARIMA
# from arch import arch_model
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import LSTM, Dense
# from sklearn.model_selection import train_test_split

# # Load and preprocess data
# def load_and_preprocess_data(file_path, interval='15T'):
#     df = pd.read_csv(file_path, parse_dates=['timestamp'])
#     df['timestamp'] = df['timestamp'].dt.tz_localize(None)
#     df.set_index('timestamp', inplace=True)

#     df = df.resample(interval).agg({
#         'open': 'first',
#         'high': 'max',
#         'low': 'min',
#         'close': 'last',
#         'volume': 'sum'
#     }).dropna()

#     scaler = StandardScaler()
#     df[['open', 'high', 'low', 'close', 'volume']] = scaler.fit_transform(df[['open', 'high', 'low', 'close', 'volume']])

#     return df, scaler

# # Add technical indicators
# def add_technical_indicators(df):
#     df['RSI_14'] = talib.RSI(df['close'], timeperiod=14)
#     df['MA_50'] = talib.SMA(df['close'], timeperiod=50)
#     df['EMA_20'] = talib.EMA(df['close'], timeperiod=20)
#     df['MACD'], df['MACD_signal'], _ = talib.MACD(df['close'], fastperiod=12, slowperiod=26, signalperiod=9)
#     df['BB_upper'], df['BB_middle'], df['BB_lower'] = talib.BBANDS(df['close'], timeperiod=20)
#     df.dropna(inplace=True)
#     return df

# # Create target variable
# def create_target_variable(df, future_steps=15, threshold=3):
#     df['future_close'] = df['close'].shift(-future_steps)
#     df['direction'] = np.where(df['future_close'] > df['close'], 1, 0)
#     df['price_diff'] = df['future_close'] - df['close']
#     df['correct_movement'] = np.where(df['price_diff'].abs() <= threshold, 1, 0)
#     df.dropna(inplace=True)
#     return df

# # Prepare data for neural network
# def prepare_nn_data(df, feature_columns, future_steps=15):
#     X = df[feature_columns].values
#     y = df[['direction', 'correct_movement']].values
#     scaler = MinMaxScaler()
#     X_scaled = scaler.fit_transform(X)
    
#     X_seq, y_seq = [], []
#     for i in range(len(X_scaled) - future_steps):
#         X_seq.append(X_scaled[i:i + future_steps])
#         y_seq.append(y[i + future_steps])

#     return np.array(X_seq), np.array(y_seq), scaler

# # Fit ARIMA and GARCH models
# def fit_arima(data):
#     model = ARIMA(data, order=(1,1,1))
#     return model.fit()

# def fit_garch(data):
#     garch = arch_model(data, vol="Garch", p=1, q=1)
#     return garch.fit(disp='off')

# # Build LSTM model
# def build_lstm_model(input_shape):
#     model = Sequential([
#         LSTM(64, activation='relu', input_shape=input_shape, return_sequences=True),
#         LSTM(32, activation='relu'),
#         Dense(16, activation='relu'),
#         Dense(2, activation='sigmoid')
#     ])
#     model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
#     return model

# # Evaluate model performance
# def evaluate_model(model, X_test, y_test):
#     y_pred = (model.predict(X_test) > 0.5).astype(int)
#     direction_accuracy = accuracy_score(y_test[:, 0], y_pred[:, 0])
#     movement_accuracy = accuracy_score(y_test[:, 1], y_pred[:, 1])
    
#     print(f"Direction Accuracy: {direction_accuracy:.2f}")
#     print("Direction Confusion Matrix:\n", confusion_matrix(y_test[:, 0], y_pred[:, 0]))
#     print(f"Price Movement Accuracy: {movement_accuracy:.2f}")
#     print("Movement Confusion Matrix:\n", confusion_matrix(y_test[:, 1], y_pred[:, 1]))

#     return direction_accuracy, movement_accuracy

# # Main workflow
# def main(file_path):
#     df, scaler = load_and_preprocess_data(file_path)
#     df = add_technical_indicators(df)
#     df = create_target_variable(df)

#     feature_cols = ['open', 'high', 'low', 'close', 'volume', 'RSI_14', 'MA_50', 'EMA_20', 'MACD', 'BB_upper']
#     X, y, scaler = prepare_nn_data(df, feature_cols)

#     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#     # Train ARIMA and GARCH models
#     arima_fit = fit_arima(df['close'])
#     garch_fit = fit_garch(df['close'])

#     df['GARCH_Residuals'] = garch_fit.resid
#     df.dropna(inplace=True)

#     # Train LSTM model
#     lstm_model = build_lstm_model(X_train.shape[1:])
#     lstm_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

#     # Evaluate LSTM model
#     dir_acc, move_acc = evaluate_model(lstm_model, X_test, y_test)

#     print(f"Final Accuracy - Direction: {dir_acc:.2f}, Price Movement: {move_acc:.2f}")

# if __name__ == "__main__":
#     file_path = 'data/nifty2015-2025.csv'
#     main(file_path)


In [None]:
import pandas as pd
import numpy as np
import talib
import optuna
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import accuracy_score, confusion_matrix
from statsmodels.tsa.arima.model import ARIMA
from arch import arch_model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split

# Load and preprocess data
def load_and_preprocess_data(file_path, interval='15T'):
    df = pd.read_csv(file_path, parse_dates=['timestamp'])
    df['timestamp'] = df['timestamp'].dt.tz_localize(None)
    df.set_index('timestamp', inplace=True)

    df = df.resample(interval).agg({
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum'
    }).dropna()

    scaler = StandardScaler()
    df[['open', 'high', 'low', 'close', 'volume']] = scaler.fit_transform(df[['open', 'high', 'low', 'close', 'volume']])

    return df, scaler

# Add technical indicators
def add_technical_indicators(df):
    df['RSI_14'] = talib.RSI(df['close'], timeperiod=14)
    df['MA_50'] = talib.SMA(df['close'], timeperiod=50)
    df['EMA_20'] = talib.EMA(df['close'], timeperiod=20)
    df['MACD'], df['MACD_signal'], _ = talib.MACD(df['close'], fastperiod=12, slowperiod=26, signalperiod=9)
    df['BB_upper'], df['BB_middle'], df['BB_lower'] = talib.BBANDS(df['close'], timeperiod=20)
    df.dropna(inplace=True)
    return df

# Create target variable
def create_target_variable(df, future_steps=15, threshold=3):
    df['future_close'] = df['close'].shift(-future_steps)
    df['direction'] = np.where(df['future_close'] > df['close'], 1, 0)
    df['price_diff'] = df['future_close'] - df['close']
    df['correct_movement'] = np.where(df['price_diff'].abs() <= threshold, 1, 0)
    df.dropna(inplace=True)
    return df

# Prepare data for neural network
def prepare_nn_data(df, feature_columns, future_steps=15):
    X = df[feature_columns].values
    y = df[['direction', 'correct_movement']].values
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)

    X_seq, y_seq = [], []
    for i in range(len(X_scaled) - future_steps):
        X_seq.append(X_scaled[i:i + future_steps])
        y_seq.append(y[i + future_steps])

    return np.array(X_seq), np.array(y_seq), scaler

# Fit ARIMA and GARCH models
def fit_arima(data):
    model = ARIMA(data, order=(1,1,1))
    return model.fit()

def fit_garch(data):
    garch = arch_model(data, vol="Garch", p=1, q=1)
    return garch.fit(disp='off')

# Build and train LSTM model with hyperparameter tuning
def objective(trial):
    n_lstm_layers = trial.suggest_int('n_lstm_layers', 1, 3)
    lstm_units = trial.suggest_int('lstm_units', 32, 128)
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)

    model = Sequential()
    for _ in range(n_lstm_layers):
        model.add(LSTM(lstm_units, activation='relu', return_sequences=True))
        model.add(Dropout(dropout_rate))
    model.add(LSTM(lstm_units // 2, activation='relu', return_sequences=False))
    model.add(Dense(2, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    X_train, X_test, y_train, y_test = train_test_split(X_global, y_global, test_size=0.2, random_state=42)
    model.fit(X_train, y_train, epochs=20, batch_size=batch_size, validation_data=(X_test, y_test), verbose=0)

    y_pred = (model.predict(X_test) > 0.5).astype(int)
    accuracy = accuracy_score(y_test[:, 0], y_pred[:, 0])

    return accuracy

# Evaluate model performance
def evaluate_model(model, X_test, y_test):
    y_pred = (model.predict(X_test) > 0.5).astype(int)
    direction_accuracy = accuracy_score(y_test[:, 0], y_pred[:, 0])
    movement_accuracy = accuracy_score(y_test[:, 1], y_pred[:, 1])

    print(f"Direction Accuracy: {direction_accuracy:.2f}")
    print("Direction Confusion Matrix:\n", confusion_matrix(y_test[:, 0], y_pred[:, 0]))
    print(f"Price Movement Accuracy: {movement_accuracy:.2f}")
    print("Movement Confusion Matrix:\n", confusion_matrix(y_test[:, 1], y_pred[:, 1]))

    return direction_accuracy, movement_accuracy

# Main workflow
def main(file_path):
    global X_global, y_global
    timeframes = ['5T', '15T', '30T']
    best_results = {}

    for interval in timeframes:
        print(f"Processing data for interval: {interval}")
        df, scaler = load_and_preprocess_data(file_path, interval)
        df = add_technical_indicators(df)
        df = create_target_variable(df)

        feature_cols = ['open', 'high', 'low', 'close', 'volume', 'RSI_14', 'MA_50', 'EMA_20', 'MACD', 'BB_upper']
        X, y, scaler = prepare_nn_data(df, feature_cols)
        
        X_global, y_global = X, y  # Save globally for Optuna
        
        # Optimize hyperparameters using Optuna
        study = optuna.create_study(direction='maximize')
        study.optimize(objective, n_trials=20)

        print(f"Best trial for {interval}: {study.best_trial.params}")
        best_results[interval] = study.best_trial.params

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Train best LSTM model
        best_params = study.best_trial.params
        model = Sequential()
        for _ in range(best_params['n_lstm_layers']):
            model.add(LSTM(best_params['lstm_units'], activation='relu', return_sequences=True))
            model.add(Dropout(best_params['dropout_rate']))
        model.add(LSTM(best_params['lstm_units'] // 2, activation='relu'))
        model.add(Dense(2, activation='sigmoid'))
        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        model.fit(X_train, y_train, epochs=20, batch_size=best_params['batch_size'], validation_data=(X_test, y_test))

        # Evaluate model
        dir_acc, move_acc = evaluate_model(model, X_test, y_test)

    print("Best results across timeframes:", best_results)

if __name__ == "__main__":
    file_path = 'data/nifty2015-2025.csv'
    main(file_path)


  from .autonotebook import tqdm as notebook_tqdm


Processing data for interval: 5T
