<a href="https://colab.research.google.com/github/Akarsh-Kumar-2004/stock-finance/blob/main/WIEEEE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Some Kaggle data sources are private, so authenticate before importing them.
# Run this cell first; the download cell that follows uses the same
# `kagglehub` module.
import kagglehub
kagglehub.login()


In [None]:
# Run this cell to import the Kaggle data sources used by the notebook; it
# can be deleted once the data is available.
# This notebook environment differs from Kaggle's Python environment, so
# libraries the notebook relies on may be missing here.
# NOTE(review): the two path variables assigned below are not referenced
# again in the visible part of this file; the main script reads from
# hard-coded "/kaggle/input/..." paths instead. Confirm which is intended.

akarsh8_dataset_2_path = kagglehub.dataset_download('akarsh8/dataset-2')
akarsh8_dataset_1_path = kagglehub.dataset_download('akarsh8/dataset-1')

print('Data source import complete.')


In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, GRU, Dropout, BatchNormalization, SpatialDropout1D
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt
from tensorflow.keras.regularizers import l2
import json
from scipy.special import expit

class EnhancedMarketPredictor:
    """GRU regressor for the next (scaled) close value, with direction helpers.

    Typical flow: ``load_and_prepare`` -> ``prepare_sequences`` -> ``train``
    -> ``save_direction_predictions``.

    Attributes are documented inline in ``__init__``.
    """

    # Number of features per timestep; must equal len(self._feature_columns()).
    N_FEATURES = 8

    def __init__(self):
        # Scalers are fitted on the training slice only (see prepare_sequences).
        self.feature_scaler = RobustScaler()
        self.target_scaler = RobustScaler()
        self.model = self._build_optimized_model()
        # Set by _detect_market_regime to 'downturn' or 'normal'.
        self.market_regime = None

    def _build_optimized_model(self):
        """Build and compile the two-layer GRU regression model.

        Returns:
            A compiled Keras ``Model`` taking ``(batch, timesteps, N_FEATURES)``
            inputs and producing one linear output per sample.
        """
        inputs = Input(shape=(None, self.N_FEATURES))

        x = GRU(128, return_sequences=True,
                kernel_regularizer=l2(0.0005), recurrent_dropout=0.2)(inputs)
        x = SpatialDropout1D(0.4)(x)
        x = BatchNormalization()(x)
        x = GRU(64, kernel_regularizer=l2(0.0005))(x)
        x = Dropout(0.5)(x)
        x = Dense(32, activation='relu', kernel_regularizer=l2(0.0005))(x)
        output = Dense(1, activation='linear')(x)

        # Keras names a function metric after the callable's __name__. Passing
        # the bound method directly logs it as "_safe_direction_accuracy" (and
        # "val__safe_direction_accuracy"), which does not match the
        # "safe_direction_accuracy" / "val_safe_direction_accuracy" history
        # keys read by train() and by the plotting code. A plainly named
        # wrapper gives the metric the expected name.
        def safe_direction_accuracy(y_true, y_pred):
            return self._safe_direction_accuracy(y_true, y_pred)

        model = Model(inputs=inputs, outputs=output)
        model.compile(
            optimizer=tf.keras.optimizers.Adam(3e-5),
            loss=self._dynamic_loss,
            metrics=[safe_direction_accuracy, 'mae']
        )
        return model

    def _dynamic_loss(self, y_true, y_pred):
        """Return the Huber loss between targets and predictions."""
        return tf.keras.losses.huber(y_true, y_pred)

    def _safe_direction_accuracy(self, y_true, y_pred):
        """Fraction of adjacent sample pairs whose change has the same sign.

        The differences are taken between neighbouring rows of the batch, so
        the value only reflects temporal direction when the rows of a batch
        are in time order.

        Returns 0.0 (rather than NaN) for a batch with fewer than two samples,
        where there are no pairs to compare.
        """
        true_step = y_true[1:] - y_true[:-1]
        pred_step = y_pred[1:] - y_pred[:-1]
        agree = tf.cast(
            tf.equal(tf.sign(true_step), tf.sign(pred_step)), tf.float32)
        # Same as reduce_mean for non-empty input; 0/0 -> 0 instead of NaN.
        return tf.math.divide_no_nan(
            tf.reduce_sum(agree), tf.cast(tf.size(agree), tf.float32))

    def _volatility_windows(self):
        """Return the two rolling-window lengths for the current regime."""
        return [3, 7] if self.market_regime == 'downturn' else [10, 20]

    def _feature_columns(self):
        """Return the ordered model input columns for the current regime.

        The window suffixes follow ``_volatility_windows`` so that the columns
        requested here are the ones ``_create_features`` actually produces.
        """
        short_w, long_w = self._volatility_windows()
        return ['close',
                f'vol_{short_w}', f'vol_{long_w}',
                f'mdd_{short_w}', f'mdd_{long_w}',
                'momentum_3', 'momentum_7', 'returns']

    def _detect_market_regime(self, df: pd.DataFrame) -> None:
        """Set ``self.market_regime`` from the mean 10-row rolling volatility.

        The regime is 'downturn' when the mean rolling standard deviation of
        ``close`` percentage changes exceeds 0.015, otherwise 'normal' (this
        includes the case where there is too little data and the mean is NaN).
        """
        returns = df['close'].pct_change().dropna()
        volatility = returns.rolling(10).std().dropna()
        self.market_regime = 'downturn' if volatility.mean() > 0.015 else 'normal'
        print(f"Detected market regime: {self.market_regime}")

    def _create_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with return, momentum and volatility columns.

        Adds ``returns`` (clipped to [-0.3, 0.3]), ``momentum_3``,
        ``momentum_7`` and, for each regime window ``w``, ``vol_{w}`` and
        ``mdd_{w}``. The input frame is not modified.

        Args:
            df: Frame with a numeric ``close`` column.

        Returns:
            A new frame with the added columns; infinities are treated as
            missing, and missing values are forward-filled, back-filled, then
            zero-filled.
        """
        df = df.copy()  # do not add columns to the caller's frame
        close = df['close']

        df['returns'] = close.pct_change().clip(-0.3, 0.3)
        df['momentum_3'] = close.pct_change(3)
        df['momentum_7'] = close.pct_change(7)

        # Volatility features
        for w in self._volatility_windows():
            rolling_max = close.rolling(w).max()
            df[f'vol_{w}'] = df['returns'].rolling(w).std().fillna(0)
            df[f'mdd_{w}'] = (rolling_max - close) / rolling_max

        # A zero close (or zero rolling max) yields +/-inf in the ratio
        # columns; treat those as missing so they are filled like other gaps.
        df = df.replace([np.inf, -np.inf], np.nan)
        return df.ffill().bfill().fillna(0)

    def load_and_prepare(self, filepaths):
        """Load CSV files, detect the market regime and build features.

        Column names are lower-cased and stripped. Files are appended in the
        order given and re-indexed 0..n-1. (Sorting the concatenated default
        indexes would interleave rows from different files.)

        Args:
            filepaths: Iterable of CSV paths, each with a ``close`` column
                (case/whitespace-insensitive).

        Returns:
            The feature frame produced by ``_create_features``.

        Raises:
            ValueError: If ``filepaths`` is empty.
            KeyError: If no ``close`` column is present after loading.
        """
        dfs = []
        for path in filepaths:
            df = pd.read_csv(path)
            df.columns = df.columns.str.lower().str.strip()
            dfs.append(df)

        if not dfs:
            raise ValueError("No input files were provided")

        combined = pd.concat(dfs, ignore_index=True)
        if 'close' not in combined.columns:
            raise KeyError("Input data must contain a 'close' column")

        self._detect_market_regime(combined)
        return self._create_features(combined)

    def prepare_sequences(self, df: pd.DataFrame, lookback: int = 30):
        """Split chronologically, scale, and window the data.

        The first ~80% of the windows form the training set. Scalers are
        fitted on the training rows only and then applied to both sets. The
        validation slice starts ``lookback`` rows before its first target so
        its first window has full context.

        Args:
            df: Feature frame containing the columns of ``_feature_columns``.
            lookback: Number of timesteps per input window.

        Returns:
            ``(X_train, y_train, X_val, y_val)`` where each ``X`` has shape
            ``(n, lookback, n_features)`` and each ``y`` has shape ``(n, 1)``.

        Raises:
            KeyError: If required feature columns are missing from ``df``.
            ValueError: If ``df`` is too short to yield at least one training
                and one validation window.
        """
        features = self._feature_columns()
        missing = [col for col in features if col not in df.columns]
        if missing:
            raise KeyError(f"Missing feature columns: {missing}")

        # Need >= 2 windows so that int(0.8 * n_windows) >= 1.
        if len(df) - lookback < 2:
            raise ValueError(
                f"Need at least {lookback + 2} rows for lookback={lookback}, "
                f"got {len(df)}")

        # Split before scaling
        split_idx = int((len(df) - lookback) * 0.8)
        train_df = df.iloc[:split_idx + lookback]
        test_df = df.iloc[split_idx:]

        # Fit scalers only on training data
        self.feature_scaler.fit(train_df[features])
        self.target_scaler.fit(train_df[['close']])

        # Transform both sets
        train_features = self.feature_scaler.transform(train_df[features])
        test_features = self.feature_scaler.transform(test_df[features])
        train_target = self.target_scaler.transform(train_df[['close']])
        test_target = self.target_scaler.transform(test_df[['close']])

        def create_sequences(feature_rows, target_rows):
            # Window i covers rows [i, i + lookback); its target is the row
            # immediately after the window.
            n_windows = len(feature_rows) - lookback
            X = np.array([feature_rows[i:i + lookback]
                          for i in range(n_windows)])
            y = np.asarray(target_rows[lookback:])
            return X, y

        X_train, y_train = create_sequences(train_features, train_target)
        X_val, y_val = create_sequences(test_features, test_target)

        return X_train, y_train, X_val, y_val

    def train(self, X_train, y_train, X_val, y_val, epochs=20, batch_size=128):
        """Fit the model with early stopping and LR reduction on plateau.

        Args:
            X_train, y_train: Training windows and targets.
            X_val, y_val: Validation windows and targets.
            epochs: Maximum number of epochs.
            batch_size: Mini-batch size.

        Returns:
            The Keras ``History`` object from ``fit``.
        """
        early_stop = EarlyStopping(
            monitor='val_loss',
            patience=10,
            restore_best_weights=True,
            min_delta=0.0005
        )

        lr_reducer = ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.2,
            patience=3,
            verbose=1,
            min_lr=1e-6
        )

        history = self.model.fit(
            X_train, y_train,
            validation_data=(X_val, y_val),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=[early_stop, lr_reducer],
            verbose=1
        )

        # Analysis of best epoch (lowest validation loss)
        logs = history.history
        best_epoch = int(np.argmin(logs['val_loss']))
        print(f"\nBest Model at Epoch {best_epoch+1}:")
        print(f"Train Loss: {logs['loss'][best_epoch]:.4f}")
        print(f"Val Loss: {logs['val_loss'][best_epoch]:.4f}")
        print(f"Direction Accuracy: {logs['val_safe_direction_accuracy'][best_epoch]:.4f}")

        return history

    def predict_directions(self, X_test):
        """Derive up/down calls from consecutive model predictions.

        Each prediction is compared with the previous prediction (not with an
        observed price), so ``len(X_test) - 1`` results are returned.

        Returns:
            ``(directions, confidences)``: ``directions`` is a list of ints
            (1 when the prediction rose, else 0); ``confidences`` is a list of
            floats equal to ``expit(|change|)``, i.e. in [0.5, 1).
        """
        # Flatten (n, 1) -> (n,) so each element is a true scalar.
        y_pred = np.asarray(self.model.predict(X_test)).reshape(-1)
        steps = np.diff(y_pred)

        directions = (steps > 0).astype(int).tolist()
        confidences = expit(np.abs(steps)).tolist()

        return directions, confidences

    def save_direction_predictions(self, X_test, filename='direction_predictions.json'):
        """Write direction predictions for ``X_test`` to a JSON file.

        Args:
            X_test: Input windows passed to ``predict_directions``.
            filename: Destination path for the JSON document.

        Returns:
            The dictionary that was written (``predictions`` + ``metadata``).
        """
        directions, confidences = self.predict_directions(X_test)

        predictions = {
            "predictions": [
                {
                    "direction": "up" if direction == 1 else "down",
                    "confidence": confidence,
                    "next_period": period
                }
                for period, (direction, confidence)
                in enumerate(zip(directions, confidences), start=1)
            ],
            "metadata": {
                "model_type": "GRU",
                "market_regime": self.market_regime,
                "prediction_horizon": "next_period"
            }
        }

        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(predictions, f, indent=4)

        print(f"Direction predictions saved to {filename}")
        return predictions

# Main Execution: load both datasets, train the predictor, save direction
# predictions, and plot the training curves.
if __name__ == "__main__":
    import traceback

    try:
        predictor = EnhancedMarketPredictor()
        # NOTE(review): these paths assume the Kaggle input layout; confirm
        # they exist in the environment where the notebook is run.
        data = predictor.load_and_prepare([
            "/kaggle/input/dataset-1/Dataset1.csv",
            "/kaggle/input/dataset-2/Dataset2.csv"
        ])

        # Prepare data with built-in validation split
        X_train, y_train, X_val, y_val = predictor.prepare_sequences(data)

        # Train and print metrics
        history = predictor.train(X_train, y_train, X_val, y_val)

        # Save direction predictions
        predictions = predictor.save_direction_predictions(X_val)

        logs = history.history
        # Keras derives the history key from the metric callable's name, so
        # the key may carry a leading underscore (the training log shows
        # "_safe_direction_accuracy"). Look the key up instead of assuming it.
        acc_key = next(
            (key for key in logs
             if key.endswith('safe_direction_accuracy')
             and not key.startswith('val_')),
            'safe_direction_accuracy'
        )

        # Plot training history
        plt.figure(figsize=(12, 6))
        plt.subplot(1, 2, 1)
        plt.plot(logs[acc_key], label='Train')
        plt.plot(logs[f'val_{acc_key}'], label='Validation')
        plt.title('Direction Accuracy')
        plt.ylabel('Accuracy')
        plt.legend()

        plt.subplot(1, 2, 2)
        plt.plot(logs['mae'], label='Train')
        plt.plot(logs['val_mae'], label='Validation')
        plt.title('Mean Absolute Error')
        plt.ylabel('MAE')
        plt.xlabel('Epoch')
        plt.legend()
        plt.tight_layout()
        plt.show()

    except Exception as e:
        # Top-level boundary: keep the run best-effort, but show where the
        # failure happened instead of only its message.
        print(f"Error: {e}")
        traceback.print_exc()

Detected market regime: downturn


  return op(a, b)
  return op(a, b)


Epoch 1/20
[1m 657/1284[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m26s[0m 42ms/step - _safe_direction_accuracy: 0.5973 - loss: 0.7853 - mae: 1.0272