In [11]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [12]:
# Location of the quotes CSV. Defaults to the original local path; set the
# HISTORICAL_QUOTES_CSV environment variable to run the notebook on another
# machine without editing this cell.
DATA_PATH = Path(
    os.environ.get(
        "HISTORICAL_QUOTES_CSV",
        "D:\\ML PROJECTS\\AML lab\\dataset\\HistoricalQuotes.csv",
    )
)
df = pd.read_csv(DATA_PATH)

In [13]:
# Print column names, non-null counts and dtypes of the loaded frame
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2518 entries, 0 to 2517
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Date         2518 non-null   object
 1    Close/Last  2518 non-null   object
 2    Volume      2518 non-null   int64 
 3    Open        2518 non-null   object
 4    High        2518 non-null   object
 5    Low         2518 non-null   object
dtypes: int64(1), object(5)
memory usage: 118.2+ KB


In [14]:
# The CSV headers carry leading spaces (e.g. ' Close/Last'); strip them so the
# columns can be addressed by their plain names.
df.columns = df.columns.str.strip()

# Remove the '$' prefix from the price columns and convert them to float.
# The pattern is a raw string: '\$' inside a normal string literal is an
# invalid escape sequence and triggers a warning on current Python versions.
for col in ['Close/Last', 'Open', 'High', 'Low']:
    df[col] = df[col].replace({r'\$': ''}, regex=True).astype(float)

In [15]:
# Predictors: same-day open/high/low prices plus traded volume.
# NOTE(review): these are same-day values, so a very high R2 against the
# same-day close is presumably expected rather than a sign of forecasting skill.
X = df.loc[:, ['Open', 'High', 'Low', 'Volume']]

# Regression target: the closing price column.
y = df.loc[:, 'Close/Last']

In [16]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [17]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only, then apply it to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = (scaler.transform(part) for part in (X_train, X_test))

In [18]:
from sklearn.neural_network import MLPRegressor

# Two hidden layers (64 then 32 units), at most 500 iterations, fixed seed
mlp = MLPRegressor(
    hidden_layer_sizes=(64, 32),
    max_iter=500,
    random_state=42,
)
mlp.fit(X_train_scaled, y_train)

In [19]:
from sklearn.metrics import mean_squared_error, r2_score

# Score the held-out rows and report each regression metric on its own line
y_pred = mlp.predict(X_test_scaled)
for label, metric in (("MSE:", mean_squared_error), ("R2 Score:", r2_score)):
    print(label, metric(y_test, y_pred))

MSE: 0.9983911210659047
R2 Score: 0.9996888979582049


In [23]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import warnings
warnings.filterwarnings('ignore')

class StockDirectionMLP:
    """MLP classifier for next-day price direction (1 = UP, 0 = DOWN).

    ``train(df)`` engineers features from a Date/OHLCV frame, fits the scaler
    on the training rows and trains a Keras model. ``predict_direction(df)``
    reuses the fitted scaler and model on new rows.
    """

    # Price columns parsed to float (they may arrive as '$'-prefixed text).
    _PRICE_COLUMNS = ('Close/Last', 'Open', 'High', 'Low')

    def __init__(self):
        self.model = None  # Keras model, created by train()
        self.scaler = StandardScaler()  # fitted by train() / prepare_data()
        self.feature_columns = []  # populated by create_features()

    @staticmethod
    def _to_price(series):
        """Return ``series`` as floats, stripping a '$' prefix from text values.

        Already-numeric columns are passed through, so a frame that was
        cleaned earlier can be processed again without error.
        """
        if pd.api.types.is_numeric_dtype(series):
            return series.astype(float)
        # regex=False: '$' must be treated literally, not as an end-of-string anchor.
        text = series.astype(str).str.replace('$', '', regex=False).str.strip()
        return pd.to_numeric(text, errors='coerce')

    def create_features(self, df):
        """Create technical indicators and features from OHLCV data.

        Args:
            df: DataFrame with 'Date', 'Close/Last', 'Open', 'High', 'Low' and
                'Volume' columns. Header whitespace is ignored and price
                columns may be numeric or '$'-prefixed strings. The frame is
                not modified.

        Returns:
            A new DataFrame sorted by ascending date holding the input
            columns, the engineered features and a 'target' column
            (1.0 = next close higher, 0.0 = not higher, NaN where the next
            close is unknown, i.e. the most recent row).

        Side effects:
            Sets ``self.feature_columns`` to the list of model input columns.
        """
        # Work on a copy so the caller's frame keeps its original dtypes and
        # repeated calls on the same object behave identically.
        data = df.copy()
        data.columns = [c.strip() if isinstance(c, str) else c for c in data.columns]

        for col in self._PRICE_COLUMNS:
            data[col] = self._to_price(data[col])

        # Sort by date to ensure proper time series order
        data['Date'] = pd.to_datetime(data['Date'])
        data = data.sort_values('Date').reset_index(drop=True)

        features_df = data.copy()
        close = data['Close/Last']
        volume = data['Volume']

        # 1. Price-based features
        features_df['price_change'] = close.pct_change()
        features_df['high_low_pct'] = (data['High'] - data['Low']) / close
        features_df['open_close_pct'] = (close - data['Open']) / data['Open']
        features_df['high_close_pct'] = (data['High'] - close) / close
        features_df['low_close_pct'] = (close - data['Low']) / close

        # 2. Moving averages (different windows)
        for window in [5, 10, 20]:
            features_df[f'sma_{window}'] = close.rolling(window).mean()
            features_df[f'price_sma_{window}_ratio'] = close / features_df[f'sma_{window}']
            features_df[f'sma_{window}_slope'] = features_df[f'sma_{window}'].pct_change(periods=3)

        # 3. Volume features
        features_df['volume_sma_10'] = volume.rolling(10).mean()
        features_df['volume_ratio'] = volume / features_df['volume_sma_10']
        features_df['volume_pct_change'] = volume.pct_change()

        # 4. Volatility features
        features_df['volatility_5'] = close.rolling(5).std()
        features_df['volatility_10'] = close.rolling(10).std()
        features_df['volatility_ratio'] = features_df['volatility_5'] / features_df['volatility_10']

        # 5. Technical indicators
        # RSI approximation (simple rolling means of gains and losses)
        delta = close.diff()
        gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
        rs = gain / loss
        features_df['rsi'] = 100 - (100 / (1 + rs))

        # MACD approximation
        ema_12 = close.ewm(span=12).mean()
        ema_26 = close.ewm(span=26).mean()
        features_df['macd'] = ema_12 - ema_26
        features_df['macd_signal'] = features_df['macd'].ewm(span=9).mean()
        features_df['macd_histogram'] = features_df['macd'] - features_df['macd_signal']

        # 6. Lag features (previous days)
        for lag in [1, 2, 3]:
            features_df[f'price_change_lag_{lag}'] = features_df['price_change'].shift(lag)
            features_df[f'volume_ratio_lag_{lag}'] = features_df['volume_ratio'].shift(lag)

        # 7. Target variable (next day direction: 1 for UP, 0 for DOWN).
        # A comparison against a missing close evaluates to False, which would
        # label the most recent row as DOWN; mask those rows as NaN instead.
        next_close = close.shift(-1)
        known = next_close.notna() & close.notna()
        features_df['target'] = (next_close > close).astype(float).where(known)

        # Select feature columns (exclude original OHLCV and intermediate columns)
        self.feature_columns = [
            'price_change', 'high_low_pct', 'open_close_pct', 'high_close_pct', 'low_close_pct',
            'price_sma_5_ratio', 'price_sma_10_ratio', 'price_sma_20_ratio',
            'sma_5_slope', 'sma_10_slope', 'sma_20_slope',
            'volume_ratio', 'volume_pct_change',
            'volatility_5', 'volatility_10', 'volatility_ratio',
            'rsi', 'macd', 'macd_signal', 'macd_histogram',
            'price_change_lag_1', 'price_change_lag_2', 'price_change_lag_3',
            'volume_ratio_lag_1', 'volume_ratio_lag_2', 'volume_ratio_lag_3'
        ]

        return features_df

    def _complete_rows(self, features_df, require_target):
        """Keep only rows whose model inputs (and optionally target) are finite."""
        cols = list(self.feature_columns)
        cleaned = features_df.copy()
        # Ratios with a zero denominator produce +/-inf, which the scaler rejects.
        cleaned[cols] = cleaned[cols].replace([np.inf, -np.inf], np.nan)
        required = cols + (['target'] if require_target else [])
        return cleaned.dropna(subset=required)

    def build_model(self, input_dim):
        """Build and compile the MLP used for binary direction classification.

        Args:
            input_dim: Number of input features per sample.

        Returns:
            A compiled Keras ``Sequential`` model with a single sigmoid output,
            reporting accuracy, precision and recall (in that order).
        """
        model = Sequential([
            # Explicit input spec instead of the deprecated `input_dim` kwarg;
            # named distinctly from the Dense layer called 'input_layer'.
            tf.keras.Input(shape=(input_dim,), name='features'),

            # Input layer with batch normalization
            Dense(128, activation='relu', name='input_layer'),
            BatchNormalization(),
            Dropout(0.3),

            # First hidden layer
            Dense(64, activation='relu', name='hidden_1'),
            BatchNormalization(),
            Dropout(0.4),

            # Second hidden layer
            Dense(32, activation='relu', name='hidden_2'),
            BatchNormalization(),
            Dropout(0.3),

            # Third hidden layer (smaller, no batch normalization)
            Dense(16, activation='relu', name='hidden_3'),
            Dropout(0.2),

            # Output layer for binary classification
            Dense(1, activation='sigmoid', name='output_layer')
        ])

        # Metric objects are passed explicitly so precision/recall resolve the
        # same way regardless of how string identifiers are looked up.
        model.compile(
            optimizer=Adam(learning_rate=0.001),
            loss='binary_crossentropy',
            metrics=[
                'accuracy',
                tf.keras.metrics.Precision(name='precision'),
                tf.keras.metrics.Recall(name='recall'),
            ]
        )

        return model

    def prepare_data(self, df):
        """Build the scaled feature matrix and target vector from raw data.

        Note: the scaler is fitted on every returned row. ``train()`` does not
        use this method because it must fit the scaler on the training slice
        only; use this helper when no held-out split follows.

        Returns:
            Tuple ``(X_scaled, y)`` of numpy arrays; rows with missing or
            infinite features, or without a known next-day close, are dropped.
        """
        features_df = self._complete_rows(self.create_features(df), require_target=True)

        X = features_df[self.feature_columns].values
        y = features_df['target'].astype(int).values

        X_scaled = self.scaler.fit_transform(X)

        return X_scaled, y

    def train(self, df, test_size=0.2, validation_split=0.2):
        """Train the MLP on a chronological split and report test metrics.

        Args:
            df: Raw Date/OHLCV frame (see ``create_features``).
            test_size: Fraction of the most recent rows held out for testing.
            validation_split: Fraction of the training rows passed to Keras as
                its validation split.

        Returns:
            Tuple ``(history, (X_test, y_test))`` where ``history`` is the
            object returned by ``model.fit`` and the test arrays are scaled
            with the training-fitted scaler.

        Raises:
            ValueError: If the split leaves the training or the test set empty.
        """
        features_df = self._complete_rows(self.create_features(df), require_target=True)
        X_raw = features_df[self.feature_columns].values
        y = features_df['target'].astype(int).values

        # Split data (maintaining time series order for financial data)
        split_idx = int(len(X_raw) * (1 - test_size))
        if split_idx <= 0 or split_idx >= len(X_raw):
            raise ValueError(
                f"Not enough usable rows ({len(X_raw)}) for test_size={test_size}."
            )

        # Fit the scaler on the training slice only so that test-set statistics
        # do not leak into the model inputs.
        X_train = self.scaler.fit_transform(X_raw[:split_idx])
        X_test = self.scaler.transform(X_raw[split_idx:])
        y_train, y_test = y[:split_idx], y[split_idx:]

        print(f"Training samples: {len(X_train)}")
        print(f"Test samples: {len(X_test)}")
        print(f"Features: {len(self.feature_columns)}")

        # Build model
        self.model = self.build_model(X_train.shape[1])

        # Stop when validation loss stalls and roll back to the best weights
        early_stopping = EarlyStopping(
            monitor='val_loss',
            patience=15,
            restore_best_weights=True
        )

        # Halve the learning rate after 8 epochs without validation improvement
        lr_reduction = ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=8,
            min_lr=1e-7
        )

        # Train model
        history = self.model.fit(
            X_train, y_train,
            epochs=100,
            batch_size=32,
            validation_split=validation_split,
            callbacks=[early_stopping, lr_reduction],
            verbose=1
        )

        # Evaluate on test set (order follows the metrics given to compile())
        test_loss, test_accuracy, test_precision, test_recall = self.model.evaluate(
            X_test, y_test, verbose=0
        )

        # Threshold the sigmoid output; flatten to 1-D to match y_test
        y_pred = (self.model.predict(X_test) > 0.5).astype(int).ravel()

        print(f"\n=== MODEL PERFORMANCE ===")
        print(f"Test Accuracy: {test_accuracy:.4f}")
        print(f"Test Precision: {test_precision:.4f}")
        print(f"Test Recall: {test_recall:.4f}")

        print(f"\n=== CLASSIFICATION REPORT ===")
        print(classification_report(y_test, y_pred, target_names=['DOWN', 'UP']))

        print(f"\n=== CONFUSION MATRIX ===")
        print(confusion_matrix(y_test, y_pred))

        return history, (X_test, y_test)

    def predict_direction(self, df):
        """Predict price direction for new data.

        Args:
            df: Raw Date/OHLCV frame (see ``create_features``). Rows whose
                rolling/lag features cannot be computed are skipped; the most
                recent row is kept even though its target is unknown.

        Returns:
            Tuple ``(predictions, probabilities)``: the model's sigmoid outputs
            and their 0/1 thresholding at 0.5, one entry per usable row in
            ascending date order.

        Raises:
            ValueError: If the model has not been trained.
        """
        if self.model is None:
            raise ValueError("Model not trained yet. Call train() first.")

        # Only the model inputs must be complete; the target is not needed here.
        features_df = self._complete_rows(self.create_features(df), require_target=False)

        X = features_df[self.feature_columns].values
        X_scaled = self.scaler.transform(X)

        # Get predictions
        probabilities = self.model.predict(X_scaled)
        predictions = (probabilities > 0.5).astype(int)

        return predictions, probabilities

# Example usage:

# Load your data
df = pd.read_csv('your_stock_data.csv')
# Strip header whitespace so names such as 'Close/Last' match even when the
# CSV header is padded with spaces.
df.columns = df.columns.str.strip()

# Initialize and train model.
# Copies are passed so the loaded frame is never modified by feature
# engineering and can safely be reused for both calls below.
classifier = StockDirectionMLP()
history, test_data = classifier.train(df.copy())

# Make predictions on new data
predictions, probabilities = classifier.predict_direction(df.copy())


ImportError: Traceback (most recent call last):
  File "c:\Users\suyas\AppData\Local\Programs\Python\Python310\lib\site-packages\tensorflow\python\pywrap_tensorflow.py", line 73, in <module>
    from tensorflow.python._pywrap_tensorflow_internal import *
ImportError: DLL load failed while importing _pywrap_tensorflow_internal: A dynamic link library (DLL) initialization routine failed.


Failed to load the native TensorFlow runtime.
See https://www.tensorflow.org/install/errors for some common causes and solutions.
If you need help, create an issue at https://github.com/tensorflow/tensorflow/issues and include the entire stack trace above this error message.

SyntaxError: invalid syntax. Perhaps you forgot a comma? (1182499018.py, line 1)