# 🎯 Focused LSTM Trading Strategy for Kaggle

This notebook implements a complete LSTM trading strategy using 13 specific technical indicators.

## 📊 What this notebook does:
- Loads your price data (OHLCV format: a CSV with `Date`, `Open`, `High`, `Low`, `Close` and `Volume` columns)
- Calculates 13 focused technical indicators
- Trains an LSTM model for price prediction
- Simulates trading strategy
- Generates comprehensive performance analysis

## 🚀 To use this notebook:
1. Upload your CSV data to Kaggle (or use existing dataset)
2. Update the `data_path` entry of `CONFIG` in the configuration cell below
3. Run all cells sequentially
4. Check outputs in `/kaggle/working/`

In [None]:
# 📦 Install required packages
import subprocess
import sys

# Each requirement gets its own pip invocation, launched through the
# interpreter that is running this notebook. check_call raises
# CalledProcessError as soon as one installation fails.
packages = ['tensorflow==2.13.0', 'scikit-learn', 'TA-Lib', 'matplotlib', 'seaborn']
pip_install = [sys.executable, '-m', 'pip', 'install']

for package in packages:
    print(f"Installing {package}...")
    subprocess.check_call(pip_install + [package, '--quiet'])

print("✅ All packages installed!")

In [None]:
# 📚 Import libraries
# Data handling and plotting.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
# Silences every warning for the rest of the session (including deprecation
# notices from the libraries below); comment this out while debugging.
warnings.filterwarnings('ignore')

# Model building, feature scaling, regression metrics and technical indicators.
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import talib

# Apply the seaborn-flavoured matplotlib style sheet to all figures that follow.
plt.style.use('seaborn-v0_8')
print("📊 Libraries imported successfully!")

In [None]:
# ⚙️ Configuration - UPDATE THE DATA_PATH FOR YOUR DATASET!
# Single mapping holding every tunable of the notebook; the later cells read
# their settings from it by key.
CONFIG = dict(
    # 🔴 UPDATE THIS PATH TO YOUR KAGGLE DATASET
    data_path='/kaggle/input/your-dataset-name/priceData5Year.csv',

    # Model settings
    sequence_length=60,     # time steps per LSTM input window
    lstm_units=50,          # units in the first LSTM layer
    dropout_rate=0.2,
    epochs=30,
    batch_size=32,
    validation_split=0.2,   # fraction of samples passed to model.fit as validation
    learning_rate=0.001,

    # Trading settings
    initial_capital=100000,
    transaction_cost=0.001,   # proportional fee applied to each buy and sell
    max_position_size=0.95,   # share of available cash committed per buy
)

print("⚙️ Configuration loaded:")
print(f"   📁 Data path: {CONFIG['data_path']}")
print(f"   💰 Initial capital: ${CONFIG['initial_capital']:,}")

In [None]:
# 📂 Load and examine data
# Reads the CSV named by CONFIG['data_path'] into `df`, indexed by date when a
# 'Date' column is present, and prints a short overview.
print("📂 Loading data...")

df = pd.read_csv(CONFIG['data_path'])

# Handle date column
if 'Date' in df.columns:
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)
    # The indicators, the LSTM windows and the backtest all assume rows in
    # ascending time order; a stable sort is a no-op for already-sorted files.
    df.sort_index(kind='mergesort', inplace=True)
else:
    # Without a 'Date' column the index is just the row number. Converting it
    # with pd.to_datetime would turn 0, 1, 2, ... into nanosecond offsets from
    # 1970-01-01, i.e. meaningless timestamps, so the row numbers are kept.
    print("⚠️ No 'Date' column found - keeping the row-number index.")

print(f"✅ Data loaded: {df.shape}")
print(f"📅 Date range: {df.index.min()} to {df.index.max()}")
print("\n📊 Data overview:")
print(df.head())
print("\n📈 Price statistics:")
print(df['Close'].describe())

In [None]:
# 🔧 Calculate 13 focused technical indicators
# Builds `data` (a copy of `df` plus indicator columns) and `clean_data`
# (the same frame with every row containing NaN or +/-inf removed).
print("🔧 Calculating 13 technical indicators...")

data = df.copy()

# Fail early with a clear message instead of a bare KeyError further down.
required_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
missing_columns = [col for col in required_columns if col not in data.columns]
if missing_columns:
    raise KeyError(f"Input data is missing required column(s): {missing_columns}")

# TA-Lib works on float NumPy arrays, so convert each column once up front.
open_prices = data['Open'].astype(float).values
high_prices = data['High'].astype(float).values
low_prices = data['Low'].astype(float).values
close_prices = data['Close'].astype(float).values
volume = data['Volume'].astype(float).values

# Momentum Indicators (4)
print("   📈 Momentum indicators...")
data['RSI'] = talib.RSI(close_prices, timeperiod=14)
data['ROC'] = talib.ROC(close_prices, timeperiod=10)
data['STOCH_K'], data['STOCH_D'] = talib.STOCH(high_prices, low_prices, close_prices)
# NOTE: despite its name this column holds TA-Lib's TRIX, not a True Strength
# Index. The column name is kept because later cells select it as 'TSI'.
data['TSI'] = talib.TRIX(close_prices, timeperiod=14)

# Volume Indicators (3)
print("   📊 Volume indicators...")
data['OBV'] = talib.OBV(close_prices, volume)
data['MFI'] = talib.MFI(high_prices, low_prices, close_prices, volume)
# Price-Volume Trend: cumulative sum of (fractional price change * volume).
# The first bar has no previous close, so its change is defined as 0. Using
# np.roll here would wrap around and pair the first bar with the LAST close,
# offsetting the whole cumulative series by a spurious term.
previous_close = np.concatenate((close_prices[:1], close_prices[:-1]))
data['PVT'] = ((close_prices - previous_close) / previous_close * volume).cumsum()

# Trend Indicators (3)
print("   📉 Trend indicators...")
data['TEMA'] = talib.TEMA(close_prices, timeperiod=14)
data['MACD'], data['MACD_Signal'], data['MACD_Hist'] = talib.MACD(close_prices)
data['KAMA'] = talib.KAMA(close_prices, timeperiod=30)

# Volatility Indicators (3)
print("   🌪️ Volatility indicators...")
data['ATR'] = talib.ATR(high_prices, low_prices, close_prices)
data['BB_Upper'], data['BB_Middle'], data['BB_Lower'] = talib.BBANDS(close_prices)
data['BB_Width'] = data['BB_Upper'] - data['BB_Lower']
# Position of the close inside the band: 0 = lower band, 1 = upper band.
data['BB_Position'] = (close_prices - data['BB_Lower']) / data['BB_Width']

# Ulcer Index: root-mean-square of the percentage drawdown from the rolling
# 14-bar high, itself averaged over 14 bars.
max_prices = pd.Series(close_prices).rolling(window=14).max()
drawdown = ((close_prices - max_prices) / max_prices) * 100
data['ULCER'] = np.sqrt((drawdown ** 2).rolling(window=14).mean()).values

print("✅ All indicators calculated!")

# Clean data
# Divisions above can yield +/-inf (zero denominator). dropna() alone keeps
# those rows and MinMaxScaler later rejects infinite input, so map them to NaN
# first and drop them together with the indicator warm-up rows.
clean_data = data.replace([np.inf, -np.inf], np.nan).dropna()
print(f"📊 Clean data shape: {clean_data.shape}")

In [None]:
# 🧠 Prepare data for LSTM
# Produces X with shape (samples, sequence_length, n_features) and y with
# shape (samples,). Sample k holds the scaled features of the sequence_length
# rows immediately BEFORE row k + sequence_length, and y[k] is the raw Close
# of that row (a one-step-ahead target).
print("🔄 Preparing LSTM training data...")

# Feature columns (17 columns derived from the 13 indicators)
feature_columns = ['RSI', 'ROC', 'STOCH_K', 'STOCH_D', 'TSI',  # Momentum
                  'OBV', 'MFI', 'PVT',  # Volume
                  'TEMA', 'MACD', 'MACD_Signal', 'MACD_Hist', 'KAMA',  # Trend
                  'ATR', 'BB_Width', 'BB_Position', 'ULCER']  # Volatility

# Prepare features and target
features = clean_data[feature_columns].values
target = clean_data['Close'].values

sequence_length = CONFIG['sequence_length']
n_samples = len(features) - sequence_length
if n_samples <= 0:
    # Without this check X would be an empty 1-D array and the model cell
    # would fail later with an unrelated-looking IndexError on X.shape.
    raise ValueError(
        f"Need more than {sequence_length} clean rows to build sequences, "
        f"but only {len(features)} are available."
    )

# Scale features
# Keras' validation_split holds out the LAST fraction of the samples. Fit the
# scaler only on the rows that feed the training samples so that min/max
# statistics from the validation period do not leak into training; validation
# features may therefore fall outside [0, 1].
n_train_samples = max(int(n_samples * (1 - CONFIG['validation_split'])), 1)
n_fit_rows = n_train_samples + sequence_length - 1
scaler = MinMaxScaler()
scaler.fit(features[:n_fit_rows])
scaled_features = scaler.transform(features)

# Create sequences
X = np.array([scaled_features[end - sequence_length:end]
              for end in range(sequence_length, len(scaled_features))])
y = np.array(target[sequence_length:])

print(f"📊 Training data shape: X={X.shape}, y={y.shape}")

In [None]:
# 🏗️ Build and train LSTM model
# Two stacked LSTM layers (the second with half the units), each followed by
# dropout, then a 25-unit dense layer and a single-value output. Trained with
# Adam on mean squared error against the raw Close target.
print("🏗️ Building LSTM model...")

lstm_units = CONFIG['lstm_units']
dropout_rate = CONFIG['dropout_rate']
window_shape = (X.shape[1], X.shape[2])  # (time steps, features)

model = Sequential()
model.add(LSTM(lstm_units, return_sequences=True, input_shape=window_shape))
model.add(Dropout(dropout_rate))
model.add(LSTM(lstm_units // 2, return_sequences=False))
model.add(Dropout(dropout_rate))
model.add(Dense(25))
model.add(Dense(1))

optimizer = Adam(learning_rate=CONFIG['learning_rate'])
model.compile(optimizer=optimizer, loss='mse')

print("🎯 Training model...")
fit_settings = dict(
    epochs=CONFIG['epochs'],
    batch_size=CONFIG['batch_size'],
    validation_split=CONFIG['validation_split'],
    verbose=1,
)
history = model.fit(X, y, **fit_settings)

print("✅ Model training complete!")

In [None]:
# 🔮 Make predictions and evaluate
# Predicts every sample in X, reports error metrics and plots the training
# curves next to a predicted-vs-actual scatter.
print("🔮 Making predictions...")

predictions = model.predict(X).flatten()

# Calculate metrics
# NOTE: these cover ALL samples, most of which the model was trained on, so
# they are optimistic. The held-out figures printed below are the honest ones.
mse = mean_squared_error(y, predictions)
mae = mean_absolute_error(y, predictions)
rmse = np.sqrt(mse)
r2 = r2_score(y, predictions)

print(f"📊 Model Performance:")
print(f"   MSE: {mse:.2f}")
print(f"   MAE: {mae:.2f}")
print(f"   RMSE: {rmse:.2f}")
print(f"   R²: {r2:.4f}")

# Metrics restricted to the tail that model.fit held out via validation_split
# (Keras takes the last fraction of the samples). Needs at least two samples
# for R² to be defined.
holdout_start = int(len(X) * (1 - CONFIG['validation_split']))
if len(X) - holdout_start >= 2:
    y_holdout = y[holdout_start:]
    predictions_holdout = predictions[holdout_start:]
    holdout_mse = mean_squared_error(y_holdout, predictions_holdout)
    holdout_mae = mean_absolute_error(y_holdout, predictions_holdout)
    holdout_rmse = np.sqrt(holdout_mse)
    holdout_r2 = r2_score(y_holdout, predictions_holdout)

    print(f"📊 Held-out (validation) performance on the last {len(y_holdout)} samples:")
    print(f"   MSE: {holdout_mse:.2f}")
    print(f"   MAE: {holdout_mae:.2f}")
    print(f"   RMSE: {holdout_rmse:.2f}")
    print(f"   R²: {holdout_r2:.4f}")

# Plot training history
plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
# 'val_loss' is only recorded when validation data was used during fit.
if 'val_loss' in history.history:
    plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.scatter(y, predictions, alpha=0.5)
# Diagonal reference line: a perfect model would put every point on it.
plt.plot([y.min(), y.max()], [y.min(), y.max()], 'r--')
plt.xlabel('Actual Prices')
plt.ylabel('Predicted Prices')
plt.title('Predictions vs Actual')

plt.tight_layout()
plt.show()

In [None]:
# 💼 Simulate trading strategy
# Long-only backtest on the predicted rows. Produces `results` (prices plus
# Prediction / Signal / Portfolio_Value columns), `trading_log` (one row per
# executed trade), `final_value` and `total_return`.
print("💼 Simulating trading strategy...")

# Prepare results dataframe
# predictions[k] is the forecast for clean_data row k + sequence_length.
start_idx = sequence_length
results = clean_data.iloc[start_idx:start_idx + len(predictions)].copy()
if results.empty:
    raise ValueError("No predictions available - cannot simulate trading.")
results['Prediction'] = predictions

close_values = results['Close'].to_numpy(dtype=float)
predicted_values = results['Prediction'].to_numpy(dtype=float)

# Generate trading signals
# The forecast for row t+1 is built from data up to row t, so it is known at
# the close of row t and must be acted on THERE. Comparing it with Close[t]
# and trading only at Close[t+1] would execute after the forecast move has
# already happened. The last row has no next-row forecast and stays flat (0).
expected_move = predicted_values[1:] - close_values[:-1]
signals = np.zeros(len(results), dtype=int)
signals[:-1] = np.where(expected_move > 0, 1, np.where(expected_move < 0, -1, 0))
results['Signal'] = signals  # 1 = Buy, -1 = Sell, 0 = Hold

# Simulate trading
initial_capital = float(CONFIG['initial_capital'])
transaction_cost = CONFIG['transaction_cost']
max_position_size = CONFIG['max_position_size']

capital = initial_capital
shares = 0
trades = []
# Collected in a float array and assigned once: writing floats cell-by-cell
# into an integer column is rejected by newer pandas versions and is slow.
portfolio_values = np.empty(len(results), dtype=float)

for i, (trade_date, current_price, signal) in enumerate(
        zip(results.index, close_values, signals)):
    if signal == 1 and capital > 0 and current_price > 0:  # Buy
        shares_to_buy = int((capital * max_position_size) // current_price)
        if shares_to_buy > 0:
            cost = shares_to_buy * current_price * (1 + transaction_cost)
            if cost <= capital:
                capital -= cost
                shares += shares_to_buy
                trades.append({'Date': trade_date, 'Action': 'BUY',
                               'Price': current_price, 'Shares': shares_to_buy})

    elif signal == -1 and shares > 0:  # Sell the whole position
        revenue = shares * current_price * (1 - transaction_cost)
        capital += revenue
        trades.append({'Date': trade_date, 'Action': 'SELL',
                       'Price': current_price, 'Shares': shares})
        shares = 0

    # Update portfolio value (cash plus holdings marked at the current close)
    portfolio_values[i] = capital + (shares * current_price)

results['Portfolio_Value'] = portfolio_values

# Explicit columns keep the log (and its CSV header) well-formed even when no
# trade was executed.
trading_log = pd.DataFrame(trades, columns=['Date', 'Action', 'Price', 'Shares'])

final_value = results['Portfolio_Value'].iloc[-1]
total_return = (final_value - CONFIG['initial_capital']) / CONFIG['initial_capital']

print(f"📊 Trading Summary:")
print(f"   Total trades: {len(trading_log)}")
print(f"   Final portfolio value: ${final_value:,.2f}")
print(f"   Total return: {total_return:.2%}")

In [None]:
# 📈 Create visualizations
# 2x2 dashboard (price vs prediction, equity curve, trade markers, return
# histogram) followed by the Sharpe ratio and maximum drawdown.
print("📈 Creating performance visualizations...")

fig, axes = plt.subplots(2, 2, figsize=(15, 10))
price_ax, equity_ax = axes[0, 0], axes[0, 1]
signal_ax, hist_ax = axes[1, 0], axes[1, 1]
timeline = results.index
equity_curve = results['Portfolio_Value']

# Price vs Predictions
price_ax.plot(timeline, results['Close'], label='Actual Price', alpha=0.8)
price_ax.plot(timeline, results['Prediction'], label='LSTM Prediction', alpha=0.8)
price_ax.set_title('Actual vs Predicted Prices')
price_ax.legend()

# Portfolio Value
equity_ax.plot(timeline, equity_curve, color='green')
equity_ax.set_title('Portfolio Value Over Time')
equity_ax.set_ylabel('Portfolio Value ($)')

# Trading Signals
buy_signals = results.loc[results['Signal'] == 1]
sell_signals = results.loc[results['Signal'] == -1]

signal_ax.plot(timeline, results['Close'], alpha=0.7)
signal_ax.scatter(buy_signals.index, buy_signals['Close'],
                  color='green', marker='^', label='Buy', s=30)
signal_ax.scatter(sell_signals.index, sell_signals['Close'],
                  color='red', marker='v', label='Sell', s=30)
signal_ax.set_title('Trading Signals')
signal_ax.legend()

# Returns Distribution
returns = equity_curve.pct_change().dropna()
hist_ax.hist(returns, bins=30, alpha=0.7, color='blue')
hist_ax.set_title('Daily Returns Distribution')
hist_ax.set_xlabel('Daily Returns')

plt.tight_layout()
plt.show()

# Calculate additional metrics
# Both default to 0 and are only computed when there is at least one return.
sharpe_ratio = 0
max_drawdown = 0
if len(returns) > 0:
    # Annualised with 252 periods per year; left at 0 when the returns show
    # no positive dispersion.
    if returns.std() > 0:
        sharpe_ratio = returns.mean() / returns.std() * np.sqrt(252)
    # Largest fractional drop of the equity curve below its running peak.
    running_peak = equity_curve.cummax()
    max_drawdown = ((equity_curve / running_peak) - 1).min()

summary_lines = [
    f"\n💰 FINAL PERFORMANCE SUMMARY:",
    "=" * 40,
    f"Total Return: {total_return:.2%}",
    f"Final Value: ${final_value:,.2f}",
    f"Profit/Loss: ${final_value - CONFIG['initial_capital']:,.2f}",
    f"Sharpe Ratio: {sharpe_ratio:.3f}",
    f"Max Drawdown: {max_drawdown:.2%}",
    f"Total Trades: {len(trading_log)}",
]
for summary_line in summary_lines:
    print(summary_line)

In [None]:
# 💾 Save results
# Writes the per-row results, the trade log and a plain-text performance
# report into the Kaggle working directory.
print("💾 Saving results to files...")

output_dir = '/kaggle/working'

# Save main results
results.to_csv(f'{output_dir}/kaggle_lstm_results.csv')
print("   ✅ Results saved to: kaggle_lstm_results.csv")

# Save trading log
trading_log.to_csv(f'{output_dir}/kaggle_trading_log.csv', index=False)
print("   ✅ Trading log saved to: kaggle_trading_log.csv")

# Save performance summary
# The report is assembled first and written in one call.
report_lines = [
    "KAGGLE LSTM TRADING STRATEGY - PERFORMANCE REPORT\n",
    "=" * 60 + "\n\n",
    f"Analysis Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n",
    f"Data Period: {clean_data.index.min()} to {clean_data.index.max()}\n\n",

    "PERFORMANCE METRICS:\n",
    "-" * 30 + "\n",
    f"Total Return: {total_return:.2%}\n",
    f"Final Value: ${final_value:,.2f}\n",
    f"Profit/Loss: ${final_value - CONFIG['initial_capital']:,.2f}\n",
    f"Sharpe Ratio: {sharpe_ratio:.3f}\n",
    f"Max Drawdown: {max_drawdown:.2%}\n",
    f"Total Trades: {len(trading_log)}\n",

    "\nMODEL METRICS:\n",
    "-" * 30 + "\n",
    f"MSE: {mse:.2f}\n",
    f"RMSE: {rmse:.2f}\n",
    f"MAE: {mae:.2f}\n",
    f"R²: {r2:.4f}\n",
]
# The report contains non-ASCII text ("R²"), so the encoding is stated
# explicitly instead of relying on the platform default.
with open(f'{output_dir}/kaggle_performance_summary.txt', 'w', encoding='utf-8') as f:
    f.writelines(report_lines)

print("   ✅ Summary saved to: kaggle_performance_summary.txt")

print(f"\n🎉 ANALYSIS COMPLETE!")
print("=" * 50)
print("📊 All files generated successfully in /kaggle/working/")
print("📈 Check the visualizations above for insights")
print("💡 Modify CONFIG settings and re-run for different strategies")