<a href="https://colab.research.google.com/github/ATOMworkplace/FinanceCraft/blob/main/Technical_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ---------------------------------------------------------
# Import Necessary Libraries
# ---------------------------------------------------------
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, classification_report,
    confusion_matrix, roc_auc_score
)
from sklearn.preprocessing import StandardScaler

import ipywidgets as widgets
from IPython.display import display

sns.set_style('whitegrid')

# ---------------------------------------------------------
# 1) Initialize Progress Bar
# ---------------------------------------------------------
# The bar advances by one at every `progress.value += 1` statement in this
# script. There are 12 such statements, so the maximum must be 12 for the bar
# to be full once the last step has run (the previous value of 14 left it
# permanently short of 100%).
progress = widgets.IntProgress(
    value=0,
    min=0,
    max=12,  # keep equal to the number of `progress.value += 1` statements
    step=1,
    description='Processing:',
    bar_style='info',
    orientation='horizontal'
)
display(progress)

# ---------------------------------------------------------
# 2) Fetch Data (5 years of daily bars)
#    Potential improvement: fetch 10y or 15y for more data
# ---------------------------------------------------------
progress.description = 'Fetching Data'
ticker = 'AAPL'
df = yf.download(ticker, period='5y', interval='1d')
# Stop here with a clear message if nothing came back; otherwise the first
# failure would be an obscure error much further down the script.
if df.empty:
    raise RuntimeError(
        f"No price data returned for {ticker!r}; "
        "check the symbol and the network connection."
    )
print("Initial shape:", df.shape)
progress.value += 1

# ---------------------------------------------------------
# 3) Ensure Proper Column Names
# ---------------------------------------------------------
progress.description = 'Processing Columns'
if isinstance(df.columns, pd.MultiIndex):
    # Flatten each two-level column label into "first_second" (or just
    # "first" when the second level is empty), then strip the symbol from the
    # OHLCV fields. The rename map is built from `ticker` so that changing
    # the symbol above does not silently leave the columns un-renamed.
    df.columns = [f"{c[0]}_{c[1]}" if c[1] else c[0] for c in df.columns]
    df = df.rename(columns={
        f"{field}_{ticker}": field
        for field in ('Close', 'High', 'Low', 'Open', 'Volume')
    })

# The indicator functions defined below read these three columns; fail early
# with an explicit message if the flattening did not produce them.
missing_cols = [col for col in ('Close', 'High', 'Low') if col not in df.columns]
if missing_cols:
    raise KeyError(
        f"Expected price columns not found after processing: {missing_cols}"
    )
print("Columns after processing:", df.columns.tolist())
progress.value += 1

# ---------------------------------------------------------
# 4) Define Indicator Functions
# ---------------------------------------------------------
progress.description = 'Defining Indicators'

def EMA(data, window=20):
    """Return the exponential moving average of ``data['Close']``.

    The average uses a span of ``window`` periods and the recursive
    (``adjust=False``) weighting.
    """
    close_prices = data['Close']
    smoother = close_prices.ewm(span=window, adjust=False)
    return smoother.mean()

def MACD(data, fast=12, slow=26, signal=9):
    """Return the MACD line and its signal line as a ``(macd, signal)`` pair.

    The MACD line is the ``fast``-span EMA of the close minus the
    ``slow``-span EMA; the signal line is a ``signal``-span exponential
    average (``adjust=False``) of the MACD line.
    """
    spread = EMA(data, fast) - EMA(data, slow)
    smoothed_spread = spread.ewm(span=signal, adjust=False).mean()
    return spread, smoothed_spread

def RSI(data, period=14):
    """Return the Relative Strength Index of ``data['Close']``.

    Gains and losses are the positive and negative parts of the one-step
    price change, each averaged with a simple rolling mean over ``period``
    rows. The result is ``100 - 100 / (1 + avg_gain / avg_loss)``.
    """
    change = data['Close'].diff()
    is_drop = change < 0
    is_rise = change > 0

    # Zero out the moves in the opposite direction before averaging.
    avg_gain = change.mask(is_drop, 0).rolling(window=period).mean()
    avg_loss = (-change.mask(is_rise, 0)).rolling(window=period).mean()

    relative_strength = avg_gain / avg_loss
    return 100 - (100 / (1 + relative_strength))

def Bollinger_Bands(data, window=20, num_std=2):
    """Return the ``(upper, lower)`` Bollinger Bands of ``data['Close']``.

    Both bands are centred on the ``window``-row rolling mean and sit
    ``num_std`` rolling standard deviations above and below it.
    """
    rolling_close = data['Close'].rolling(window)
    midline = rolling_close.mean()
    half_width = num_std * rolling_close.std()
    return midline + half_width, midline - half_width

def Stochastic_Oscillator(data, k_window=14, d_window=3):
    """Return the stochastic oscillator as a ``(%K, %D)`` pair.

    %K places the close within the rolling ``k_window`` range between the
    lowest ``Low`` and the highest ``High``, scaled to 0-100. %D is the
    ``d_window``-row rolling mean of %K.
    """
    lowest_low = data['Low'].rolling(window=k_window).min()
    highest_high = data['High'].rolling(window=k_window).max()

    distance_from_low = data['Close'] - lowest_low
    channel_height = highest_high - lowest_low

    stoch_k = 100 * (distance_from_low / channel_height)
    stoch_d = stoch_k.rolling(window=d_window).mean()
    return stoch_k, stoch_d

progress.value += 1

# ---------------------------------------------------------
# 5) Compute Indicators
# ---------------------------------------------------------
progress.description = 'Computing Indicators'

# Trend / momentum indicators derived from the close.
df['EMA_20'] = EMA(df, window=20)
df['MACD_line'], df['MACD_signal'] = MACD(df)
df['RSI_14'] = RSI(df, period=14)

# Volatility envelope and range-position indicators.
df['Upper_BB'], df['Lower_BB'] = Bollinger_Bands(df, window=20, num_std=2)
df['Stoch_%K'], df['Stoch_%D'] = Stochastic_Oscillator(df, k_window=14, d_window=3)

progress.value += 1

# ---------------------------------------------------------
# 6) Generate Strategy Signals
# ---------------------------------------------------------
progress.description = 'Generating Strategy Signals'


def _three_state_signal(buy_mask, sell_mask):
    """Return +1 where ``buy_mask`` holds, else -1 where ``sell_mask`` holds, else 0."""
    return np.where(buy_mask, 1, np.where(sell_mask, -1, 0))


# Momentum: +1 while EMA_20 is above its previous value AND the MACD line is
# above its signal line; -1 in every other case (there is no neutral state).
df['MomentumSignal'] = np.where(
    (df['EMA_20'] > df['EMA_20'].shift(1)) & (df['MACD_line'] > df['MACD_signal']),
    1,
    -1,
)

# Mean reversion: buy below RSI 30, sell above RSI 70, otherwise flat.
df['MeanRevSignal'] = _three_state_signal(
    buy_mask=df['RSI_14'] < 30,
    sell_mask=df['RSI_14'] > 70,
)

# Bollinger Bands: buy below the lower band, sell above the upper band.
df['BB_Signal'] = _three_state_signal(
    buy_mask=df['Close'] < df['Lower_BB'],
    sell_mask=df['Close'] > df['Upper_BB'],
)

# Stochastic: buy when %K is above %D and below 80; sell when %K is below %D
# and above 20.
df['StochSignal'] = _three_state_signal(
    buy_mask=(df['Stoch_%K'] > df['Stoch_%D']) & (df['Stoch_%K'] < 80),
    sell_mask=(df['Stoch_%K'] < df['Stoch_%D']) & (df['Stoch_%K'] > 20),
)

# Combined signal: weighted average of the four signals above.
signal_weights = {
    'MomentumSignal': 1.0,
    'MeanRevSignal': 1.0,
    'BB_Signal': 1.0,
    'StochSignal': 0.5,
}
df['CombinedSignal'] = (
    sum(df[name] * weight for name, weight in signal_weights.items())
    / sum(signal_weights.values())
)
progress.value += 1

# ---------------------------------------------------------
# 7) Create Classification Target
# ---------------------------------------------------------
progress.description = 'Creating Classification Target'
# Label a row 1 when the next row's close is strictly higher, else 0.
next_close = df['Close'].shift(-1)
df['UpDownTarget'] = np.where(next_close > df['Close'], 1, 0)
# The final row has no next close. Comparing against NaN is False, so it
# would be labelled 0 ("down") even though the outcome is unknown, and the
# NaN-drop step would keep it because the label itself is not NaN. Remove
# every row whose next close is missing so that no fabricated label reaches
# training or evaluation.
df = df.loc[next_close.notna()].copy()
progress.value += 1

# ---------------------------------------------------------
# 8) Drop NaNs
# ---------------------------------------------------------
progress.description = 'Dropping NaN Values'
# Remove, in place, every row that has a missing value in any column
# (for example the warm-up rows of the rolling indicators).
df.dropna(axis=0, how='any', inplace=True)
print("Shape after computing everything and dropna:", df.shape)
progress.value += 1

# ---------------------------------------------------------
# 9) Define Features & Split
# ---------------------------------------------------------
progress.description = 'Defining Features and Splitting Data'

# Raw indicator values first, then the discrete signals derived from them.
feature_cols = (
    ['EMA_20', 'MACD_line', 'MACD_signal', 'RSI_14']
    + ['Upper_BB', 'Lower_BB', 'Stoch_%K', 'Stoch_%D']
    + ['MomentumSignal', 'MeanRevSignal', 'BB_Signal', 'StochSignal']
    + ['CombinedSignal']
)

# Independent snapshot of the full frame, kept for backtesting reference.
backtest_df = df.copy()

# Chronological split: the first 80% of rows train, the remainder tests.
split_index = int(len(df) * 0.8)
train_data, test_data = df.iloc[:split_index], df.iloc[split_index:]

X_train, y_train = train_data[feature_cols], train_data['UpDownTarget']
X_test, y_test = test_data[feature_cols], test_data['UpDownTarget']

# Standardise features; the scaler is fitted on the training rows only and
# then applied unchanged to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print(f"Train size: {X_train.shape}, Test size: {X_test.shape}")
progress.value += 1

# ---------------------------------------------------------
# 10) Random Forest Classifier with CV & Hyperparam Tuning
# ---------------------------------------------------------
progress.description = 'Training RandomForestClassifier (CV + Hyperparams)'

# Five time-ordered cross-validation splits.
tscv = TimeSeriesSplit(n_splits=5)

# 3 x 3 x 2 x 2 = 36 candidate settings; depth is always capped
# (no None) to reduce overfitting.
param_grid_rf = dict(
    n_estimators=[100, 200, 400],
    max_depth=[5, 10, 20],
    min_samples_split=[2, 5],
    min_samples_leaf=[1, 2],
)

clf = RandomForestClassifier(random_state=42)

# Exhaustive search over the grid, scored by accuracy, using all cores.
rf_cv = GridSearchCV(
    estimator=clf,
    param_grid=param_grid_rf,
    cv=tscv,
    scoring='accuracy',
    n_jobs=-1,
    verbose=1,
)
rf_cv.fit(X_train, y_train)
print(f"Best RF Params: {rf_cv.best_params_}")

rf_best = rf_cv.best_estimator_
progress.value += 1

# ---------------------------------------------------------
# 11) Predict & Report Accuracy
# ---------------------------------------------------------
progress.description = 'Making Predictions & Calculating Accuracy'

# Class predictions from the tuned model on both partitions.
train_preds, test_preds = rf_best.predict(X_train), rf_best.predict(X_test)

# Share of rows whose predicted class matches the label.
train_acc = accuracy_score(y_true=y_train, y_pred=train_preds)
test_acc = accuracy_score(y_true=y_test, y_pred=test_preds)

print(f"Train Accuracy: {train_acc:.2%}")
print(f"Test Accuracy : {test_acc:.2%}")
progress.value += 1

# ---------------------------------------------------------
# 12) Backtest Using Class Predictions
#    Rows are selected by label (.loc) using the test partition's index
# ---------------------------------------------------------
progress.description = 'Backtesting Strategy'

# Work on an independent copy of the test-period rows.
df_test = df.loc[test_data.index].copy()
df_test['PredictedClass'] = test_preds  # 1 => Up, 0 => Down

# Basic rule: predicted up => long (+1), otherwise short (-1).
predicted_up = df_test['PredictedClass'] == 1
df_test['Signal'] = np.where(predicted_up, 1, -1)

# Enhanced rule: act only when the sign of CombinedSignal agrees with the
# model's direction; otherwise stay flat (0).
agree_long = (df_test['CombinedSignal'] > 0) & (df_test['Signal'] == 1)
agree_short = (df_test['CombinedSignal'] < 0) & (df_test['Signal'] == -1)
df_test['Enhanced_Signal'] = np.where(
    agree_long, 1,
    np.where(agree_short, -1, 0)
)

# Returns: the previous row's signal is applied to the current row's
# close-to-close return.
df_test['Market_Return'] = df_test['Close'].pct_change()
df_test['Strategy_Return'] = (
    df_test['Enhanced_Signal'].shift(1) * df_test['Market_Return']
)
df_test.dropna(subset=['Strategy_Return'], inplace=True)

# Cumulative growth of one unit for the market and for the strategy.
for return_col, cumulative_col in (
    ('Market_Return', 'Cumulative_Market'),
    ('Strategy_Return', 'Cumulative_Strategy'),
):
    df_test[cumulative_col] = (1 + df_test[return_col]).cumprod()

report_cols = [
    'Close', 'PredictedClass', 'Signal',
    'Enhanced_Signal', 'Market_Return', 'Strategy_Return',
]
print(df_test[report_cols].tail(10))
progress.value += 1

# ---------------------------------------------------------
# 13) Plot Indicators and Returns
# ---------------------------------------------------------
progress.description = 'Plotting Indicators/Returns'
fig, axes = plt.subplots(nrows=5, ncols=1, figsize=(16, 22), sharex=True)
price_ax, macd_ax, stoch_ax, signal_ax, returns_ax = axes
band_style = dict(color='blue', linestyle='--')

# (Row 1) Close + Bollinger Bands
price_ax.plot(df.index, df['Close'], label='Close Price', color='black')
price_ax.plot(df.index, df['Upper_BB'], label='Upper Bollinger Band', **band_style)
price_ax.plot(df.index, df['Lower_BB'], label='Lower Bollinger Band', **band_style)
price_ax.set_title('Close Price with Bollinger Bands')
price_ax.legend()

# (Row 2) MACD & RSI (RSI divided by 100 so it shares the MACD axis)
macd_ax.plot(df.index, df['MACD_line'], label='MACD Line', color='red')
macd_ax.plot(df.index, df['MACD_signal'], label='Signal Line', color='blue')
macd_ax.axhline(0, color='gray', linestyle='--')
macd_ax.plot(df.index, df['RSI_14'] / 100, label='RSI/100', color='purple', alpha=0.7)
macd_ax.set_title('MACD & RSI (Scaled by 1/100 for same axis)')
macd_ax.legend()

# (Row 3) Stochastic Oscillator with the 80 / 20 reference levels
stoch_ax.plot(df.index, df['Stoch_%K'], label='%K', color='green')
stoch_ax.plot(df.index, df['Stoch_%D'], label='%D', color='orange')
stoch_ax.axhline(80, color='red', linestyle='--', alpha=0.5)
stoch_ax.axhline(20, color='blue', linestyle='--', alpha=0.5)
stoch_ax.set_title('Stochastic Oscillator')
stoch_ax.legend()

# (Row 4) Test-period close with the enhanced buy / sell markers
signal_ax.plot(df_test.index, df_test['Close'], label='Actual Close Price', color='black')
buy_signals = df_test[df_test['Enhanced_Signal'] == 1]
signal_ax.scatter(
    buy_signals.index, buy_signals['Close'],
    marker='^', color='green', label='Buy Signal', alpha=1
)
sell_signals = df_test[df_test['Enhanced_Signal'] == -1]
signal_ax.scatter(
    sell_signals.index, sell_signals['Close'],
    marker='v', color='red', label='Sell Signal', alpha=1
)
signal_ax.set_title('Actual Close Price with Buy/Sell Signals')
signal_ax.legend()

# (Row 5) Cumulative Returns
returns_ax.plot(df_test.index, df_test['Cumulative_Market'], label='Market Return', color='blue')
returns_ax.plot(df_test.index, df_test['Cumulative_Strategy'], label='Strategy Return', color='red')
returns_ax.set_title('Cumulative Returns: Market vs. Strategy')
returns_ax.legend()

plt.xlabel('Date')
plt.tight_layout()
plt.show()
progress.value += 1

# ---------------------------------------------------------
# 14) Completed
# ---------------------------------------------------------
progress.description = 'Completed'
# Fill the bar explicitly so that "Completed" always shows 100%, regardless
# of how the configured maximum compares with the number of increments made
# by the steps above.
progress.value = progress.max
print("Model training and evaluation completed.")


IntProgress(value=0, bar_style='info', description='Processing:', max=14)

[*********************100%***********************]  1 of 1 completed


Initial shape: (1258, 5)
Columns after processing: ['Close', 'High', 'Low', 'Open', 'Volume']
Shape after computing everything and dropna: (1239, 19)
Train size: (991, 13), Test size: (248, 13)
Fitting 5 folds for each of 36 candidates, totalling 180 fits
