In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
url = 'https://anaconda.org/conda-forge/libta-lib/0.4.0/download/linux-64/libta-lib-0.4.0-h166bdaf_1.tar.bz2'
!curl -L $url | tar xj -C /usr/lib/x86_64-linux-gnu/ lib --strip-components=1
url = 'https://anaconda.org/conda-forge/ta-lib/0.4.19/download/linux-64/ta-lib-0.4.19-py310hde88566_4.tar.bz2'
!curl -L $url | tar xj -C /usr/local/lib/python3.10/dist-packages/ lib/python3.10/site-packages/talib --strip-components=3
import talib

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  3999    0  3999    0     0   5771      0 --:--:-- --:--:-- --:--:--  5770
100  517k  100  517k    0     0   298k      0  0:00:01  0:00:01 --:--:-- 1217k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  4063    0  4063    0     0   7116      0 --:--:-- --:--:-- --:--:--  7115
100  392k  100  392k    0     0   384k      0  0:00:01  0:00:01 --:--:--  384k


In [3]:
!pip install TA-Lib yfinance pandas scikit-learn matplotlib

Collecting TA-Lib
  Downloading TA-Lib-0.5.1.tar.gz (369 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m369.6/369.6 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: TA-Lib
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mBuilding wheel for TA-Lib [0m[1;32m([0m[32mpyproject.toml[0m[1;32m)[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Building wheel for TA-Lib (pyproject.toml) ... [?25l[?25herror
[31m  ERROR: Failed building wheel for TA-Lib[0m[31m
[0mFailed to build TA-Lib
[31mERROR: ERROR: Failed to build installable wheels for some pyproject.toml based pr

In [4]:

import numpy as np
import pandas as pd
import yfinance as yf
import talib as ta
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from datetime import datetime, timedelta

# Function to fetch stock market data
def fetch_stock_data(ticker, start_date, end_date, interval='5m'):
    try:
        data = yf.download(ticker, start=start_date, end=end_date, interval=interval)
        if data.empty:
            print(f"No data found for {ticker} during the specified date range.")
            return pd.DataFrame()
        return data
    except Exception as e:
        print(f"Error fetching data for {ticker}: {e}")
        return pd.DataFrame()

# Add technical indicators
def add_technical_indicators(df):
    if 'Close' not in df.columns:
        raise ValueError("Error: 'Close' column not found in DataFrame!")
    close_prices = df['Close'].values.flatten()
    df['MA5'] = ta.SMA(close_prices, timeperiod=5)
    df['MA20'] = ta.SMA(close_prices, timeperiod=20)
    df['RSI'] = ta.RSI(close_prices, timeperiod=14)
    upper, middle, lower = ta.BBANDS(close_prices, timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)
    df['BB_upper'] = upper
    df['BB_lower'] = lower
    macd, macdsignal, macdhist = ta.MACD(close_prices, fastperiod=12, slowperiod=26, signalperiod=9)
    df['MACD'] = macd
    df['MACD_signal'] = macdsignal
    return df.dropna()

# Generate targets
def generate_target(df, n_periods=10):
    df['Target'] = np.where(df['Close'].shift(-n_periods) > df['Close'], 1, 0)
    return df.dropna()

# Prepare data for training
def prepare_data(df):
    features = ['MA5', 'MA20', 'RSI', 'BB_upper', 'BB_lower', 'MACD', 'MACD_signal']
    X = df[features].values
    y = df['Target'].values
    return X, y

# Train cascade model
def train_cascade(X_train, y_train, num_models=3):
    models = []
    for i in range(num_models):
        model = MLPClassifier(hidden_layer_sizes=(10,), solver='adam', alpha=0.0001, max_iter=100000 )
        model.fit(X_train, y_train)
        models.append(model)
    return models

# Cascading prediction
def cascading_predict(models, X, y, max_impurity=0.002):
    unpruned = []
    level_accuracies = []
    correct_counts = []
    incorrect_counts = []

    for model in models:
        probs = model.predict_proba(X)
        correct_predictions = 0
        incorrect_predictions = 0
        total_predictions = 0
        next_X, next_y = [], []
        for idx, prob in enumerate(probs):
            gini = 1 - np.sum(prob ** 2)
            if gini <= max_impurity:
                if np.argmax(prob) == y[idx]:
                    correct_predictions += 1
                else:
                    incorrect_predictions += 1
                total_predictions += 1
                unpruned.append((prob, X[idx], y[idx]))
            else:
                next_X.append(X[idx])
                next_y.append(y[idx])
        level_accuracy = correct_predictions / total_predictions if total_predictions else 0
        level_accuracies.append(level_accuracy)
        correct_counts.append(correct_predictions)
        incorrect_counts.append(incorrect_predictions)
        X, y = np.array(next_X), np.array(next_y)

    return unpruned, level_accuracies, correct_counts, incorrect_counts

# Synthetic data generation
def generate_synthetic_data(size=1000):
    np.random.seed(42)
    X = np.random.rand(size, 7)
    y = (np.sum(X, axis=1) > 3.5).astype(int)
    return X, y

# Function to plot level-wise performance
def plot_level_performance(level_accuracies, correct_counts, incorrect_counts, title="Cascade Level Performance"):
    levels = range(1, len(level_accuracies) + 1)
    fig, ax1 = plt.subplots(figsize=(10, 6))

    # Bar chart for correct and incorrect counts
    ax1.bar(levels, correct_counts, label="Correct Predictions", color='green', alpha=0.7)
    ax1.bar(levels, incorrect_counts, label="Incorrect Predictions", color='red', alpha=0.7, bottom=correct_counts)
    ax1.set_ylabel("Number of Predictions")
    ax1.set_xlabel("Cascade Level")
    ax1.set_title(title)
    ax1.legend(loc="upper left")

    # Line graph for accuracy
    ax2 = ax1.twinx()
    ax2.plot(levels, level_accuracies, label="Accuracy", color='blue', marker='o', linestyle='--')
    ax2.set_ylabel("Accuracy")
    ax2.set_ylim(0, 1)
    ax2.legend(loc="upper right")

    plt.tight_layout()
    plt.show()

# Main execution
ticker = "RELIANCE.NS"
end_date = datetime.today().strftime('%Y-%m-%d')
start_date = (datetime.today() - timedelta(days=30)).strftime('%Y-%m-%d')

# Market data
stock_data = fetch_stock_data(ticker, start_date, end_date, interval='5m')
if not stock_data.empty:
    stock_data = add_technical_indicators(stock_data)
    stock_data = generate_target(stock_data)
    X_market, y_market = prepare_data(stock_data)
    market_models = train_cascade(X_market, y_market)
    unpruned_market, market_accuracies, market_correct, market_incorrect = cascading_predict(market_models, X_market, y_market)
    market_accuracy = accuracy_score([x[2] for x in unpruned_market], [np.argmax(x[0]) for x in unpruned_market])
    print(f"Market Data Overall Accuracy: {market_accuracy:.4f}")
    print("Market Data Level-wise Accuracies:", market_accuracies)
    print("Market Data Level-wise Correct Predictions:", market_correct)
    print("Market Data Level-wise Incorrect Predictions:", market_incorrect)
    plot_level_performance(market_accuracies, market_correct, market_incorrect, title="Market Data Cascade Performance")
else:
    print("Market data unavailable.")

# Synthetic data
X_synthetic, y_synthetic = generate_synthetic_data()
synthetic_models = train_cascade(X_synthetic, y_synthetic)
unpruned_synthetic, synthetic_accuracies, synthetic_correct, synthetic_incorrect = cascading_predict(synthetic_models, X_synthetic, y_synthetic)
synthetic_accuracy = accuracy_score([x[2] for x in unpruned_synthetic], [np.argmax(x[0]) for x in unpruned_synthetic])
print(f"Synthetic Data Overall Accuracy: {synthetic_accuracy:.4f}")
print("Synthetic Data Level-wise Accuracies:", synthetic_accuracies)
print("Synthetic Data Level-wise Correct Predictions:", synthetic_correct)
print("Synthetic Data Level-wise Incorrect Predictions:", synthetic_incorrect)
plot_level_performance(synthetic_accuracies, synthetic_correct, synthetic_incorrect, title="Synthetic Data Cascade Performance")


[*********************100%***********************]  1 of 1 completed


ValueError: Expected 2D array, got 1D array instead:
array=[].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [None]:
import joblib
import os

# Save cascade models to a directory
def save_cascade_models(models, directory="cascade_models"):
    if not os.path.exists(directory):
        os.makedirs(directory)
    for i, model in enumerate(models):
        filename = os.path.join(directory, f"model_level_{i+1}.joblib")
        joblib.dump(model, filename)
    print(f"Models saved to {directory}")

# Example: Save trained models
save_cascade_models(market_models, directory="/content/drive/MyDrive/Cascading_model")