Training Pipeline

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib

In [2]:
def load_and_preprocess(filepath):
    """Load the price dataset and derive per-coin volatility features and target.

    Parameters
    ----------
    filepath : str, os.PathLike or file-like
        CSV source passed to ``pd.read_csv``. It must provide the columns
        ``date``, ``crypto_name``, ``high``, ``low``, ``close``, ``volume``
        and ``marketCap``. If a local path does not exist but the Colab
        location ``/content/sample_data/dataset.csv`` does, that file is used
        instead (earlier versions always read it) and a notice is printed.

    Returns
    -------
    pandas.DataFrame
        Rows sorted by ``crypto_name`` then ``date`` with these columns added:
        ``close_prev``, ``log_ret``, ``volatility_7d``, ``volatility_30d``,
        ``high_low``, ``high_prevclose``, ``low_prevclose``, ``tr``,
        ``atr_14``, ``sma_20``, ``std_20``, ``bb_upper``, ``bb_lower``,
        ``bb_width``, ``liquidity_ratio`` and ``target_volatility``.
        Every row containing a NaN or an infinite numeric value is dropped.
    """
    import os

    legacy_path = "/content/sample_data/dataset.csv"
    source = filepath
    is_local_path = isinstance(filepath, (str, os.PathLike)) and "://" not in str(filepath)
    if is_local_path and not os.path.exists(filepath) and os.path.exists(legacy_path):
        print(f"'{filepath}' not found; falling back to '{legacy_path}'.")
        source = legacy_path

    # Load Data
    df = pd.read_csv(source)
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values(by=['crypto_name', 'date'])

    infinities = [np.inf, -np.inf]

    def per_coin_rolling(column, window, stat):
        # Rolling `stat` ('mean' or 'std') of `column`, computed inside each coin
        # so that windows never mix rows of different coins.
        return df.groupby('crypto_name')[column].transform(
            lambda s: getattr(s.rolling(window=window), stat)()
        )

    # Feature Engineering
    # 1. Log Returns
    df['close_prev'] = df.groupby('crypto_name')['close'].shift(1)
    # A zero price yields +/-inf here; turn it into NaN so only the windows
    # containing it are invalidated and the row is removed by dropna() below.
    df['log_ret'] = np.log(df['close'] / df['close_prev']).replace(infinities, np.nan)

    # 2. Rolling Volatility (Features)
    df['volatility_7d'] = per_coin_rolling('log_ret', 7, 'std')
    df['volatility_30d'] = per_coin_rolling('log_ret', 30, 'std')

    # 3. ATR (Average True Range)
    df['high_low'] = df['high'] - df['low']
    df['high_prevclose'] = (df['high'] - df['close_prev']).abs()
    df['low_prevclose'] = (df['low'] - df['close_prev']).abs()
    df['tr'] = df[['high_low', 'high_prevclose', 'low_prevclose']].max(axis=1)
    df['atr_14'] = per_coin_rolling('tr', 14, 'mean')

    # 4. Bollinger Bands Width
    df['sma_20'] = per_coin_rolling('close', 20, 'mean')
    df['std_20'] = per_coin_rolling('close', 20, 'std')
    df['bb_upper'] = df['sma_20'] + 2 * df['std_20']
    df['bb_lower'] = df['sma_20'] - 2 * df['std_20']
    df['bb_width'] = (df['bb_upper'] - df['bb_lower']) / df['sma_20']

    # 5. Liquidity Ratio
    df['liquidity_ratio'] = df['volume'] / df['marketCap']

    # 6. Target: Future Volatility (Next 7 days)
    df['target_volatility'] = df.groupby('crypto_name')['volatility_7d'].shift(-7)

    # dropna() does not remove +/-inf (e.g. volume / 0 market cap), and the
    # estimator rejects infinite inputs, so map them to NaN first.
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    df[numeric_cols] = df[numeric_cols].replace(infinities, np.nan)

    # Drop NaNs
    df_clean = df.dropna()
    return df_clean

In [3]:
def train_and_evaluate(df, crypto_symbol='Bitcoin', model_path='volatility_model.pkl'):
    """Fit a random-forest volatility model for one coin and report hold-out metrics.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``crypto_name``, the feature columns listed below and
        ``target_volatility``.
    crypto_symbol : str
        Value of ``crypto_name`` selecting the coin to model.
    model_path : str
        Destination of the fitted model written with ``joblib.dump``.

    Returns
    -------
    tuple
        ``(model, X_test, y_test, y_pred)``.

    Raises
    ------
    ValueError
        If fewer than two rows exist for ``crypto_symbol``, in which case no
        train/test split is possible.
    """
    # Filter for specific crypto
    data = df[df['crypto_name'] == crypto_symbol].copy()
    if len(data) < 2:
        raise ValueError(
            f"Need at least 2 rows for '{crypto_symbol}' to build a train/test split, "
            f"found {len(data)}."
        )

    # The split below is positional, so make the chronological order explicit
    # instead of relying on the caller having sorted the frame.
    if 'date' in data.columns:
        data = data.sort_values('date', kind='stable')

    features = ['volatility_7d', 'volatility_30d', 'atr_14', 'bb_width', 'liquidity_ratio', 'volume', 'log_ret']
    target = 'target_volatility'

    X = data[features]
    y = data[target]

    # Time-Series Split (No shuffling): first 80% of rows train, the rest test.
    train_size = int(len(X) * 0.8)
    X_train, X_test = X.iloc[:train_size], X.iloc[train_size:]
    y_train, y_test = y.iloc[:train_size], y.iloc[train_size:]

    print(f"Training on {len(X_train)} records, Testing on {len(X_test)} records.")

    # Model Training
    model = RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42)
    model.fit(X_train, y_train)

    # Prediction
    y_pred = model.predict(X_test)

    # Evaluation
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f"\nModel Performance ({crypto_symbol}):")
    print(f"RMSE: {rmse:.4f}")
    print(f"MAE: {mae:.4f}")
    print(f"R2 Score: {r2:.4f}")

    # Save Model
    joblib.dump(model, model_path)
    print(f"\nModel saved as '{model_path}'")

    return model, X_test, y_test, y_pred

if __name__ == "__main__":
    # The dataset is read from Colab's sample_data folder, so pass that
    # location explicitly rather than a bare file name.
    DATA_PATH = '/content/sample_data/dataset.csv'
    df = load_and_preprocess(DATA_PATH)
    train_and_evaluate(df, 'Bitcoin')

Training on 1715 records, Testing on 429 records.

Model Performance (Bitcoin):
RMSE: 0.0318
MAE: 0.0276
R2 Score: -3.0194

Model saved as 'volatility_model.pkl'


Deployment

In [6]:
# Install the deployment dependencies in a single step. %pip (rather than !pip)
# installs into the environment of the running kernel.
%pip install streamlit plotly joblib pandas scikit-learn

Collecting streamlit
  Downloading streamlit-1.53.0-py3-none-any.whl.metadata (10 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.53.0-py3-none-any.whl (9.1 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m9.1/9.1 MB[0m [31m61.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m6.9/6.9 MB[0m [31m112.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.53.0


In [8]:
# Make sure streamlit is available in the kernel's environment (quiet output).
%pip install -q streamlit

In [21]:
import os
import time
import subprocess

# 1. Install necessary libraries
import sys

CLOUDFLARED_BIN = "cloudflared-linux-amd64"
CLOUDFLARED_URL = "https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64"

print("Installing libraries...")
# Use the kernel's own interpreter so the package lands in the environment that
# will run Streamlit, and fail loudly instead of continuing after an error.
subprocess.run([sys.executable, "-m", "pip", "install", "streamlit", "-q"], check=True)
# Download the tunnel binary only when it is not already present.
if not os.path.exists(CLOUDFLARED_BIN):
    subprocess.run(["wget", "-q", "-nc", CLOUDFLARED_URL], check=True)
subprocess.run(["chmod", "+x", CLOUDFLARED_BIN], check=True)

# 2. Create the app.py file using Python (no magic command needed)
# The generated app reports the real cause of a load failure instead of
# treating every error as a missing file, and it also accepts a dataset.csv
# located next to app.py.
app_code = """
import os

import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px

# Colab location first, then a dataset.csv shipped alongside this script.
DATA_CANDIDATES = [
    '/content/sample_data/dataset.csv',
    os.path.join(os.path.dirname(os.path.abspath(__file__)), 'dataset.csv'),
]


@st.cache_data
def load_data():
    # Errors are not swallowed here; the caller turns them into a message.
    data_path = next((p for p in DATA_CANDIDATES if os.path.exists(p)), None)
    if data_path is None:
        raise FileNotFoundError('dataset.csv')

    # Load the dataset
    df = pd.read_csv(data_path)
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values(by=['crypto_name', 'date'])

    # Feature Engineering (Simplified for Demo)
    df['close_prev'] = df.groupby('crypto_name')['close'].shift(1)
    df['log_ret'] = np.log(df['close'] / df['close_prev'])
    df['volatility_7d'] = df.groupby('crypto_name')['log_ret'].transform(lambda x: x.rolling(window=7).std())

    return df.dropna()


st.title("Cryptocurrency Volatility Forecaster")
st.write("Predicting market stability using Machine Learning.")

try:
    df = load_data()
except FileNotFoundError:
    st.error("dataset.csv not found. Please upload the file to the Colab 'Files' section.")
    st.stop()
except Exception as exc:
    st.error(f"Could not load the dataset: {exc}")
    st.stop()

if not df.empty:
    crypto_list = df['crypto_name'].unique()
    selected_crypto = st.selectbox("Select Cryptocurrency", crypto_list, index=0)

    # Filter Data
    crypto_data = df[df['crypto_name'] == selected_crypto].copy()

    # Charts
    st.plotly_chart(px.line(crypto_data, x='date', y='close', title=f'{selected_crypto} Price'))
    st.plotly_chart(px.line(crypto_data, x='date', y='volatility_7d', title=f'{selected_crypto} Volatility (7d)'))

    # Prediction Simulation: a random perturbation of the latest value,
    # the trained model is not loaded here.
    if st.button("Predict Volatility"):
        last_vol = crypto_data.iloc[-1]['volatility_7d']
        predicted = last_vol * np.random.normal(1.0, 0.05)
        st.metric("Predicted Volatility", f"{predicted:.4f}", f"{predicted - last_vol:.4f}")
else:
    st.warning("The dataset was loaded but contains no usable rows after preprocessing.")
"""

with open("app.py", "w", encoding="utf-8") as f:
    f.write(app_code)

print("app.py created successfully!")

# 3. Run Streamlit and the Cloudflare Tunnel
print("Starting Streamlit app...")

# Stop leftovers of a previous run so port 8501 is free again.
for stale_pattern in ("streamlit", "cloudflared"):
    subprocess.run(["pkill", "-f", stale_pattern])

# Launch the Streamlit server without blocking the notebook.
streamlit_cmd = ["streamlit", "run", "app.py", "--server.port", "8501"]
subprocess.Popen(streamlit_cmd)

# Launch the tunnel; both of its output streams are written to tunnel.log.
tunnel_cmd = ["./cloudflared-linux-amd64", "tunnel", "--url", "http://localhost:8501"]
with open("tunnel.log", "w") as tunnel_log:
    subprocess.Popen(tunnel_cmd, stdout=tunnel_log, stderr=tunnel_log)

# Give cloudflared time to log the public URL before the log is read.
print("Waiting for link... (approx 10 seconds)")
time.sleep(10)

# 4. Extract and print the link
import re

TUNNEL_URL_RE = re.compile(r'https://[\w-]+\.trycloudflare\.com')
MAX_ATTEMPTS = 5

found_link = False
for attempt in range(MAX_ATTEMPTS):
    try:
        # errors="replace" keeps a stray non-UTF-8 byte in the log from aborting the scan.
        with open("tunnel.log", "r", errors="replace") as f:
            logs = f.read()
    except OSError:
        # Only a missing/unreadable log is tolerated; other errors should surface.
        logs = ""

    matches = TUNNEL_URL_RE.findall(logs)
    if matches:
        print("\n✅ CLICK THIS LINK TO OPEN THE APP:")
        # The most recently logged URL is the one to use.
        print(f"👉 {matches[-1]}")
        found_link = True
        break

    # No point in waiting after the final attempt.
    if attempt < MAX_ATTEMPTS - 1:
        time.sleep(2)

if not found_link:
    print("Link generation is taking longer than expected. Please check 'tunnel.log' manually.")

Installing libraries...
app.py created successfully!
Starting Streamlit app...
Waiting for link... (approx 10 seconds)

✅ CLICK THIS LINK TO OPEN THE APP:
👉 https://winners-load-shapes-barrier.trycloudflare.com


Requirement Files

In [22]:
# 1. Create the requirements.txt file
# One package per line; the leading and trailing newlines are kept so the
# written file is identical to the previous literal.
required_packages = ["streamlit", "pandas", "numpy", "joblib", "plotly", "scikit-learn"]
requirements = "\n" + "\n".join(required_packages) + "\n"
with open("requirements.txt", "w") as req_file:
    req_file.write(requirements)

# 2. Check if model exists, if not, warn user (but continue)
import os
import shutil

MODEL_FILE = "volatility_model.pkl"
STAGING_DIR = "my_crypto_app"

if not os.path.exists(MODEL_FILE):
    print("⚠️ Warning: volatility_model.pkl not found. You may need to run the training code again.")

# 3. Zip all necessary files together
# Archive name -> candidate source locations (first existing one wins). The
# dataset is read from /content/sample_data/ elsewhere in this notebook, so it
# is looked up there as well as in the working directory.
bundle_sources = {
    'app.py': ['app.py'],
    'dataset.csv': ['dataset.csv', '/content/sample_data/dataset.csv'],
    'requirements.txt': ['requirements.txt'],
    MODEL_FILE: [MODEL_FILE],
}

# Create a folder for the zip
os.makedirs(STAGING_DIR, exist_ok=True)
for archive_name, candidates in bundle_sources.items():
    source = next((path for path in candidates if os.path.exists(path)), None)
    if source is None:
        # The missing model was already reported above; report anything else here
        # so an incomplete archive does not go unnoticed.
        if archive_name != MODEL_FILE:
            print(f"Warning: {archive_name} not found; it will be missing from the archive.")
        continue
    shutil.copy(source, os.path.join(STAGING_DIR, archive_name))

# Zip it
shutil.make_archive("crypto_project_files", 'zip', STAGING_DIR)

# Colab-only helper that triggers a browser download of the archive.
from google.colab import files
files.download("crypto_project_files.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>