In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import streamlit as st
import uuid
from datetime import datetime
import os

In [6]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [5]:
# Step 2: Load Dataset
# Read the raw CSV (path is relative to the current working directory).
df = pd.read_csv(filepath_or_buffer='dataset.csv')


In [7]:
# Seed NumPy's global random generator so stochastic steps are repeatable
np.random.seed(seed=42)


In [8]:
# Function to calculate volatility (standard deviation of returns)
def calculate_volatility(df, window=7, periods_per_year=252, price_col='close'):
    """Add simple returns and annualized rolling volatility columns to ``df``.

    The frame is modified in place (two columns are written) and is also
    returned, so both ``calculate_volatility(df)`` and
    ``df = calculate_volatility(df)`` work.

    Returns are computed over the rows in their current order with no
    grouping; if several assets are stacked in one frame, call this once per
    asset, otherwise the first return of each asset is computed against the
    previous asset's last price.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the price column named by ``price_col``.
    window : int, default 7
        Number of rows in each rolling window for the standard deviation.
    periods_per_year : int or float, default 252
        Annualization factor; the rolling standard deviation is multiplied
        by ``sqrt(periods_per_year)``. The default keeps the previously
        hard-coded value; pass e.g. 365 for data with one row per calendar day.
    price_col : str, default 'close'
        Name of the column holding the prices.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``'Returns'`` and ``'Volatility'``
        columns added (or overwritten).

    Raises
    ------
    KeyError
        If ``price_col`` is not a column of ``df``.
    """
    if price_col not in df.columns:
        raise KeyError(f"price column {price_col!r} not found in DataFrame")

    # Row-over-row percentage change; the first row has no predecessor -> NaN
    df['Returns'] = df[price_col].pct_change()

    # Rolling sample standard deviation of returns, scaled to an annual figure
    rolling_std = df['Returns'].rolling(window=window).std()
    df['Volatility'] = rolling_std * np.sqrt(periods_per_year)
    return df


In [12]:
# Step 3: Clean Data
# Drop the leftover index column. Reassigning (instead of inplace=True) and
# errors='ignore' keep this cell safe to re-run: a second execution no longer
# raises KeyError because 'Unnamed: 0' is already gone.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

# Convert the date column to datetime64 (a no-op if it is already converted)
df['date'] = pd.to_datetime(df['date'])


In [13]:
# Step 4: Feature Engineering
# Order rows by coin and then chronologically so the per-coin rolling
# calculations below see each coin's prices in date order.
df = df.sort_values(['crypto_name', 'date'], ascending=True)


In [14]:
# Daily Return
# Percentage change of 'close' versus the previous row, computed separately
# within each crypto_name group (the first row of every group is NaN).
close_grouped_by_coin = df.groupby('crypto_name')['close']
df['daily_return'] = close_grouped_by_coin.pct_change()


In [15]:
# Rolling Volatility (7-day)
# Standard deviation of daily returns over a 7-row window, per coin.
grouped_rolling_std = (
    df.groupby('crypto_name')['daily_return']
      .rolling(window=7)
      .std()
)
# The grouped result is indexed by (crypto_name, original index); dropping the
# first level lets the assignment align back onto df's own index.
df['rolling_volatility_7d'] = grouped_rolling_std.reset_index(level=0, drop=True)


In [16]:
# Liquidity Ratio
# volume / marketCap. A zero marketCap would produce +/-inf, which dropna()
# does not remove; map those to NaN so such rows are dropped with the other
# incomplete rows instead of leaking infinities into later steps.
df['liquidity_ratio'] = (
    df['volume']
    .div(df['marketCap'])
    .replace([np.inf, -np.inf], np.nan)
)


In [17]:

# 7-day Moving Average of Close Price
# Mean of 'close' over a 7-row window, evaluated independently for each coin;
# the first six rows of every coin are NaN.
close_per_coin = df.groupby('crypto_name')['close']
df['ma_7_close'] = close_per_coin.transform(
    lambda prices: prices.rolling(7).mean()
)


In [18]:
# Drop NaNs
# Keep only rows with no missing value in any column; df itself is left
# untouched and the result is stored under a new name.
df_cleaned = df.dropna(axis=0, how='any')


In [19]:
# Step 5: EDA Visualizations for One Crypto (Bitcoin)
sample_crypto = "Bitcoin"
# Boolean mask of the rows belonging to the chosen coin
is_sample_coin = df_cleaned['crypto_name'].eq(sample_crypto)
sample_df = df_cleaned.loc[is_sample_coin]


In [20]:
# Plot 1: Close Price
# Use an explicit figure/axes, label the plot from sample_crypto (so title and
# filename follow the selected coin), save it, then close the figure so no
# empty figure is left open in the kernel.
fig, ax = plt.subplots()
sns.lineplot(data=sample_df, x='date', y='close', ax=ax)
ax.set_title(f"{sample_crypto} - Close Price Over Time")
fig.savefig(f"{sample_crypto.lower().replace(' ', '_')}_close_price.png")
plt.close(fig)

<Figure size 640x480 with 0 Axes>

In [21]:
# Plot 2: Rolling Volatility
# Use an explicit figure/axes, label the plot from sample_crypto (so title and
# filename follow the selected coin), save it, then close the figure so no
# empty figure is left open in the kernel.
fig, ax = plt.subplots()
sns.lineplot(data=sample_df, x='date', y='rolling_volatility_7d', ax=ax)
ax.set_title(f"{sample_crypto} - 7-Day Rolling Volatility")
fig.savefig(f"{sample_crypto.lower().replace(' ', '_')}_volatility.png")
plt.close(fig)

<Figure size 640x480 with 0 Axes>

In [22]:

# Plot 3: Liquidity Ratio
# Use an explicit figure/axes, label the plot from sample_crypto (so title and
# filename follow the selected coin), save it, then close the figure so no
# empty figure is left open in the kernel.
fig, ax = plt.subplots()
sns.lineplot(data=sample_df, x='date', y='liquidity_ratio', ax=ax)
ax.set_title(f"{sample_crypto} - Liquidity Ratio Over Time")
fig.savefig(f"{sample_crypto.lower().replace(' ', '_')}_liquidity.png")
plt.close(fig)

<Figure size 640x480 with 0 Axes>