# 🚀 Cryptocurrency Liquidity Prediction Project
This project involves building a machine learning model to predict the liquidity level of cryptocurrencies using historical data.

## 1. 📥 Data Collection
We load two daily CoinGecko snapshots of cryptocurrency market data (2022-03-16 and 2022-03-17) and combine them into a single table.

In [None]:
import pandas as pd
import numpy as np

# Read each daily snapshot and stamp every row with the date it was taken on.
# `.assign` adds (or overwrites) the "date" column with a constant string.
df1 = pd.read_csv("coin_gecko_2022-03-16.csv").assign(date="2022-03-16")
df2 = pd.read_csv("coin_gecko_2022-03-17.csv").assign(date="2022-03-17")

# Stack the two snapshots row-wise; ignore_index gives the result a fresh 0..n-1 index.
df = pd.concat([df1, df2], ignore_index=True)
df.head()

## 2. 🧹 Data Preprocessing
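We clean the data by dropping rows with missing values and coercing the numeric columns to numeric types; rows whose values cannot be parsed as numbers are dropped as well.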

In [None]:
numeric_cols = ["price", "1h", "24h", "7d", "24h_volume", "mkt_cap"]

# Step 1: discard any row that has a missing value in any column.
complete_rows = df.dropna()

# Step 2: coerce each numeric column; entries that cannot be parsed become NaN.
coerced = {
    col: pd.to_numeric(complete_rows[col], errors="coerce")
    for col in numeric_cols
}

# Step 3: swap in the coerced columns, drop rows where coercion produced NaN,
# and renumber the index from 0 so later positional joins line up.
df = (
    complete_rows.assign(**coerced)
    .dropna(subset=numeric_cols)
    .reset_index(drop=True)
)
df.info()

## 3. 📊 Exploratory Data Analysis (EDA)
We explore data trends, correlations, and distributions to understand patterns.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Volume trend: the ten coins with the largest mean 24h volume.
mean_volume_by_coin = df.groupby("coin")["24h_volume"].mean()
top_volume = mean_volume_by_coin.nlargest(10)

fig, ax = plt.subplots()
sns.barplot(x=top_volume.index, y=top_volume.values, ax=ax)
ax.tick_params(axis="x", labelrotation=45)  # tilt coin names so they do not overlap
ax.set_title("Top 10 Coins by Avg 24h Volume")
plt.show()

# Correlation heatmap: pairwise correlations between the numeric columns.
numeric_corr = df[numeric_cols].corr()

fig, ax = plt.subplots()
sns.heatmap(numeric_corr, annot=True, cmap="coolwarm", ax=ax)
ax.set_title("Correlation Heatmap")
plt.show()

## 4. 🛠️ Feature Engineering
We derive new features to better capture market behavior.

In [None]:
from sklearn.preprocessing import StandardScaler

# New features, derived from the raw numeric columns.
# NOTE(review): a zero `mkt_cap` produces inf here, which StandardScaler rejects —
# confirm the cleaned data contains no zero market caps.
df["volume_to_mktcap"] = df["24h_volume"] / df["mkt_cap"]
df["volatility_24h"] = df["24h"].abs()
df["volatility_7d"] = df["7d"].abs()
new_feats = ["volume_to_mktcap", "volatility_24h", "volatility_7d"]

# Fit `scaler` exactly ONCE, on the raw columns behind the model's inputs.
# Previously the same object was re-fitted on the derived features, which threw
# away the statistics of the original numeric columns; the `scaler` exported
# later in the notebook could then not reproduce the model's inputs.
# StandardScaler standardises each column independently, so the scaled values
# are the same as when the columns were fitted in two separate passes.
model_inputs = numeric_cols + ["volatility_24h", "volatility_7d"]
scaler = StandardScaler()
scaled = scaler.fit_transform(df[model_inputs])
scaled_df = pd.DataFrame(
    scaled,
    columns=[f"{col}_scaled" for col in model_inputs],
    index=df.index,  # align on df's own index instead of assuming 0..n-1
)

# `volume_to_mktcap` is standardised with a throwaway scaler so that `scaler`
# keeps only the statistics of the model's input columns.
scaled_new = StandardScaler().fit_transform(df[["volume_to_mktcap"]])
scaled_new_df = pd.DataFrame(
    scaled_new,
    columns=["volume_to_mktcap_scaled"],
    index=df.index,
)

# Drop any scaled columns left over from a previous run of this cell so that
# re-running it does not create duplicate column names.
scaled_cols = pd.concat([scaled_df, scaled_new_df], axis=1)
df = pd.concat(
    [df.drop(columns=scaled_cols.columns, errors="ignore"), scaled_cols],
    axis=1,
)
df.head()

## 5. 🤖 Model Selection
## 6. 🏋️ Model Training
We choose a Random Forest regressor for its robustness and train it on the scaled features to predict the volume-to-market-cap ratio, which serves as our liquidity measure.

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Feature matrix: the scaled market columns plus the scaled volatility features.
feature_cols = [
    "price_scaled", "1h_scaled", "24h_scaled", "7d_scaled",
    "24h_volume_scaled", "mkt_cap_scaled",
    "volatility_24h_scaled", "volatility_7d_scaled",
]
X = df[feature_cols]

# Target: the unscaled volume-to-market-cap ratio.
# NOTE(review): this target is computed from 24h_volume and mkt_cap, and both of
# those appear (scaled) among the features — this looks like target leakage and
# may inflate the scores; confirm whether these two features should be used.
y = df["volume_to_mktcap"]

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Shallow forest (depth 4, 100 trees) as the baseline model.
rf_params = {"n_estimators": 100, "max_depth": 4, "random_state": 42}
model = RandomForestRegressor(**rf_params)
model.fit(X_train, y_train)

## 7. 📈 Model Evaluation
We evaluate the model performance using standard regression metrics.

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Predict on the held-out rows and report each regression metric in turn.
y_pred = model.predict(X_test)

baseline_metrics = (
    ("R2 Score:", r2_score),
    ("MAE:", mean_absolute_error),
    ("MSE:", mean_squared_error),
)
for label, metric_fn in baseline_metrics:
    print(label, metric_fn(y_test, y_pred))

## 8. 🔧 Hyperparameter Tuning
We try another Random Forest configuration manually to compare performance.

In [None]:
# Alternative configuration: more and deeper trees, with a minimum split size.
model2_params = {
    "n_estimators": 200,
    "max_depth": 6,
    "min_samples_split": 5,
    "random_state": 42,
}
model2 = RandomForestRegressor(**model2_params)
model2.fit(X_train, y_train)
y_pred2 = model2.predict(X_test)

# Score on the same held-out rows so the numbers are comparable with the first model.
for label, metric_fn in (("Model 2 R2:", r2_score), ("Model 2 MAE:", mean_absolute_error)):
    print(label, metric_fn(y_test, y_pred2))

## 9. 🧪 Model Testing & Validation
We compare actual vs predicted liquidity levels to validate the model.

In [None]:
# Side-by-side table of held-out targets and the first model's predictions.
# The target is converted to a plain array so rows pair up by position, not by index label.
actual_values = y_test.to_numpy()
results = pd.DataFrame({"Actual": actual_values, "Predicted": y_pred})
results.head()

## 10. 🚀 Local Deployment Preparation
We save the first trained model (`model`) and the fitted scaler to disk so they can be loaded by a Streamlit web app.

In [None]:
import joblib
# Persist the first Random Forest (`model`, not `model2`) and the `scaler` object
# as it stands at this point in the notebook.
# NOTE(review): confirm `scaler` was fitted on exactly the columns the app will transform.
model_path, scaler_path = "rf_model.pkl", "scaler.pkl"
joblib.dump(model, model_path)
joblib.dump(scaler, scaler_path)