Step 1: Data Collection

        * Download the dataset from the provided Google Drive link.

        * Verify file formats (CSV recommended).

        * If the 2016–2017 data is split across several CSV files, combine them into a single DataFrame.

In [None]:
import pandas as pd
import glob

# Load every CSV in the download folder and stack them into one DataFrame.
path = "path_to_downloaded_folder/*.csv"

# glob returns matches in arbitrary, OS-dependent order; sorting keeps the
# row order of the combined frame reproducible from run to run.
all_files = sorted(glob.glob(path))

# pd.concat fails with an opaque "No objects to concatenate" error on an
# empty list, so stop early with a message that names the pattern instead.
if not all_files:
    raise FileNotFoundError(f"No CSV files matched {path!r}")

df_list = [pd.read_csv(csv_file) for csv_file in all_files]

# ignore_index=True discards each file's own row index and renumbers 0..n-1.
data = pd.concat(df_list, ignore_index=True)
print(data.head())


Step 2: Data Preprocessing

       * Handle missing values: drop or impute with mean/median.

       * Ensure consistency: check duplicates, correct datatypes.

       * Normalize/scale features (especially price and volume).

       * Convert timestamps to datetime.

In [None]:
# Handling missing values: carry the last observed value forward.
# DataFrame.ffill() is the supported spelling; fillna(method='ffill') is
# deprecated in current pandas releases. Rows that are missing before the
# first observation have nothing to copy from and stay NaN.
data = data.ffill()

# Convert timestamp
data['Date'] = pd.to_datetime(data['Date'])

# Normalize numerical features
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
numerical_cols = ['Open', 'Close', 'High', 'Low', 'Volume']

# Each listed column is rescaled independently to the scaler's default
# [0, 1] range, so every column's minimum becomes exactly 0.
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test split performed later in the notebook - confirm this is intended.
data[numerical_cols] = scaler.fit_transform(data[numerical_cols])


Step 3: Exploratory Data Analysis (EDA)

         * Visualize trends in price, volume, and liquidity.

         * Compute correlations to detect influential factors.

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Trend of closing price over time ('Close' has already been min-max scaled,
# hence the "Normalized Price" axis label).
plt.figure(figsize=(12,6))
plt.plot(data['Date'], data['Close'])
plt.title('Cryptocurrency Closing Price Trend')
plt.xlabel('Date')
plt.ylabel('Normalized Price')
plt.show()

# Correlation heatmap
# Only numeric columns can be correlated. The frame also holds the datetime
# 'Date' column (and possibly text columns from the CSVs), which pandas >= 2.0
# no longer drops silently inside corr(), so select the numeric columns first.
numeric_data = data.select_dtypes(include='number')
plt.figure(figsize=(10,6))
sns.heatmap(numeric_data.corr(), annot=True, cmap='coolwarm')
plt.show()


Step 4: Feature Engineering

         * Liquidity-related features:

         * Moving averages (MA): short-term & long-term.

         * Volatility: standard deviation over rolling window.

         * Liquidity ratio: Volume / Price.

In [None]:
# Moving averages of the closing price: short (7-row) and long (30-row) windows.
data['MA_7'] = data['Close'].rolling(window=7).mean()
data['MA_30'] = data['Close'].rolling(window=30).mean()

# Volatility: rolling standard deviation of the closing price over 7 rows.
data['Volatility'] = data['Close'].rolling(window=7).std()

# Liquidity ratio
# 'Close' was min-max scaled during preprocessing, so its minimum is exactly 0.
# Dividing by it would produce inf (or NaN when Volume is 0 too), which then
# poisons every downstream computation. Treat a zero close as missing so the
# fill step below handles it like any other gap.
data['Liquidity_Ratio'] = data['Volume'] / data['Close'].replace(0, float('nan'))

# Fill NaN from rolling calculations
# The first window-1 rows of each rolling column are NaN; back-fill copies the
# first valid value upward. DataFrame.bfill() replaces the deprecated
# fillna(method='bfill'). The forward fill afterwards covers a gap at the very
# end of the frame, where back-fill has nothing to copy from.
data.bfill(inplace=True)
data.ffill(inplace=True)


Step 5: Model Selection

     Since this is a time-series regression problem, candidate models include:

      * ML Models: Random Forest, XGBoost, Linear Regression.

      * Deep Learning: LSTM or simple MLP for sequence-based predictions.

      * Here, I’ll demonstrate a Multilayer Perceptron (MLP) example:

In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Features and target
# The target column must NOT appear among the inputs: with 'Liquidity_Ratio'
# in the feature list the network only has to copy one input to the output,
# and the reported metrics say nothing about real predictive power.
# NOTE: anything that feeds this model later must supply these 8 features in
# this order.
target = 'Liquidity_Ratio'
features = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_7', 'MA_30', 'Volatility']

X = data[features].values
y = data[target].values

# Train-test split
# shuffle=False keeps the original row order, so the last 20% of rows form
# the test set instead of a random sample.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Build MLP model: two hidden ReLU layers and a single linear output unit.
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(1, activation='linear')  # regression output
])

# MSE is the training loss; MAE is tracked as an additional metric.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# NOTE(review): the test split doubles as validation data here, so the
# validation curves are not independent of the final test metrics.
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, batch_size=32)


Step 6: Model Evaluation

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score

# Predict on test set
y_pred = model.predict(X_test)

# Metrics
# RMSE is computed as the square root of the MSE. The `squared=False` keyword
# of mean_squared_error is deprecated and rejected by recent scikit-learn
# releases, while this form works on every version.
rmse = mean_squared_error(y_test, y_pred) ** 0.5
r2 = r2_score(y_test, y_pred)
print(f"RMSE: {rmse:.4f}, R² Score: {r2:.4f}")

# Plot training loss: per-epoch training and validation MSE recorded by fit().
plt.figure(figsize=(10,5))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('MSE Loss')
plt.legend()
plt.show()


Step 7: Hyperparameter Tuning

       * Use GridSearchCV for classical ML models or Keras Tuner for deep learning.

       * Tune: learning rate, number of neurons, batch size, and number of layers.

Step 8: Local Deployment (Optional)

Deploy using Streamlit or Flask to create a simple interface:


In [None]:

# Example Streamlit code
# Save this as app.py and run `streamlit run app.py`
import streamlit as st
import numpy as np
import tensorflow as tf

# Load the trained network from disk.
# NOTE(review): no step shown alongside this script writes
# 'mlp_liquidity_model.h5' - presumably the trained model is saved
# separately with model.save(...); confirm before running.
model = tf.keras.models.load_model('mlp_liquidity_model.h5')

st.title("Cryptocurrency Liquidity Prediction")

# User inputs
# NOTE(review): the values are passed to the model as entered - presumably
# they need the same scaling that was applied to the training data; confirm.
open_price = st.number_input("Open Price")
high_price = st.number_input("High Price")
low_price = st.number_input("Low Price")
close_price = st.number_input("Close Price")
volume = st.number_input("Volume")

# Build a single-row input whose width matches what the loaded model expects,
# instead of hard-coding the number of features. The five user-supplied values
# fill the leading columns; the remaining (engineered) features stay at zero.
raw_inputs = [open_price, high_price, low_price, close_price, volume]
n_features = model.input_shape[-1]
if n_features < len(raw_inputs):
    raise ValueError(
        f"Model expects {n_features} features but the form supplies {len(raw_inputs)}"
    )

input_data = np.zeros((1, n_features))
input_data[0, :len(raw_inputs)] = raw_inputs

# predict() returns an array with one row per input row; take its first value.
prediction = model.predict(input_data)
st.write(f"Predicted Liquidity Ratio: {prediction[0][0]:.4f}")