<a href="https://colab.research.google.com/github/JosephAFerguson/DeepLearningFinalProj/blob/main/DeepLearningJF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [203]:
import json, time, os
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from requests import Session
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [204]:
# ----------------------------
# 1. CoinMarketCap API Helper
# ----------------------------
class CryptoEndpoint:
    listingsEndpoint = "https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest"
    latestQuotes = "https://pro-api.coinmarketcap.com/v2/cryptocurrency/quotes/latest"
    historicalQuotes = "https://pro-api.coinmarketcap.com/v2/cryptocurrency/quotes/historical"

    def __init__(self, apikey) -> None:
        self.headers = {
            'Accepts': 'application/json',
            'X-CMC_PRO_API_KEY': apikey,
        }
        self.coinsInfo = {}
        self.coinsIds = []

    def GetCoinIdentifiers(self, limit=10):
        session = Session()
        session.headers.update(self.headers)
        response = session.get(
            url=self.listingsEndpoint,
            params={"limit": limit, "price_min": 1, "price_max": 20}
        )

        if response.status_code != 200:
            raise Exception(f"Error fetching listings: {response.text}")

        data = json.loads(response.text)

        for coin in data.get("data", []):
            self.coinsInfo[coin["name"]] = coin["id"]
            self.coinsIds.append(coin["id"])

        print(f"Loaded {len(self.coinsInfo)} coins.")
        return (self.coinsInfo, self.coinsIds)

    def GetSampleCoinHistoricalData(self, coin_id, days=60):
        """Fetch historical daily prices for one coin."""
        session = Session()
        session.headers.update(self.headers)

        end_time = datetime.utcnow()
        start_time = end_time - timedelta(days=days)

        params = {
            "id": coin_id,
            "time_start": start_time.isoformat(),
            "time_end": end_time.isoformat(),
            "interval": "24h",
        }

        response = session.get(url=self.historicalQuotes, params=params)
        if response.status_code != 200:
            print(f"Error for coin {coin_id}: {response.text}")
            return []

        data = json.loads(response.text)
        if data.get("status", {}).get("error_code") != 0:
            print(f"API error for {coin_id}: {data['status']}")
            return []

        coin_data = data.get("data", {})
        quotes = coin_data.get("quotes", [])
        if not quotes:
            return []

        historicals = []
        for quote in quotes:
            price = quote["quote"]["USD"]["price"]
            volume = quote["quote"]["USD"]["volume_24h"]
            marketcap = quote["quote"]["USD"]["market_cap"]
            circulating_supply = quote["quote"]["USD"]["circulating_supply"]
            total_supply = quote["quote"]["USD"]["total_supply"]
            historicals.append([price, volume, marketcap, circulating_supply, total_supply])

        return historicals

In [205]:
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim,
                 bidirectional=True, dropout=0.1):
        super().__init__()

        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1

        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=layer_dim,
            batch_first=True,
            dropout=dropout if layer_dim > 1 else 0.0,
            bidirectional=bidirectional
        )

        # Fully connected layer must match bidirectional hidden size:
        self.fc = nn.Linear(hidden_dim * self.num_directions, output_dim)

    def forward(self, x):
        batch_size = x.size(0)

        # Initial hidden states: [num_layers * num_directions, batch, hidden_dim]
        h0 = torch.zeros(self.layer_dim * self.num_directions, batch_size, self.hidden_dim).to(x.device)
        c0 = torch.zeros(self.layer_dim * self.num_directions, batch_size, self.hidden_dim).to(x.device)

        out, _ = self.lstm(x, (h0, c0))

        # Take the last timestep
        out = out[:, -1, :]  # shape: [batch, hidden_dim * num_directions]

        out = self.fc(out)
        return out   # logits for BCEWithLogitsLoss

In [206]:
# ----------------------------
# 3. Data Preparation Utility
# ----------------------------
def prepare_sequences(data_dict, window=10):
    """
    Convert dict {coin_id: [[price, volume], ...]} into windowed tensors
    Target: 1 if next price > current, else 0
    """
    scaler = MinMaxScaler()
    X, Y, labels = [], [], []

    for coin_id, seq in data_dict.items():
        arr = np.array(seq)
        if len(arr) <= window + 1:
            continue
        scaled = scaler.fit_transform(arr)
        prices = arr[:, 0]

        for i in range(len(scaled) - window - 1):
            X.append(scaled[i:i+window])
            next_dir = 1.0 if prices[i+window+1] > prices[i+window] else 0.0
            Y.append(next_dir)
            labels.append(coin_id)

    X = torch.tensor(np.array(X), dtype=torch.float32)
    Y = torch.tensor(np.array(Y), dtype=torch.float32).view(-1, 1)
    return X, Y, labels

In [207]:
def train_model(model, trainX, trainY, valX, valY, epochs=200, lr=0.01, patience=20):
    criterion = nn.BCEWithLogitsLoss()
    optimizer = Adam(model.parameters(), lr=lr)
    scheduler = ReduceLROnPlateau(optimizer, patience=10, factor=0.2)

    # Ensure tensors
    trainX = torch.tensor(trainX, dtype=torch.float32)
    trainY = torch.tensor(trainY, dtype=torch.float32)
    valX   = torch.tensor(valX,   dtype=torch.float32)
    valY   = torch.tensor(valY,   dtype=torch.float32)

    noImprovment = 0
    best_comb_acc = 0.0

    for epoch in range(epochs):
        # ----------------------------
        # Training
        # ----------------------------
        model.train()
        optimizer.zero_grad()

        outputs = model(trainX)
        loss = criterion(outputs, trainY)
        loss.backward()
        optimizer.step()

        # ----------------------------
        # Validation
        # ----------------------------
        model.eval()
        with torch.no_grad():
            val_outputs = model(valX)
            val_loss = criterion(val_outputs, valY)

            pred_train = torch.sigmoid(outputs)
            train_acc = (pred_train.round() == trainY).float().mean().item()

            pred_val = torch.sigmoid(val_outputs)
            val_acc = (pred_val.round() == valY).float().mean().item()

        # Step scheduler using validation loss
        scheduler.step(val_loss)
        if (train_acc+val_acc)/2 > best_comb_acc:
          noImprovment = 0
          best_comb_acc = (train_acc+val_acc)/2
        elif noImprovment>patience:
          return model
        else:
          noImprovment+=1

        # Status print every epoch
        print(
            f"Epoch [{epoch+1}/{epochs}] | "
            f"Train Loss: {loss.item():.5f} | Train Acc: {train_acc*100:.2f}% | "
            f"Val Loss: {val_loss.item():.5f} | Val Acc: {val_acc*100:.2f}%"
        )

    return model


In [208]:
# ----------------------------
# 5. Get and Save Data as CSV
# ----------------------------
def GetSaveData():
    apikey = input("Enter your CoinMarketCap API key: ").strip()
    ce = CryptoEndpoint(apikey)

    data_dir = "data"
    os.makedirs(data_dir, exist_ok=True)

    # Optionally control how "fresh" the data should be before refetching
    freshness_days = 1  # re-download if older than this many days

    coinsInfo, coinsIds = ce.GetCoinIdentifiers(limit=10)
    print("Fetching historical data (or loading cached files)...")

    all_data = {}

    for coinId in coinsIds:
        file_path = os.path.join(data_dir, f"{coinId}.csv")

        # Check if we already have cached data
        if os.path.exists(file_path):
            modified_time = datetime.fromtimestamp(os.path.getmtime(file_path))
            if datetime.now() - modified_time < timedelta(days=freshness_days):
                print(f"[INFO] Using cached data for {coinId}")
                df = pd.read_csv(file_path)
                all_data[coinId] = df
                continue
            else:
                print(f"[INFO] Cached data for {coinId} is old. Refetching...")

        # Fetch new data from API
        hist = ce.GetSampleCoinHistoricalData(coinId, days=60)
        if len(hist) > 0:
            df = pd.DataFrame(hist)
            df.to_csv(file_path, index=False)
            print(f"[INFO] Saved new data for {coinId} → {file_path}")
            all_data[coinId] = df
        else:
            print(f"[WARN] No data found for {coinId}")

        time.sleep(2)  # Avoid hitting API rate limits

    print(f"\n✅ Fetched or loaded data for {len(all_data)} coins.")
    return all_data

In [209]:
# ----------------------------
# 6. Main Execution
# ----------------------------
all_data = GetSaveData()

# Prepare sequences (X_all, Y_all, and matching labels)
X_all, Y_all, labels = prepare_sequences(all_data, window=10)

# Train / Val / Test split (60 / 20 / 20)
trainX, restX, trainY, restY = train_test_split(
    X_all, Y_all, test_size=0.4, shuffle=True
)
valX, testX, valY, testY = train_test_split(
    restX, restY, test_size=0.5, shuffle=True
)

print(f"Train Samples: {len(trainX)}")
print(f"Val Samples:   {len(valX)}")
print(f"Test Samples:  {len(testX)}")

# ----------------------------
# Train model
# ----------------------------
model = LSTMModel(input_dim=5, hidden_dim=64, layer_dim=2, output_dim=1)
trained_model = train_model(model, trainX, trainY, valX, valY, epochs=200, lr=0.01)

# ----------------------------
# Test Evaluation
# ----------------------------
trained_model.eval()

testX_t = torch.tensor(testX, dtype=torch.float32)
testY_t = torch.tensor(testY, dtype=torch.float32)

with torch.no_grad():
    logits = trained_model(testX_t)
    preds = torch.sigmoid(logits)
    pred_classes = preds.round()

    test_loss = nn.BCEWithLogitsLoss()(logits, testY_t).item()
    test_acc = (pred_classes == testY_t).float().mean().item()

print("\n===== TEST RESULTS =====")
print(f"Test Loss: {test_loss:.5f}")
print(f"Test Accuracy: {test_acc*100:.2f}%")
print("========================\n")


Enter your CoinMarketCap API key: a76bd6fc-2b66-4a25-843f-321def3437bd
Loaded 10 coins.
Fetching historical data (or loading cached files)...
[INFO] Using cached data for 3408
[INFO] Using cached data for 1975
[INFO] Using cached data for 3957
[INFO] Using cached data for 5805
[INFO] Using cached data for 11419
[INFO] Using cached data for 7083
[INFO] Using cached data for 6636
[INFO] Using cached data for 8916
[INFO] Using cached data for 6535
[INFO] Using cached data for 1321

✅ Fetched or loaded data for 10 coins.
Train Samples: 288
Val Samples:   96
Test Samples:  96
Epoch [1/200] | Train Loss: 0.69241 | Train Acc: 53.47% | Val Loss: 0.70997 | Val Acc: 50.00%


  trainX = torch.tensor(trainX, dtype=torch.float32)
  trainY = torch.tensor(trainY, dtype=torch.float32)
  valX   = torch.tensor(valX,   dtype=torch.float32)
  valY   = torch.tensor(valY,   dtype=torch.float32)


Epoch [2/200] | Train Loss: 0.69704 | Train Acc: 53.47% | Val Loss: 0.69586 | Val Acc: 50.00%
Epoch [3/200] | Train Loss: 0.69030 | Train Acc: 53.47% | Val Loss: 0.69376 | Val Acc: 50.00%
Epoch [4/200] | Train Loss: 0.69110 | Train Acc: 53.47% | Val Loss: 0.69365 | Val Acc: 50.00%
Epoch [5/200] | Train Loss: 0.69123 | Train Acc: 53.47% | Val Loss: 0.69391 | Val Acc: 50.00%
Epoch [6/200] | Train Loss: 0.69060 | Train Acc: 53.47% | Val Loss: 0.69463 | Val Acc: 50.00%
Epoch [7/200] | Train Loss: 0.68981 | Train Acc: 53.47% | Val Loss: 0.69593 | Val Acc: 50.00%
Epoch [8/200] | Train Loss: 0.68937 | Train Acc: 53.47% | Val Loss: 0.69756 | Val Acc: 50.00%
Epoch [9/200] | Train Loss: 0.68945 | Train Acc: 53.47% | Val Loss: 0.69837 | Val Acc: 50.00%
Epoch [10/200] | Train Loss: 0.68888 | Train Acc: 53.47% | Val Loss: 0.69797 | Val Acc: 50.00%
Epoch [11/200] | Train Loss: 0.68743 | Train Acc: 53.47% | Val Loss: 0.69701 | Val Acc: 50.00%
Epoch [12/200] | Train Loss: 0.68627 | Train Acc: 53.47% |

  testX_t = torch.tensor(testX, dtype=torch.float32)
  testY_t = torch.tensor(testY, dtype=torch.float32)
