# 🧠 Poly Prediction: Hybrid GPU Trainer

This notebook is currently running on a **Google Colab Tesla T4 GPU**.

### 🚀 One-Click Setup
Since Colab is in the cloud and your code is on your server, we will pull everything we need right here.

In [6]:
!pip install sentence-transformers sqlalchemy pandas numpy torch

import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import gc
import os
from sqlalchemy import create_engine
from sentence_transformers import SentenceTransformer
from torch.utils.data import DataLoader, TensorDataset, WeightedRandomSampler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Pick the CUDA accelerator when the runtime exposes one, otherwise stay on CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")
if device.type == "cuda":
    # Report which GPU model was assigned to this session.
    print(f"GPU Type: {torch.cuda.get_device_name(0)}")

Using device: cuda
GPU Type: Tesla T4


### 1. Upload Database (Support for .gz)
Upload **poly.db.gz** (it's faster!) or the regular **poly.db**.

In [7]:
from google.colab import files
import gzip
import shutil

uploaded = files.upload()

# Normalise each uploaded file into a local file named poly.db.
for upload_name, payload in uploaded.items():
    print(f'User uploaded file "{upload_name}" with length {len(payload)} bytes')

    if not upload_name.endswith('.gz'):
        # Uncompressed upload: only needs to be given the expected name.
        os.rename(upload_name, "poly.db")
        continue

    # Gzip upload: stream-decompress it into poly.db.
    print("Decompressing poly.db.gz...")
    with gzip.open(upload_name, 'rb') as compressed, open('poly.db', 'wb') as database_file:
        shutil.copyfileobj(compressed, database_file)
    print("Database decompressed successfully!")

KeyboardInterrupt: 

### 2. Feature Engineering Logic
We've moved the logic here so it doesn't need to 'import' from your local files.

In [None]:
def custom_engineer_features(db_path="poly.db", embedding_model="all-MiniLM-L6-v2", batch_size=1000):
    """Build the feature matrix and labels from resolved events in a SQLite database.

    Reads every row of the ``events`` table whose ``outcome`` is not NULL and
    combines three feature groups column-wise:

    * numeric: ``initial_price``, ``volume`` and ``time_to_event_days``
      (whole days between ``start_time`` and ``end_time``), missing values -> 0;
    * one-hot columns for ``category`` (prefix ``cat_``);
    * sentence embeddings of ``news_summary`` (columns ``emb_0`` .. ``emb_{d-1}``),
      with missing summaries encoded as the empty string.

    Args:
        db_path: Path to the SQLite database file.
        embedding_model: Name passed to ``SentenceTransformer`` to load the text encoder.
        batch_size: Number of summaries handed to the encoder per ``encode`` call.

    Returns:
        ``(features, target)`` where ``features`` is a float32 DataFrame and
        ``target`` is ``outcome`` cast to int, both indexed like the query result.
        Returns ``(None, None)`` when the query yields no rows.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # Fail fast: a non-positive step would otherwise surface later as an
    # opaque range()/np.vstack error after the model has already been loaded.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    engine = create_engine(f"sqlite:///{db_path}")
    try:
        print("Loading resolved events from database...")
        query = "SELECT * FROM events WHERE outcome IS NOT NULL"
        df = pd.read_sql(query, engine, parse_dates=['start_time', 'end_time'])
    finally:
        # Release pooled connections as soon as the data is in memory.
        engine.dispose()

    if df.empty:
        print("No resolved events found.")
        return None, None

    print(f"Engineering features for {len(df)} samples...")

    df['time_to_event_days'] = (df['end_time'] - df['start_time']).dt.days

    print("Generating text embeddings (Batch Mode)...")
    st_model = SentenceTransformer(embedding_model)

    summaries = df['news_summary'].fillna('').tolist()
    all_embeddings = []

    # Encode in chunks so only one chunk of intermediate results is live at a time.
    for start in range(0, len(summaries), batch_size):
        batch = summaries[start:start + batch_size]
        all_embeddings.append(st_model.encode(batch, show_progress_bar=False))
        gc.collect()

    embeddings = np.vstack(all_embeddings)
    embedding_df = pd.DataFrame(
        embeddings,
        index=df.index,  # keep rows aligned with df for the concat below
        columns=[f'emb_{i}' for i in range(embeddings.shape[1])],
    )

    category_dummies = pd.get_dummies(df['category'], prefix='cat')
    numerical_features = df[['initial_price', 'volume', 'time_to_event_days']].fillna(0)

    features = pd.concat([numerical_features, category_dummies, embedding_df], axis=1)
    # Coerce everything (including boolean dummies) to float32; unparseable values become 0.
    features = features.apply(pd.to_numeric, errors='coerce').fillna(0).astype('float32')

    target = df['outcome'].astype(int)
    return features, target

features, target = custom_engineer_features()
if features is None:
    # custom_engineer_features returns (None, None) when no resolved events exist;
    # stop here with a clear message instead of failing on `.shape` below.
    raise RuntimeError("No resolved events found in poly.db; cannot build a training set.")
print(f"Final Dataset Shape: {features.shape}")

### 3. Neural Architecture & Training

In [None]:
class NeuralPredictor(nn.Module):
    """Feed-forward binary classifier with a sigmoid output.

    Layer widths are ``input_dim -> 128 -> 64 -> 32 -> 1``; dropout of 0.3 and
    0.2 follows the first and second hidden activations respectively.
    """

    def __init__(self, input_dim):
        """Create the layer stack for inputs with ``input_dim`` features."""
        super().__init__()
        # Positions in this list become the indices in the Sequential, and
        # therefore the parameter names in the saved state_dict.
        layers = [
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid(),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the stack and return the sigmoid-activated output."""
        return self.network(x)

def train_on_gpu(features, target, epochs=20, lr=0.001):
    """Train a NeuralPredictor on ``features``/``target``, evaluate it, and export the weights.

    The data is split 80/20 (stratified on ``target``, fixed seed 42). The model
    is trained full-batch on the training split with Adam and binary
    cross-entropy, then scored on the held-out split before the weights are
    written to ``model_colab.pt`` and offered as a browser download.

    Args:
        features: DataFrame of numeric features, one row per sample. Values are
            used as given; no scaling is applied here.
        target: Series of binary labels aligned with ``features``.
        epochs: Number of full-batch optimisation steps.
        lr: Learning rate for the Adam optimiser.

    Returns:
        None. Side effects: prints progress and test metrics, writes
        ``model_colab.pt`` and calls ``files.download`` on it.
    """
    X_train_pd, X_test_pd, y_train_pd, y_test_pd = train_test_split(
        features, target, test_size=0.2, random_state=42, stratify=target
    )

    # Explicit float32 conversion; labels become column vectors to match the model output.
    X_train = torch.as_tensor(X_train_pd.values, dtype=torch.float32).to(device)
    y_train = torch.as_tensor(y_train_pd.values, dtype=torch.float32).view(-1, 1).to(device)
    X_test = torch.as_tensor(X_test_pd.values, dtype=torch.float32).to(device)
    y_test = torch.as_tensor(y_test_pd.values, dtype=torch.float32).view(-1, 1).to(device)

    model = NeuralPredictor(features.shape[1]).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.BCELoss()

    print("Starting High-Speed GPU Training...")
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()
        if (epoch + 1) % 5 == 0:
            print(f"Epoch {epoch+1} | Loss: {loss.item():.4f}")

    # Score the held-out split so the exported model comes with a quality signal.
    # eval() disables dropout; no_grad() avoids building an autograd graph.
    model.eval()
    with torch.no_grad():
        test_probs = model(X_test)
        test_loss = criterion(test_probs, y_test).item()
    y_true = y_test.long().cpu().numpy().ravel()
    y_pred = (test_probs >= 0.5).long().cpu().numpy().ravel()
    print(f"Test Loss: {test_loss:.4f} | Test Accuracy: {accuracy_score(y_true, y_pred):.4f}")
    print(classification_report(y_true, y_pred, zero_division=0))

    torch.save(model.state_dict(), "model_colab.pt")
    print("Training Finished. Model saved as model_colab.pt")
    files.download("model_colab.pt")

# Train on the engineered dataset; this also saves model_colab.pt and triggers its download.
train_on_gpu(features, target)