# 🚀 Fantasy AI GPU Training - COMPLETE VERSION

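This notebook is the final, fully integrated version of the training pipeline: it loads game data from Supabase, engineers features, trains a neural network and an XGBoost model, combines them into an ensemble, and packages the saved models for download.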

**Just click Runtime → Run all and wait for results!**

In [None]:
# Setup and imports
!pip install tensorflow scikit-learn xgboost pandas numpy supabase python-dotenv joblib scipy -q

import tensorflow as tf
import pandas as pd
import numpy as np
from supabase import create_client, Client
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
from datetime import datetime
import json
import os

print(f"TensorFlow version: {tf.__version__}")
print(f"GPU available: {tf.config.list_physical_devices('GPU')}")

In [None]:
# Supabase connection
# Credentials are taken from the environment (or prompted for interactively)
# instead of being embedded in the notebook: anything stored in a notebook is
# visible to everyone the file is shared with and stays in version history.
# NOTE: a key that has ever been committed in a notebook should be treated as
# leaked and rotated in the Supabase dashboard.
from getpass import getpass

SUPABASE_URL = os.environ.get("SUPABASE_URL", "https://pvekvqiqrrpugfmpgaup.supabase.co")
# getpass hides the typed value so the key does not end up in the cell output.
SUPABASE_KEY = os.environ.get("SUPABASE_KEY") or getpass("Supabase key: ")

if not SUPABASE_KEY:
    raise ValueError("A Supabase key is required: set SUPABASE_KEY or enter it when prompted")

supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
print("✅ Connected to Supabase")

In [None]:
# Load all data
print("📊 Loading data from Supabase...")

# Number of rows requested per page.
PAGE_SIZE = 1000


def fetch_all_rows(table_name, apply_filters=None, order_by=None, progress_every=None):
    """Download every row of a Supabase table, one page at a time.

    A single request only returns a capped number of rows, so one un-paginated
    ``select('*')`` silently truncates large tables.

    Args:
        table_name: Name of the table to read.
        apply_filters: Optional callable that receives the select query and
            returns it with extra filters applied.
        order_by: Optional column to sort by. Without a stable order,
            consecutive pages are not guaranteed to be disjoint.
        progress_every: If set, print a progress line whenever the number of
            rows fetched so far is a multiple of this value.

    Returns:
        A list of row dicts (empty if the table has no matching rows).
    """
    rows = []
    offset = 0
    while True:
        query = supabase.table(table_name).select('*')
        if apply_filters is not None:
            query = apply_filters(query)
        if order_by is not None:
            query = query.order(order_by)
        page = query.range(offset, offset + PAGE_SIZE - 1).execute().data
        if not page:
            break
        rows.extend(page)
        # Advance by what was actually returned so no rows are skipped when the
        # server caps a page below PAGE_SIZE.
        offset += len(page)
        if progress_every and offset % progress_every == 0:
            print(f"  Loaded {len(rows)} {table_name}...")
    return rows


# Load games (only those with a final score for both teams)
games = fetch_all_rows(
    'games',
    apply_filters=lambda q: q.not_.is_('home_score', 'null').not_.is_('away_score', 'null'),
    order_by='id',
    progress_every=5000,
)

print(f"✅ Loaded {len(games)} games")

# Load supplementary data
# NOTE(review): these tables are paged without an explicit order because their
# key columns are not visible here - pass order_by=<primary key> once confirmed.
player_stats = fetch_all_rows('player_stats')
injuries = fetch_all_rows('player_injuries')
weather = fetch_all_rows('weather_data')
sentiment = fetch_all_rows('social_sentiment')

print(f"✅ Loaded {len(player_stats)} player stats")
print(f"✅ Loaded {len(injuries)} injuries")
print(f"✅ Loaded {len(weather)} weather records")
print(f"✅ Loaded {len(sentiment)} sentiment records")

# Convert to DataFrames
df_games = pd.DataFrame(games)
df_stats = pd.DataFrame(player_stats)
df_injuries = pd.DataFrame(injuries)
df_weather = pd.DataFrame(weather)
df_sentiment = pd.DataFrame(sentiment)

In [None]:
# Feature engineering function
print("🔧 Engineering features...")

# A game only becomes a training sample once both teams have this much history.
_MIN_HISTORY_GAMES = 5
# Number of most recent results averaged into the "recent form" feature.
_FORM_WINDOW = 5
# Number of most recent results kept per team.
_FORM_HISTORY = 10


def _numeric_or(value, default):
    """Return ``value`` as a float, or ``default`` if it is None, NaN or not numeric."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return default
    return default if np.isnan(number) else number


def _has_columns(df, *columns):
    """Return True when ``df`` has at least one row and every named column."""
    return len(df) > 0 and all(column in df.columns for column in columns)


def _new_team_record():
    """Return a zeroed running-totals record for a team seen for the first time."""
    return {
        'games': 0, 'wins': 0, 'losses': 0,
        'points_for': 0, 'points_against': 0,
        'recent_form': []
    }


def _record_result(home_stats, away_stats, home_score, away_score):
    """Fold one finished game into both teams' running totals (mutates both dicts)."""
    home_won = home_score > away_score

    home_stats['games'] += 1
    away_stats['games'] += 1
    home_stats['points_for'] += home_score
    home_stats['points_against'] += away_score
    away_stats['points_for'] += away_score
    away_stats['points_against'] += home_score

    # A tie is booked as an away win, matching the label rule (home wins only
    # when home_score > away_score).
    if home_won:
        home_stats['wins'] += 1
        away_stats['losses'] += 1
        home_stats['recent_form'].append(1)
        away_stats['recent_form'].append(0)
    else:
        home_stats['losses'] += 1
        away_stats['wins'] += 1
        home_stats['recent_form'].append(0)
        away_stats['recent_form'].append(1)

    home_stats['recent_form'] = home_stats['recent_form'][-_FORM_HISTORY:]
    away_stats['recent_form'] = away_stats['recent_form'][-_FORM_HISTORY:]


def engineer_features(games_df, stats_df, injuries_df, weather_df, sentiment_df):
    """Build one feature vector and one label per game with enough team history.

    Games are processed in the row order of ``games_df``; each team's running
    totals only include games that appear earlier in the frame.
    NOTE(review): this assumes ``games_df`` is in chronological order - confirm
    and sort upstream if it is not.

    Args:
        games_df: Games with ``id``, ``home_team_id``, ``away_team_id``,
            ``home_score``, ``away_score`` and ``created_at`` columns.
        stats_df: Player stats with ``game_id`` and ``points`` columns.
        injuries_df: Injuries with ``team_id`` and ``severity`` columns.
        weather_df: Weather rows with ``game_id`` and optionally
            ``temperature`` / ``wind_speed``.
        sentiment_df: Sentiment rows with ``team_id`` and ``sentiment_score``.
            Any supplementary frame may be empty; defaults are used instead.

    Returns:
        ``(features, labels)`` as NumPy arrays. Each feature row has 23 values;
        the label is 1 when the home score is strictly greater than the away
        score, else 0. Games with a missing score are ignored, and games where
        either team has fewer than 5 earlier games only update the history.
    """
    features = []
    labels = []

    # Create lookup tables (an empty or column-less frame yields an empty lookup)
    if _has_columns(stats_df, 'game_id', 'points'):
        stats_by_game = stats_df.groupby('game_id')['points'].agg(['mean', 'sum']).to_dict('index')
    else:
        stats_by_game = {}

    if _has_columns(injuries_df, 'team_id', 'severity'):
        injuries_by_team = injuries_df.groupby('team_id')['severity'].count().to_dict()
    else:
        injuries_by_team = {}

    weather_by_game = {w['game_id']: w for w in weather_df.to_dict('records') if w.get('game_id')}

    if _has_columns(sentiment_df, 'team_id', 'sentiment_score'):
        sentiment_by_team = sentiment_df.groupby('team_id')['sentiment_score'].mean().to_dict()
    else:
        sentiment_by_team = {}

    # Calculate team statistics
    team_stats = {}
    for game in games_df.to_dict('records'):
        home_id = game['home_team_id']
        away_id = game['away_team_id']
        home_stats = team_stats.setdefault(home_id, _new_team_record())
        away_stats = team_stats.setdefault(away_id, _new_team_record())

        home_score = game['home_score']
        away_score = game['away_score']
        if pd.isna(home_score) or pd.isna(away_score):
            continue

        has_history = (home_stats['games'] >= _MIN_HISTORY_GAMES
                       and away_stats['games'] >= _MIN_HISTORY_GAMES)
        if has_history:
            game_id = game['id']
            home_win_rate = home_stats['wins'] / home_stats['games']
            away_win_rate = away_stats['wins'] / away_stats['games']

            # NOTE(review): these are player stats of the game being predicted;
            # if they are recorded after the game they leak the outcome - confirm.
            game_points = stats_by_game.get(game_id, {})

            game_weather = weather_by_game.get(game_id)
            if game_weather is None:
                temperature_feature = 0.72
                wind_feature = 0.17
            else:
                temperature_feature = _numeric_or(game_weather.get('temperature'), 72) / 100
                wind_feature = _numeric_or(game_weather.get('wind_speed'), 5) / 30

            # Parse the timestamp once instead of once per derived feature.
            created_at = pd.to_datetime(game['created_at'])

            game_features = [
                # Basic team performance
                home_win_rate,
                away_win_rate,
                home_stats['points_for'] / home_stats['games'],
                away_stats['points_for'] / away_stats['games'],
                home_stats['points_against'] / home_stats['games'],
                away_stats['points_against'] / away_stats['games'],

                # Recent form
                np.mean(home_stats['recent_form'][-_FORM_WINDOW:]) if home_stats['recent_form'] else 0.5,
                np.mean(away_stats['recent_form'][-_FORM_WINDOW:]) if away_stats['recent_form'] else 0.5,

                # Win rate difference
                home_win_rate - away_win_rate,

                # Scoring differential
                (home_stats['points_for'] - home_stats['points_against']) / home_stats['games'],
                (away_stats['points_for'] - away_stats['points_against']) / away_stats['games'],

                # Player stats
                _numeric_or(game_points.get('mean'), 0),
                _numeric_or(game_points.get('sum'), 0),

                # Injuries
                injuries_by_team.get(home_id, 0),
                injuries_by_team.get(away_id, 0),

                # Weather
                temperature_feature,
                wind_feature,

                # Sentiment
                _numeric_or(sentiment_by_team.get(home_id), 0),
                _numeric_or(sentiment_by_team.get(away_id), 0),

                # Time features
                created_at.hour / 24,
                created_at.dayofweek / 7,
                created_at.month / 12,

                # Home advantage
                1.0
            ]

            features.append(game_features)
            labels.append(1 if home_score > away_score else 0)

        # Update stats only after the features were read, so a game never
        # contributes to its own feature vector.
        _record_result(home_stats, away_stats, home_score, away_score)

    return np.array(features), np.array(labels)

# Now run feature engineering
X, y = engineer_features(df_games, df_stats, df_injuries, df_weather, df_sentiment)

# Fail with an actionable message instead of an IndexError further down when
# no game produced a sample.
if len(X) == 0:
    raise ValueError(
        "engineer_features produced no samples - check that the games table "
        "contains enough completed games per team"
    )
print(f"✅ Created {len(X)} samples with {X.shape[1]} features each")

# Split and scale: 70% train, then the remaining 30% is halved into
# validation and test. Both splits are stratified on the label.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp)

# The scaler is fitted on the training split only so that validation/test
# statistics do not influence the transformation.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

print(f"✅ Train: {len(X_train)}, Val: {len(X_val)}, Test: {len(X_test)}")

In [None]:
# Train Neural Network
print("🧠 Training Neural Network on GPU...")

# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks and
# batch shuffling are repeatable across "Restart and run all".
tf.keras.utils.set_random_seed(42)

# Fully connected classifier: five hidden layers narrowing 512 -> 32, with
# batch normalisation and dropout after the first four, and a sigmoid output
# giving the probability of the positive label.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(512, activation='relu', kernel_initializer='he_normal'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(256, activation='relu', kernel_initializer='he_normal'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(128, activation='relu', kernel_initializer='he_normal'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64, activation='relu', kernel_initializer='he_normal'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(32, activation='relu', kernel_initializer='he_normal'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Stop once validation accuracy has not improved for 15 epochs (restoring the
# best weights) and halve the learning rate after 5 epochs without improvement.
callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=15, restore_best_weights=True, mode='max'),
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', factor=0.5, patience=5, min_lr=0.00001, mode='max')
]

history = model.fit(
    X_train_scaled, y_train,
    validation_data=(X_val_scaled, y_val),
    epochs=100,
    batch_size=128,
    callbacks=callbacks,
    verbose=1
)

# evaluate() returns [loss, accuracy] for the metrics compiled above.
nn_test_acc = model.evaluate(X_test_scaled, y_test, verbose=0)[1]
print(f"\n✅ Neural Network Test Accuracy: {nn_test_acc:.2%}")

In [None]:
# Train XGBoost
print("\n🌲 Training XGBoost...")

# Train on the GPU when the runtime exposes one, otherwise fall back to CPU so
# the cell still runs on a CPU-only runtime.
xgb_device = 'cuda' if tf.config.list_physical_devices('GPU') else 'cpu'

xgb_params = {
    'n_estimators': 500,
    'max_depth': 8,
    'learning_rate': 0.05,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'objective': 'binary:logistic',
    # XGBoost 2.0+ selects the GPU with `device`; 'gpu_hist' and 'gpu_id' are
    # deprecated.
    'tree_method': 'hist',
    'device': xgb_device,
    # Early stopping is an estimator parameter; passing it to fit() was removed
    # in XGBoost 2.0 and raises TypeError there.
    'early_stopping_rounds': 20,
    'random_state': 42
}

xgb_model = xgb.XGBClassifier(**xgb_params)
# The validation split drives early stopping; the test split stays untouched
# until scoring below.
xgb_model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=False)

xgb_test_acc = xgb_model.score(X_test, y_test)
print(f"✅ XGBoost Test Accuracy: {xgb_test_acc:.2%}")

In [None]:
# Create Ensemble
print("\n🎯 Creating Ensemble Model...")

# Test-set probabilities of the positive class from each model.
nn_pred_proba = model.predict(X_test_scaled).flatten()
xgb_pred_proba = xgb_model.predict_proba(X_test)[:, 1]

# Baseline: unweighted average of the two models.
ensemble_pred_proba = (nn_pred_proba + xgb_pred_proba) / 2
ensemble_pred = (ensemble_pred_proba > 0.5).astype(int)
ensemble_acc = (ensemble_pred == y_test).mean()

print(f"\n🏆 FINAL ENSEMBLE ACCURACY: {ensemble_acc:.2%}")

# Optimize weights
# The weights are chosen on the validation split. Tuning them on the test split
# would make the reported test accuracy optimistically biased.
nn_val_proba = model.predict(X_val_scaled).flatten()
xgb_val_proba = xgb_model.predict_proba(X_val)[:, 1]


def ensemble_loss(weights):
    """Negative validation accuracy of the blend ``weights[0]*NN + weights[1]*XGB``."""
    pred = weights[0] * nn_val_proba + weights[1] * xgb_val_proba
    pred_binary = (pred > 0.5).astype(int)
    return -(pred_binary == y_val).mean()


# Accuracy is piecewise constant in the weights, so a gradient-based optimiser
# sees a zero gradient and never leaves its starting point. Search a grid of
# NN weights instead (XGB weight = 1 - NN weight). Candidates are ordered by
# distance from 0.5 so ties resolve towards the equal-weight blend.
candidate_nn_weights = sorted(np.linspace(0.0, 1.0, 101), key=lambda w: abs(w - 0.5))
candidate_losses = [ensemble_loss((w, 1.0 - w)) for w in candidate_nn_weights]
best_nn_weight = float(candidate_nn_weights[int(np.argmin(candidate_losses))])

optimal_weights = np.array([best_nn_weight, 1.0 - best_nn_weight])
final_pred_proba = optimal_weights[0] * nn_pred_proba + optimal_weights[1] * xgb_pred_proba
final_pred = (final_pred_proba > 0.5).astype(int)
final_acc = (final_pred == y_test).mean()

print(f"\n🔥 OPTIMIZED ENSEMBLE ACCURACY: {final_acc:.2%}")
print(f"Optimal weights: NN={optimal_weights[0]:.3f}, XGB={optimal_weights[1]:.3f}")

In [None]:
# Save everything
print("\n💾 Saving models...")

import shutil

import joblib

# Single source of truth for the output directory; the archive created below
# is named after it.
MODEL_DIR = 'fantasy_ai_gpu_models'

os.makedirs(MODEL_DIR, exist_ok=True)

model.save(os.path.join(MODEL_DIR, 'neural_network.h5'))
xgb_model.save_model(os.path.join(MODEL_DIR, 'xgboost_model.json'))

# The fitted scaler must ship with the models: the neural network was trained
# on scaled features.
joblib.dump(scaler, os.path.join(MODEL_DIR, 'scaler.pkl'))

# Values are cast to plain float so they are JSON serialisable.
metadata = {
    'timestamp': datetime.now().isoformat(),
    'total_games': len(games),
    'training_samples': len(X_train),
    'features': X_train.shape[1],
    'accuracy': {
        'neural_network': float(nn_test_acc),
        'xgboost': float(xgb_test_acc),
        'ensemble': float(ensemble_acc),
        'optimized_ensemble': float(final_acc)
    },
    'ensemble_weights': {
        'neural_network': float(optimal_weights[0]),
        'xgboost': float(optimal_weights[1])
    }
}

with open(os.path.join(MODEL_DIR, 'metadata.json'), 'w') as f:
    json.dump(metadata, f, indent=2)

# Writes fantasy_ai_gpu_models.zip next to the directory, with the files at the
# archive root. Unlike `zip -r`, this rebuilds the archive on every run, so a
# re-run cannot leave stale entries behind, and it needs no external binary.
shutil.make_archive(MODEL_DIR, 'zip', root_dir=MODEL_DIR)

print("\n" + "="*50)
print("🏆 FINAL RESULTS:")
print("="*50)
print(f"Neural Network: {nn_test_acc:.2%}")
print(f"XGBoost: {xgb_test_acc:.2%}")
print(f"Simple Ensemble: {ensemble_acc:.2%}")
print(f"Optimized Ensemble: {final_acc:.2%}")
print("="*50)

In [None]:
# Download
# google.colab only exists inside a Colab runtime; elsewhere, report where the
# archive is instead of ending the run with an ImportError.
try:
    from google.colab import files
except ImportError:
    print("\n📦 Not running in Google Colab - the archive is at fantasy_ai_gpu_models.zip")
else:
    files.download('fantasy_ai_gpu_models.zip')
    print("\n📥 Download complete! Save this to your project.")