In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
class FantasyFootballPredictor:
    """Per-position fantasy-point predictor built on support vector regression.

    For each position the class reads ``Player_Roles/<position>_analysis.csv``,
    derives a confidence-weighted fantasy-point target from per-game averages,
    fits a grid-searched ``SVR`` on standardized features, and ranks players by
    the model's (confidence-weighted) prediction.

    Attributes:
        experience_factors: discount applied per years of experience.
        position_weights: per position, the stat columns and the multipliers
            used to build the fantasy-point target.
        position_features: per position, the columns fed to the model.
        models: fitted estimator per position (filled by ``train_model``).
        scalers: fitted ``StandardScaler`` per position (filled by ``train_model``).
    """

    def __init__(self):
        # Experience-based confidence factors
        self.experience_factors = {
            'Rook': 0.7,  # 30% discount for rookies
            1: 0.8,      # 20% discount for 1 year experience
            2: 0.9,      # 10% discount for 2 years experience
            3: 0.95,     # 5% discount for 3 years experience
            4: 1.0       # Full value for 4+ years experience
        }

        # Position-specific scoring weights; 'primary' and 'multipliers' are
        # parallel lists (stat column -> points per unit of that stat).
        self.position_weights = {
            'QB': {
                'primary': ['Yds_avg', 'TD_avg', 'Int_avg', 'Rate_avg', 'FL_avg'],
                'multipliers': [0.04, 4, -2, 0.1, -2]
            },
            'RB': {
                'primary': ['Yds_avg', 'TD_avg', 'Rec_avg', 'Y/R_avg', 'FL_avg'],
                'multipliers': [0.1, 6, 1, 0.1, -2]
            },
            'WR': {
                'primary': ['Rec_avg', 'Yds_avg', 'TD_avg', 'Tgt_avg', 'FL_avg'],
                'multipliers': [1, 0.1, 6, 0.05, -2]
            },
            'TE': {
                'primary': ['Rec_avg', 'Yds_avg', 'TD_avg', 'Tgt_avg', 'FL_avg'],
                'multipliers': [1, 0.1, 6, 0.05, -2]
            },
            'K': {
                'primary': ['FGM_avg', 'XPM_avg', 'FG%_avg', 'XP%_avg'],
                'multipliers': [3, 1, 0.1, 0.1]
            }
        }

        # Model input columns per position (WR and TE share one feature set).
        receiver_features = ['Games_Played', 'Tgt_avg', 'Rec_avg', 'Yds_avg', 'Y/R_avg',
                             'TD_avg', 'Ctch%_avg', 'Y/Tgt_avg', 'FL_avg']
        self.position_features = {
            'QB': ['Games_Played', 'Cmp_avg', 'Att_avg', 'Yds_avg', 'TD_avg',
                   'Int_avg', 'Rate_avg', 'Sk_avg', 'Y/A_avg', 'FL_avg'],
            'RB': ['Games_Played', 'Att_avg', 'Yds_avg', 'Y/A_avg', 'TD_avg',
                   'Rec_avg', 'Y/R_avg', 'Ctch%_avg', 'FL_avg'],
            'WR': receiver_features,
            'TE': receiver_features,
            'K': ['Games_Played', 'FGM_avg', 'FGA_avg', 'FG%_avg', 'XPM_avg',
                  'XPA_avg', 'XP%_avg', 'Pts_avg'],
        }

        self.models = {}
        self.scalers = {}

    def get_experience_factor(self, years):
        """Return the confidence factor for a player's years of experience.

        Args:
            years: ``'Rook'`` (any case, surrounding whitespace ignored), or a
                number of years given as an int, float, NumPy scalar, or
                numeric string such as ``'3'``.

        Returns:
            The matching entry of ``experience_factors``; values of 4 or more
            map to the 4-year entry. Anything that cannot be read as a finite
            number (``None``, NaN, arbitrary text) and any value below 1 that
            is not ``'Rook'`` yields the neutral factor ``1.0``.
        """
        if isinstance(years, str):
            text = years.strip()
            if text.lower() == 'rook':
                return self.experience_factors['Rook']
            # A CSV column mixing 'Rook' with numbers is read as strings, so
            # numeric text must be parsed rather than treated as unknown.
            years = text

        try:
            numeric_years = float(years)
        except (TypeError, ValueError):
            return 1.0
        if not np.isfinite(numeric_years):
            return 1.0

        return self.experience_factors.get(min(int(numeric_years), 4), 1.0)

    def calculate_sample_weight(self, games_played, experience):
        """Return a confidence weight from games played and experience.

        The games-played term is a logistic curve centred on 20 games
        (0.5 at 20 games, approaching 1 as games grow); it is multiplied by
        the experience factor from ``get_experience_factor``.
        """
        # Base weight from games played (sigmoid curve)
        base_weight = 1 / (1 + np.exp(-0.1 * (games_played - 20)))
        return base_weight * self.get_experience_factor(experience)

    def calculate_fantasy_points(self, position, data):
        """Compute confidence-weighted fantasy points for one player.

        Args:
            position: key of ``position_weights`` (raises ``KeyError`` otherwise).
            data: mapping-like record supporting ``in`` and ``.get`` (a dict or
                a DataFrame row). Stats absent from ``data`` contribute nothing;
                missing ``Games_Played`` / ``Experience`` default to 0.

        Returns:
            Sum of ``stat * multiplier`` over the position's stats, scaled by
            ``calculate_sample_weight``.
        """
        weights = self.position_weights[position]
        base_points = sum(
            data[stat] * multiplier
            for stat, multiplier in zip(weights['primary'], weights['multipliers'])
            if stat in data
        )

        # Apply experience and sample size adjustments
        confidence_factor = self.calculate_sample_weight(
            data.get('Games_Played', 0), data.get('Experience', 0)
        )
        return base_points * confidence_factor

    def load_and_prepare_data(self, position):
        """Load a position's CSV and add ``sample_weight`` and ``fantasy_points``.

        Returns:
            ``(df, features)`` where rows with NaN in any feature or in
            ``fantasy_points`` have been dropped, or ``(None, None)`` if
            anything fails (the error is printed, not raised).
        """
        try:
            df = pd.read_csv(f'Player_Roles/{position}_analysis.csv')

            # Calculate sample weights
            df['sample_weight'] = df.apply(
                lambda row: self.calculate_sample_weight(row['Games_Played'], row['Experience']),
                axis=1
            )

            # Calculate fantasy points
            df['fantasy_points'] = [
                self.calculate_fantasy_points(position, row) for _, row in df.iterrows()
            ]

            # Select features based on position
            features = list(self.position_features.get(position, []))

            # Drop rows with NaN values; copy so callers can add columns
            # without triggering chained-assignment warnings.
            df = df.dropna(subset=features + ['fantasy_points']).copy()

            return df, features

        except Exception as e:
            # Deliberate best-effort: report and let the caller skip the position.
            print(f"Error loading {position} data: {str(e)}")
            return None, None

    def train_model(self, position):
        """Fit and evaluate an SVR for ``position``.

        Prints weighted MSE and R² on a 20% hold-out split and stores the best
        estimator and the scaler in ``models`` / ``scalers``.

        Returns:
            ``True`` on success; ``False`` when data could not be loaded or
            fewer than 10 usable rows remain.
        """
        # Load and prepare data
        df, features = self.load_and_prepare_data(position)
        if df is None or len(df) < 10:
            return False

        # Split features and target
        X = df[features]
        y = df['fantasy_points']
        sample_weights = df['sample_weight']

        # Train/test split
        X_train, X_test, y_train, y_test, w_train, w_test = train_test_split(
            X, y, sample_weights, test_size=0.2, random_state=42
        )

        # Scale features.
        # NOTE(review): the scaler is fit on the full training split before
        # cross-validation, so CV folds share scaling statistics.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # Define SVM parameters
        param_grid = {
            'C': [0.1, 1, 10, 100],
            'gamma': ['scale', 'auto', 0.1, 0.01],
            'kernel': ['rbf', 'linear']
        }

        # Train model with sample weights
        grid_search = GridSearchCV(SVR(), param_grid, cv=5, scoring='neg_mean_squared_error')
        grid_search.fit(X_train_scaled, y_train, sample_weight=w_train)
        best_model = grid_search.best_estimator_

        # Evaluate model
        y_pred = best_model.predict(X_test_scaled)
        mse = mean_squared_error(y_test, y_pred, sample_weight=w_test)
        r2 = r2_score(y_test, y_pred, sample_weight=w_test)

        print(f"\n{position} Model Performance:")
        print(f"Mean Squared Error: {mse:.2f}")
        print(f"R² Score: {r2:.2f}")

        self.models[position] = best_model
        self.scalers[position] = scaler

        return True

    def predict_players(self, position, top_n=10):
        """Return the ``top_n`` players for ``position`` ranked by predicted points.

        Returns:
            A DataFrame with identity columns, ``sample_weight``,
            ``predicted_points`` and the model features (each column exactly
            once), or ``None`` when no model is trained or data fails to load.
        """
        if position not in self.models:
            print(f"No trained model available for {position}")
            return None

        # Load data
        df, features = self.load_and_prepare_data(position)
        if df is None:
            return None
        df = df.copy()

        # Scale features and predict
        X_scaled = self.scalers[position].transform(df[features])
        predictions = self.models[position].predict(X_scaled)

        # Apply confidence adjustments to predictions.
        # NOTE(review): the training target is already scaled by the same
        # confidence weight, so this applies the discount a second time.
        df['predicted_points'] = predictions * df['sample_weight']

        # 'Games_Played' is both an identity column and a feature; de-duplicate
        # (order-preserving) so the result has no repeated column labels.
        leading = ['Player', 'Team', 'Age', 'Experience', 'Games_Played',
                   'sample_weight', 'predicted_points']
        columns = list(dict.fromkeys(leading + list(features)))

        # Sort by predicted points and get top N players
        return df.nlargest(top_n, 'predicted_points')[columns]

In [3]:
def format_predictions(df, position):
    """Select the display columns for a position and round numbers to 2 places.

    Args:
        df: predictions frame containing the identity/confidence columns and
            the position's stat columns. Repeated column labels are tolerated;
            only the first occurrence of each label is kept.
        position: 'K', 'QB' or 'RB' select their own stat columns; any other
            value uses the WR/TE stat columns.

    Returns:
        A new DataFrame with the confidence columns followed by the position's
        stat columns, numeric values rounded to two decimals.

    Raises:
        KeyError: if a required column is missing from ``df``.
    """
    base_cols = ['Player', 'Team', 'Experience', 'Games_Played', 'sample_weight', 'predicted_points']

    stat_cols_by_position = {
        'K': ['FG%_avg', 'XP%_avg', 'Pts_avg'],
        'QB': ['Yds_avg', 'TD_avg', 'Int_avg', 'Rate_avg', 'FL_avg'],
        'RB': ['Yds_avg', 'TD_avg', 'Rec_avg', 'FL_avg'],
    }
    receiver_cols = ['Rec_avg', 'Yds_avg', 'TD_avg', 'FL_avg']  # WR/TE
    stat_cols = stat_cols_by_position.get(position, receiver_cols)

    # Selecting a label that occurs twice would emit it twice; keep only the
    # first occurrence of every label before selecting.
    unique_df = df.loc[:, ~df.columns.duplicated()]
    return unique_df[base_cols + stat_cols].round(2)

In [4]:
def save_predictions(df, position, output_dir='FF_Predictions'):
    """Write predictions to ``<output_dir>/<position>_predictions.csv``.

    Rows are written sorted by ``predicted_points`` in descending order and
    without the index; ``df`` itself is not modified. The output directory
    (including missing parents) is created when needed. An empty
    ``output_dir`` writes into the current working directory.

    Args:
        df: frame to save; must contain a ``predicted_points`` column.
        position: position label used in the file name.
        output_dir: target directory.
    """
    import os

    # exist_ok avoids the check-then-create race and is a no-op when the
    # directory already exists; os.makedirs('') would raise, so skip it.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Sort by predicted points descending
    df_sorted = df.sort_values('predicted_points', ascending=False)

    # Save to CSV
    filename = os.path.join(output_dir, f'{position}_predictions.csv')
    df_sorted.to_csv(filename, index=False)
    print(f"\nSaved full {position} predictions to: {filename}")

In [5]:
def main():
    """Train a model per position, then print and save its top-10 predictions.

    Positions whose model cannot be trained (data failed to load or too few
    rows) are reported and skipped instead of printing a results header that
    has nothing under it.
    """
    predictor = FantasyFootballPredictor()

    positions = ['QB', 'RB', 'WR', 'TE', 'K']
    for position in positions:
        print(f"\nTraining {position} model...")
        if not predictor.train_model(position):
            # train_model returns False when no usable model was produced.
            print(f"Skipping {position}: model could not be trained.")
            continue

        # Get and display top 10
        print(f"\nTop 10 Predicted {position}s:")
        top_players = predictor.predict_players(position)
        if top_players is None:
            continue

        formatted_predictions = format_predictions(top_players, position)
        print(formatted_predictions.to_string())

        # Save predictions to FF_Predictions directory
        save_predictions(formatted_predictions, position)


if __name__ == "__main__":
    main()


Training QB model...

QB Model Performance:
Mean Squared Error: 3.11
R² Score: 0.96

Top 10 Predicted QBs:
              Player        Team Experience  Games_Played  Games_Played  sample_weight  predicted_points  Yds_avg  TD_avg  Int_avg  Rate_avg  FL_avg
2         Derek Carr      Saints         10           167           167           1.00             22.41   234.72    1.47     0.64     89.88    0.22
4         Jared Goff       Lions          8           128           128           1.00             22.08   233.73    1.44     0.64     86.35    0.23
0   Matthew Stafford        Rams         15           217           217           1.00             21.14   231.68    1.46     0.74     79.01    0.16
5     Baker Mayfield  Buccaneers          6           100           100           1.00             20.28   210.28    1.40     0.75     82.19    0.16
1       Kirk Cousins     Falcons         12           161           161           1.00             20.07   205.23    1.39     0.58     77.39    0.2