In [131]:
import pandas as pd
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt
import matplotlib.pyplot as plt

# --- Configuration ----------------------------------------------------------
# Everything a reader might want to tune lives here instead of being buried
# in the code below.
DATA_PATH = 'WK_1-5.csv'
TRAIN_MAX_WEEK = 4   # rows with WEEK <= this value are used for training
TEST_WEEK = 5        # rows with WEEK == this value are held out for evaluation

# Load the stat lines and drop rows that have no value in the WON column.
data = pd.read_csv(DATA_PATH).dropna(subset=['WON'])

# Flag rows in which every listed stat column is exactly zero.
# NOTE(review): DnP is derived from the same row's stat columns; if FPTS is
# computed from those stats (presumably it is -- confirm), this feature is only
# known after the game and leaks information about the target.
columns_to_check = [
    "PASS_COMP", "PASS_YDS", "PASS_TD", "INT",
    "CAR", "RUSH_YDS", "RUSH_TD", "REC",
    "REC_YARDS", "REC_TD", "TARGETS", "2PC",
    "FUML", "MISC_TD"
]
data["DnP"] = (data[columns_to_check] == 0).all(axis=1)

# Model inputs and the column to predict.
features = ['NAME', 'POS', 'TEAM', 'OPP', '@HOME', 'WEEK', 'DnP']
target = 'FPTS'

# Replace each categorical column with integer codes. The fitted encoders are
# kept in `label_encoders` so that new rows can be encoded with the same mapping.
# NOTE(review): scikit-learn documents LabelEncoder as a target encoder; the
# integer codes impose an arbitrary order on NAME/TEAM/OPP. Kept as-is because
# other cells call `label_encoders[col].transform(...)`.
label_encoders = {}
categorical_columns = ['POS', 'TEAM', 'OPP', 'NAME', '@HOME', 'DnP']
for column in categorical_columns:
    encoder = LabelEncoder()
    data[column] = encoder.fit_transform(data[column])
    label_encoders[column] = encoder

# Order rows by week (reassigned rather than sorted in place), then split by
# week: earlier weeks train the model, the held-out week evaluates it.
data = data.sort_values(by='WEEK')
train_data = data[data['WEEK'] <= TRAIN_MAX_WEEK]
test_data = data[data['WEEK'] == TEST_WEEK]

# Fail early with a message that names the empty split instead of failing
# later inside the modelling calls.
if train_data.empty or test_data.empty:
    raise ValueError(
        f"Empty split: {len(train_data)} training rows (WEEK <= {TRAIN_MAX_WEEK}), "
        f"{len(test_data)} test rows (WEEK == {TEST_WEEK})"
    )

# Separate features and target for each split.
X_train, y_train = train_data[features], train_data[target]
X_test, y_test = test_data[features], test_data[target]

# Fit the gradient-boosted regressor; random_state is fixed for reproducibility.
model = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)
model.fit(X_train, y_train)

# Score the held-out week with root mean squared error.
y_pred = model.predict(X_test)
rmse = sqrt(mean_squared_error(y_test, y_pred))
print(f"Root Mean Squared Error: {rmse}")





# Get feature importances from the model
# feature_importance = model.feature_importances_
# feature_names = model.get_booster().feature_names
# feature_importance_df = pd.DataFrame({'Feature': feature_names, 'Importance': feature_importance})
# feature_importance_df = feature_importance_df.sort_values(by='Importance', ascending=False)
# plt.figure(figsize=(10, 6))
# plt.barh(feature_importance_df['Feature'], feature_importance_df['Importance'])
# plt.xlabel('Importance')
# plt.ylabel('Feature')
# plt.title('Feature Importance')
# plt.show()







Root Mean Squared Error: 6.06603411071511


In [75]:
# Re-read the raw file so TEAM / NAME / POS hold their original string values.
df = pd.read_csv('WK_1-5.csv')

# Distinct team abbreviations, in order of first appearance.
unique_teams = pd.unique(df['TEAM'])
print(unique_teams)

# Position group to inspect -- change this to list another group.
position_group = 'QB'

# Keep only the rows for that position, then collect the distinct player names.
is_in_group = df['POS'].eq(position_group)
filtered_df = df.loc[is_in_group]
unique_players = pd.unique(filtered_df['NAME'])
print(unique_players)

['Mia' 'NE' 'GB' 'Ind' 'LAC' 'Cle' 'KC' 'Jax' 'SF' 'Min' 'TB' 'Den' 'Wsh'
 'LV' 'NO' 'Chi' 'LAR' 'Det' 'Phi' 'Hou' 'Pit' 'Sea' 'Buf' 'Atl' 'NYJ'
 'Car' 'Bal' 'Dal' 'NYG' 'Cin' 'Ten' 'Ari']
['Tua Tagovailoa' 'Mac Jones' 'Jordan Love' 'Anthony Richardson'
 'Justin Herbert' 'Deshaun Watson' 'Patrick Mahomes' 'Trevor Lawrence'
 'Brock Purdy' 'Kirk Cousins' 'Baker Mayfield' 'Russell Wilson'
 'Sam Howell' 'Jimmy Garoppolo' 'Derek Carr' 'Justin Fields'
 'Matthew Stafford' 'Jared Goff' 'Jalen Hurts' 'C.J. Stroud'
 'Kenny Pickett' 'Geno Smith' 'Josh Allen' 'Desmond Ridder' 'Zach Wilson'
 'Bryce Young' 'Lamar Jackson' 'Dak Prescott' 'Daniel Jones' 'Joe Burrow'
 'Ryan Tannehill' 'Joshua Dobbs' 'Tyrod Taylor' 'Aaron Rodgers'
 'Kyler Murray' 'Gardner Minshew' 'Tom Brady' 'Jameis Winston'
 'Trey Lance' "Aidan O'Connell" 'Mitch Trubisky' 'Will Levis'
 'Andy Dalton' 'Kyle Trask' 'Marcus Mariota' 'Hendon Hooker' 'Mike White'
 'Dorian Thompson-Robinson' 'Stetson Bennett' 'Taylor Heinicke'
 'Jacoby Briss

In [147]:



# Describe the matchup to score, using the same raw values found in the CSV.
player, position = 'Christian Kirk', 'WR'
team, Opp = 'Jax', 'Ind'
at_home, week, DnP = True, 6, False

# Raw feature values, keyed by column name in the same order as the training
# feature list (NAME, POS, TEAM, OPP, @HOME, WEEK, DnP).
raw_values = {
    'NAME': player,
    'POS': position,
    'TEAM': team,
    'OPP': Opp,
    '@HOME': at_home,
    'WEEK': week,
    'DnP': DnP,
}

# Columns that have a fitted encoder are mapped to their integer code;
# columns without one (WEEK) are passed through unchanged.
new_data_point = {
    name: (label_encoders[name].transform([value])[0] if name in label_encoders else value)
    for name, value in raw_values.items()
}

# Wrap the single encoded row in a DataFrame and score it with the trained model.
new_data_df = pd.DataFrame([new_data_point])
predicted_fpts = model.predict(new_data_df)

# Report the predicted fantasy points.
print(f"Predicted points: {player} {predicted_fpts[0]:.2f}")


Predicted points: Christian Kirk 15.66
