In [None]:
# Show the column labels of data_df for reference.
print(data_df.columns)

# Collect the 'Player' column plus every column whose label contains 'Name_',
# then build the modelling frame without them.
columns_to_drop = [
    label for label in df_model.columns
    if label == 'Player' or 'Name_' in label
]
df_model_clean = df_model.drop(columns=columns_to_drop)
print(df_model_clean.columns)

# Target (y) is 'FantPt'; features (X) are the remaining columns minus 'Position'.
y = df_model_clean['FantPt']
X = df_model_clean.drop(columns=['FantPt', 'Position'])

In [None]:
### Train/test split ###

from sklearn.model_selection import train_test_split
# Split the data into training and testing sets (80% for training, 20% for testing).
# The fixed random_state keeps the split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Print the shape of the splits to verify
print(f"Training data shape (X_train): {X_train.shape}")
print(f"Test data shape (X_test): {X_test.shape}")




In [None]:

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Baseline: fit a linear regression on the training split (fit returns the estimator itself).
linear_model = LinearRegression().fit(X_train, y_train)

# Predict on the held-out split and score with MSE and R-squared.
y_pred_linear = linear_model.predict(X_test)
r2_linear = r2_score(y_test, y_pred_linear)
mse_linear = mean_squared_error(y_test, y_pred_linear)

print(f"Linear Regression - Mean Squared Error: {mse_linear}")
print(f"Linear Regression - R-squared: {r2_linear}")


In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
# Candidate hyperparameter values explored by the grid search.
param_grid = dict(
    n_estimators=[50, 100, 150],       # trees in the forest
    max_depth=[None, 10, 20, 30],      # depth cap per tree (None = unlimited)
    min_samples_split=[2, 5, 10],      # samples needed to split an internal node
    min_samples_leaf=[1, 2, 4],        # samples needed at a leaf
)

# Base estimator; the seed keeps forest construction reproducible.
rf_model = RandomForestRegressor(random_state=42)

# 3-fold cross-validated search scored by negative MSE, using all available cores.
grid_search = GridSearchCV(
    estimator=rf_model,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=3,
    n_jobs=-1,
    verbose=2,
)
grid_search.fit(X_train, y_train)  # this can take a while
print(f"Best parameters found: {grid_search.best_params_}")

# Keep the winning estimator and measure it on the held-out split.
best_rf_model = grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)
r2_rf = r2_score(y_test, y_pred_rf)
mse_rf = mean_squared_error(y_test, y_pred_rf)
print(f"Random Forest - Mean Squared Error: {mse_rf}")
print(f"Random Forest - R-squared: {r2_rf}")

In [None]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score
# Gradient-boosted regressor with a squared-error objective and a fixed seed.
xg_model = xgb.XGBRegressor(
    objective='reg:squarederror',
    random_state=42,
)
xg_model.fit(X_train, y_train)

# Predict on the held-out split and score with MSE and R-squared.
y_pred_xg = xg_model.predict(X_test)
r2_xg = r2_score(y_test, y_pred_xg)
mse_xg = mean_squared_error(y_test, y_pred_xg)

print(f"XGBoost - Mean Squared Error: {mse_xg}")
print(f"XGBoost - R-squared: {r2_xg}")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Recompute test-set predictions from the grid-search winner so this cell
# does not rely on y_pred_rf still holding them.
y_pred_rf = best_rf_model.predict(X_test)

# Scatter of actual vs. predicted values with a dashed y = x reference line
# spanning the observed range of the actual values.
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred_rf, alpha=0.5, color='blue')
plt.plot(
    [y_test.min(), y_test.max()],
    [y_test.min(), y_test.max()],
    linestyle='--',
    color='red',
)
plt.title('Random Forest: Actual vs Predicted Fantasy Points')
plt.xlabel('Actual Fantasy Points')
plt.ylabel('Predicted Fantasy Points')
plt.show()

In [None]:
# Recompute test-set predictions from the fitted XGBoost regressor.
y_pred_xg = xg_model.predict(X_test)

# Scatter of actual vs. predicted values; the dashed red line marks y = x
# over the observed range of the actual values.
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred_xg, alpha=0.5, color='green')
plt.plot(
    [y_test.min(), y_test.max()],
    [y_test.min(), y_test.max()],
    linestyle='--',
    color='red',
)
plt.title('XGBoost: Actual vs Predicted Fantasy Points')
plt.xlabel('Actual Fantasy Points')
plt.ylabel('Predicted Fantasy Points')
plt.show()

In [None]:
import joblib
from sklearn.ensemble import RandomForestRegressor
# Persist the tuned estimator chosen by the grid search (best_rf_model), so the
# artifact named 'best_rf_model.pkl' is the same model that was evaluated on the
# test split. GridSearchCV refits the best parameter combination on the data
# passed to fit() by default, so no further training is needed here.
# Previously this cell trained a fresh, untuned 100-tree forest and saved that
# under the "best" name, silently discarding the hyperparameter search.
rf_model = best_rf_model
# Save the trained model to a .pkl file
joblib.dump(rf_model, 'best_rf_model.pkl')

In [None]:
# Read the persisted random forest back from disk and rebind rf_model to it.
rf_model = joblib.load(filename='best_rf_model.pkl')

In [None]:
import streamlit as st
import joblib
import math

# NOTE(review): Streamlit apps are normally launched with `streamlit run <script>.py`;
# confirm this cell is meant to be exported to a standalone script.
# Load the trained model
model = joblib.load('best_rf_model.pkl')
# Number of features the model was fitted on (scikit-learn estimators expose
# `n_features_in_` after fit). None means the attribute is unavailable and the
# count check below is skipped.
expected_features = getattr(model, 'n_features_in_', None)
# Streamlit user interface
st.title('Fantasy Points Prediction')
# User inputs: comma-separated numbers, presumably in the training column order — TODO confirm
player_stats = st.text_input("Enter player stats: ")
# Make prediction when button is clicked
if st.button('Predict Fantasy Points'):
    try:
        input_data = [float(x) for x in player_stats.split(',')]
    except ValueError:
        # Empty input, stray commas, or non-numeric text used to crash the app here.
        input_data = None
        st.error("Please enter comma-separated numeric values.")
    if input_data is not None:
        if not all(math.isfinite(value) for value in input_data):
            # float() accepts 'nan' and 'inf'; reject them before they reach the model.
            st.error("All values must be finite numbers.")
        elif expected_features is not None and len(input_data) != expected_features:
            # A wrong-length row would otherwise fail inside model.predict.
            st.error(f"Expected {expected_features} values but got {len(input_data)}.")
        else:
            prediction = model.predict([input_data])
            st.write(f"Predicted Fantasy Points: {prediction[0]}")