In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.metrics import confusion_matrix, classification_report
from google.colab import files

# Upload the CSV file
uploaded = files.upload()


# Load the dataset
bears_df = pd.read_csv('bears-gamelogs_1994-2023.csv', parse_dates=['Date'])

# Identify columns with missing values
columns_with_missing_values = bears_df.columns[bears_df.isnull().any()].tolist()

# Handle missing values for numeric columns
numeric_features = bears_df.select_dtypes(include=['int64', 'float64']).columns
bears_df[numeric_features] = bears_df[numeric_features].fillna(bears_df[numeric_features].mean())


label_encoder = LabelEncoder()
bears_df['Home/Away_Code'] = label_encoder.fit_transform(bears_df['Home/Away'])
bears_df['Opponent_Code'] = label_encoder.fit_transform(bears_df['Opp'])
bears_df['Day_Code'] = bears_df['Date'].dt.dayofweek


bears_df['Bears_Score'] = bears_df['Team Points']
bears_df['Opponent_Score'] = bears_df['Points Allowed']


predictors = [
    'Home/Away_Code', 'Opponent_Code', 'Day_Code',
    'Bears_Score', 'Opponent_Score'
]

X = bears_df[predictors]
y = (bears_df['Win/Loss'] == 'W').astype(int)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

rf = RandomForestClassifier(random_state=42)

param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10]
}

# Grid Search
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Get the best parameters
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

# Train RandomForestClassifier
rf_best = RandomForestClassifier(**best_params, random_state=42)
rf_best.fit(X_train, y_train)

# Make predictions
preds = rf_best.predict(X_test)

# Accuracy
accuracy = accuracy_score(y_test, preds)
print("Accuracy:", accuracy)

preds = rf_best.predict(X_test)

# Confusion matrix
print("Confusion Matrix:")
print(confusion_matrix(y_test, preds))

# Classification report
print("Classification Report:")
print(classification_report(y_test, preds))


Saving bears-gamelogs_1994-2023.csv to bears-gamelogs_1994-2023 (6).csv
Best Parameters: {'max_depth': None, 'min_samples_split': 5, 'n_estimators': 200}
Accuracy: 0.9896907216494846
Accuracy: 0.9896907216494846
Confusion Matrix:
[[52  0]
 [ 1 44]]
Classification Report:
              precision    recall  f1-score   support

           0       0.98      1.00      0.99        52
           1       1.00      0.98      0.99        45

    accuracy                           0.99        97
   macro avg       0.99      0.99      0.99        97
weighted avg       0.99      0.99      0.99        97

