In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier

def evaluate_model(model, X_test, y_test, model_name):
    """Print a classification report for a fitted model on held-out data.

    Parameters
    ----------
    model
        Fitted estimator; only its ``predict`` method is called here.
    X_test
        Feature matrix passed to ``model.predict``.
    y_test
        True labels the predictions are compared against.
    model_name : str
        Label shown in the report heading.

    Returns
    -------
    None
        The heading, the report and a separator line are written to stdout.
    """
    predictions = model.predict(X_test)
    print(f"Classification Report for {model_name}:")
    # When a model never predicts one of the classes, precision for that class
    # is undefined. The default ("warn") already reports 0.0 for it but emits
    # an UndefinedMetricWarning for every averaged row; zero_division=0 keeps
    # the same numbers without the warning noise.
    print(classification_report(y_test, predictions, zero_division=0))
    print("-" * 60)  # Separator

# Read the dataset from disk.
nba_data = pd.read_csv('nba_final.CSV')

# The 'Result' column is the target; every other column is used as a feature.
# Hold out 20% of the rows for testing, with a fixed seed for repeatability.
y = nba_data['Result']
X = nba_data.drop(columns='Result')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1113,
)

# Fit the scaler on the training split only, then apply the same
# transformation to both splits so no test-set statistics leak into training.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Deliberately constrained models (strong regularization / low capacity).
logreg = LogisticRegression(
    C=0.001,  # small C => stronger regularization
    random_state=1113,
)
rf = RandomForestClassifier(
    n_estimators=10,
    max_depth=5,
    min_samples_split=20,
    random_state=1113,
)
knn = KNeighborsClassifier(
    n_neighbors=20,  # larger neighbourhood => smoother decision boundary
)
svm = SVC(
    C=0.01,  # small C => stronger regularization
    random_state=1113,
)
gnb = GaussianNB(
    var_smoothing=1e-1,  # heavier variance smoothing
)
xgb = XGBClassifier(
    use_label_encoder=False,
    eval_metric='logloss',
    n_estimators=10,
    max_depth=2,
    min_child_weight=10,
    gamma=1,
    random_state=1113,
)

# Report heading paired with each estimator, in the order they are processed.
labelled_models = (
    ("Logistic Regression", logreg),
    ("Random Forest", rf),
    ("K-Nearest Neighbors", knn),
    ("Support Vector Machine", svm),
    ("Gaussian Naive Bayes", gnb),
    ("XGBoost", xgb),
)

# Train every model first ...
for label, estimator in labelled_models:
    estimator.fit(X_train, y_train)

# ... then print one classification report per model.
for label, estimator in labelled_models:
    evaluate_model(estimator, X_test, y_test, label)




Classification Report for Logistic Regression:
              precision    recall  f1-score   support

           0       0.85      0.64      0.73       210
           1       0.76      0.91      0.83       270

    accuracy                           0.79       480
   macro avg       0.81      0.77      0.78       480
weighted avg       0.80      0.79      0.79       480

------------------------------------------------------------
Classification Report for Random Forest:
              precision    recall  f1-score   support

           0       0.82      0.78      0.80       210
           1       0.83      0.87      0.85       270

    accuracy                           0.83       480
   macro avg       0.83      0.82      0.82       480
weighted avg       0.83      0.83      0.83       480

------------------------------------------------------------
Classification Report for K-Nearest Neighbors:
              precision    recall  f1-score   support

           0       0.71      0.71 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
