In [9]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler
import pandas as pd
import joblib

# Evaluate the persisted logistic-regression model on the held-out test set.
#
# Steps: load the model and data, align the test columns to the training
# columns, drop incomplete rows from features AND labels with one shared mask,
# standardize with a scaler fitted on the training features, predict, report.

# Load the trained model
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model artifacts that come from a trusted source.
logreg_model = joblib.load('logreg_model2.pkl')

# Load the test data (week 9)
test_feat = pd.read_csv('../data/processed_data/testing_features.csv')

# Load the training data to ensure feature consistency
train_features = pd.read_csv('../data/processed_data/training_features.csv')

# Load the true target labels (play_type) for the test data
y_test = pd.read_csv('../data/processed_data/week_9_test_target.csv')  # Assuming this is the correct path

# Features and labels are paired by row position, so they must have the same
# number of rows; fail early with a clear message instead of a late KeyError.
if len(test_feat) != len(y_test):
    raise ValueError(
        f"Test features have {len(test_feat)} rows but test labels have "
        f"{len(y_test)} rows; they must describe the same samples."
    )

# Restrict the test set to the training columns, in the training column order
# (raises KeyError if the test file lacks any training column).
test_features = test_feat[train_features.columns]

# Build ONE row mask and apply it to both features and labels, so that the
# rows fed to the model are exactly the rows the labels are compared against.
# A row is kept only when neither its features nor its label contain NaN.
complete_rows = test_feat.notna().all(axis=1) & y_test.notna().all(axis=1)
test_feat_clean = test_feat.loc[complete_rows]
test_features_clean = test_features.loc[complete_rows]
y_test_clean = y_test.loc[complete_rows]

print(f"Clean Test Features rows: {test_feat_clean.shape[0]}")
print(f"Clean Test Labels rows: {y_test_clean.shape[0]}")

# Fit the scaler on the training data only, then transform (never fit on) the
# test data.
# NOTE(review): this assumes the model was trained on features standardized the
# same way -- confirm, or persist the fitted scaler together with the model.
scaler = StandardScaler()
scaler.fit(train_features)
X_test_scaled = scaler.transform(test_features_clean)

# Step 3: Make predictions on the scaled, cleaned test data
y_test_pred = logreg_model.predict(X_test_scaled)

# Step 4: Evaluate the model on the test data
accuracy = accuracy_score(y_test_clean, y_test_pred)
print(f"Test Accuracy: {accuracy:.4f}")

# Classification report for precision, recall, and F1-score
print("Classification Report:")
print(classification_report(y_test_clean, y_test_pred))


Clean Test Features rows: 1461
Clean Test Labels rows: 1461
Test Accuracy: 0.5496
Classification Report:
              precision    recall  f1-score   support

           0       0.50      0.98      0.66       652
           1       0.94      0.20      0.33       809

    accuracy                           0.55      1461
   macro avg       0.72      0.59      0.50      1461
weighted avg       0.74      0.55      0.48      1461



