In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.feature_selection import SelectFromModel

# Read the injury dataset from disk into a DataFrame.
file_path = 'injury_data.csv'
injury_data = pd.read_csv(file_path)

# Column names are listed once so the feature/target selection stays readable.
feature_columns = [
    'Player_Age',
    'Player_Weight',
    'Player_Height',
    'Previous_Injuries',
    'Training_Intensity',
    'Recovery_Time',
]
target_column = 'Likelihood_of_Injury'

# X holds the predictor columns, y the target variable.
X = injury_data[feature_columns]
y = injury_data[target_column]

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply that
# same transformation to both the training and the test partition.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Baseline estimators with a fixed seed.
# NOTE(review): neither instance is referenced again in the visible code (the
# grid searches construct their own estimators) - confirm whether later cells
# rely on them before removing.
random_forest = RandomForestClassifier(random_state=42)
neural_network = MLPClassifier(
    hidden_layer_sizes=(100, 50),
    max_iter=500,
    random_state=42,
)

# Hyperparameter search for the random forest: every combination in the grid
# is scored with 5-fold cross-validation on the scaled training data.
param_grid_rf = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20],
    min_samples_split=[2, 5],
    min_samples_leaf=[1, 2],
)
grid_rf = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid_rf,
    cv=5,
)
grid_rf.fit(X_train_scaled, y_train)

# Best-scoring configuration; with GridSearchCV's default refit=True it has
# already been refit on the whole training set.
best_rf = grid_rf.best_estimator_

# Grid Search for Neural Network
# MLPClassifier only consults `learning_rate` when solver='sgd'. Crossing it
# with 'adam' would cross-validate every adam configuration twice (identical
# models, identical scores), so the search space is split per solver:
# 6 adam candidates + 12 sgd candidates instead of 24.
# Candidate order changes as a result, so only an exact tie in mean CV score
# between an adam and an sgd candidate could select a different winner.
_nn_shared_grid = {
    'hidden_layer_sizes': [(100,), (150, 100), (200, 100)],
    'activation': ['relu', 'tanh'],
}
param_grid_nn = [
    {**_nn_shared_grid, 'solver': ['adam']},
    {
        **_nn_shared_grid,
        'solver': ['sgd'],
        'learning_rate': ['constant', 'adaptive'],
    },
]

# 5-fold cross-validation over every candidate on the scaled training data.
grid_nn = GridSearchCV(MLPClassifier(max_iter=500, random_state=42), param_grid_nn, cv=5)
grid_nn.fit(X_train_scaled, y_train)

# Best-scoring configuration; with GridSearchCV's default refit=True it has
# already been refit on the whole training set.
best_nn = grid_nn.best_estimator_

# Evaluate the tuned random forest on the held-out test set. No training
# happens here: the estimator was already fitted by the grid search.
y_pred_rf = best_rf.predict(X_test_scaled)
accuracy_rf = accuracy_score(y_true=y_test, y_pred=y_pred_rf)
report_rf = classification_report(y_true=y_test, y_pred=y_pred_rf)

# Same evaluation for the tuned neural network.
y_pred_nn = best_nn.predict(X_test_scaled)
accuracy_nn = accuracy_score(y_true=y_test, y_pred=y_pred_nn)
report_nn = classification_report(y_true=y_test, y_pred=y_pred_nn)

# Feature Importance & Selection for Random Forest (optional experiment):
# keep only the features whose importance is at least the mean importance,
# capped at 5 features. With the default prefit=False, SelectFromModel fits
# its own clone of `best_rf`, so the tuned model is not modified by this step.
feature_selector = SelectFromModel(best_rf, threshold="mean", max_features=5)
X_train_selected = feature_selector.fit_transform(X_train_scaled, y_train)
X_test_selected = feature_selector.transform(X_test_scaled)

# Train a separate forest, with the tuned hyperparameters, on the reduced
# feature set. Refitting `best_rf` in place would overwrite the model chosen
# by the grid search: it would stop matching `accuracy_rf` / `report_rf` and
# could no longer predict on the full feature matrix.
rf_selected = RandomForestClassifier(**best_rf.get_params())
rf_selected.fit(X_train_selected, y_train)

# Score the reduced-feature model on the held-out test set.
y_pred_rf_selected = rf_selected.predict(X_test_selected)
accuracy_rf_selected = accuracy_score(y_test, y_pred_rf_selected)
report_rf_selected = classification_report(y_test, y_pred_rf_selected)

# Print one section per model: heading, accuracy rounded to two decimals,
# then the full classification report. Headings after the first start with a
# newline so the sections are separated by a blank line.
result_sections = (
    ("Best Random Forest Results:", accuracy_rf, report_rf),
    ("\nBest Neural Network Results:", accuracy_nn, report_nn),
    (
        "\nRandom Forest with Feature Selection Results:",
        accuracy_rf_selected,
        report_rf_selected,
    ),
)
for heading, accuracy, report in result_sections:
    print(heading)
    print(f"Accuracy: {accuracy:.2f}")
    print("Classification Report:")
    print(report)




Best Random Forest Results:
Accuracy: 0.56
Classification Report:
              precision    recall  f1-score   support

           0       0.54      0.55      0.54        95
           1       0.58      0.57      0.58       105

    accuracy                           0.56       200
   macro avg       0.56      0.56      0.56       200
weighted avg       0.56      0.56      0.56       200


Best Neural Network Results:
Accuracy: 0.54
Classification Report:
              precision    recall  f1-score   support

           0       0.52      0.53      0.52        95
           1       0.56      0.55      0.56       105

    accuracy                           0.54       200
   macro avg       0.54      0.54      0.54       200
weighted avg       0.54      0.54      0.54       200


Random Forest with Feature Selection Results:
Accuracy: 0.54
Classification Report:
              precision    recall  f1-score   support

           0       0.51      0.59      0.55        95
           1      