In [28]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report
from joblib import dump


In [29]:
# Raw dataset, read from the notebook's working directory.
DATASET_PATH = "injury_pred.csv"

data = pd.read_csv(DATASET_PATH)

# Preview the first five rows to check the columns loaded as expected.
data.head(n=5)

Unnamed: 0,Age,Position,Previous_Injuries,Type_of_Previous_Injury,Fitness_Level,Training_Hours_Per_Week,Sleep_Hours_Per_Night,Nutrition_Habits,Hydration_Level,Injury_Likelihood,Preventive_Techniques,Type_of_Injury_Predicted
0,34,Midfielder,0,-,High,7,7,Varied,Adequate,Low,Functional movement exercises,No Injury (Low Risk)
1,28,Forward,2,ACL Tear,High,9,7,Varied,Adequate,Moderate,Proprioception and balance training,Ligament Sprain
2,22,Forward,1,Wrist Sprain,High,9,6,Varied,Optimal,Moderate,Dynamic warm-up routines,Ligament Sprain
3,27,Forward,0,-,Moderate,6,8,Balanced,Adequate,Low,Periodic rest and recovery practices,No Injury (Low Risk)
4,25,Midfielder,1,Hamstring Strain,High,12,7,High Protein,Insufficient,Moderate,Proprioception and balance training,Ligament Sprain


In [30]:
# Integer codes used for each categorical feature column.
CATEGORY_CODES = {
    'Nutrition_Habits': {'Balanced': 1, 'Varied': 2, 'High Protein': 3},
    'Hydration_Level': {'Adequate': 1, 'Insufficient': 2, 'Optimal': 3},
    'Fitness_Level': {'Low': 1, 'Moderate': 2, 'High': 3},
    'Position': {'Forward': 1, 'Midfielder': 2, 'Defender': 3, 'GoalKeeper': 4},
}

for column, codes in CATEGORY_CODES.items():
    # Re-running this cell must be a no-op: once a column already holds numeric
    # codes, mapping it again through the label dictionary would turn every
    # value into NaN.
    if pd.api.types.is_numeric_dtype(data[column]):
        continue

    labels = data[column]

    # Series.map yields NaN for any label missing from the dictionary; report
    # those labels instead of letting them disappear silently.
    unmapped = sorted(set(labels.dropna().unique()) - set(codes), key=str)
    if unmapped:
        print(f"Warning: {column} has labels with no code (they become NaN): {unmapped}")

    data[column] = labels.map(codes)

In [31]:
# Model inputs. The order of this list defines the column order of X.
feature_columns = [
    'Age',
    'Previous_Injuries',
    'Training_Hours_Per_Week',
    'Sleep_Hours_Per_Night',
    'Hydration_Level',
    'Nutrition_Habits',
    'Fitness_Level',
    'Position',
]

# Three label columns are predicted together.
target_columns = [
    'Injury_Likelihood',
    'Preventive_Techniques',
    'Type_of_Injury_Predicted',
]

X = data[feature_columns]
y = data[target_columns]

In [32]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
split_options = {"test_size": 0.2, "random_state": 42}
split_parts = train_test_split(X, y, **split_options)
X_train, X_test, y_train, y_test = split_parts

In [33]:
# Learn per-column means from the training rows only, then apply that same
# fitted imputer to both splits so the test rows never influence the fill values.
# NOTE(review): several columns hold integer category codes, so a mean fill can
# produce a non-integer code; confirm that this is intended.
imputer = SimpleImputer(strategy='mean')
imputer.fit(X_train)

X_train_imputed, X_test_imputed = (
    imputer.transform(split) for split in (X_train, X_test)
)

In [46]:
# Random forest settings; the fixed seed makes training repeatable.
forest_params = {"n_estimators": 30, "random_state": 42}

# y_train carries three label columns, so the forest is fitted on all of them together.
model = RandomForestClassifier(**forest_params)
model.fit(X_train_imputed, y_train)

In [35]:
def _prompt_int(prompt, allowed=None, minimum=None):
    """Ask for a whole number until a valid one is entered.

    Parameters
    ----------
    prompt : str
        Text shown to the user.
    allowed : collection of int, optional
        If given, the answer must be one of these values.
    minimum : int, optional
        If given, the answer must be greater than or equal to this value.

    Returns
    -------
    int
        The validated answer.
    """
    while True:
        raw = input(prompt)
        try:
            value = int(raw)
        except ValueError:
            # A typo should not abort the whole cell; ask again instead.
            print(f"'{raw}' is not a whole number, please try again.")
            continue
        if allowed is not None and value not in allowed:
            print(f"Please enter one of: {sorted(allowed)}")
            continue
        if minimum is not None and value < minimum:
            print(f"Please enter a value of at least {minimum}.")
            continue
        return value


print("Please provide the following information:")
age = _prompt_int("Age: ", minimum=0)
previous_injuries = _prompt_int("Previous Injuries: ", minimum=0)
training_hours_per_week = _prompt_int("Training Hours Per Week: ", minimum=0)
sleep_hours_per_night = _prompt_int("Sleep Hours Per Night: ", minimum=0)
# The coded answers must be 1, 2 or 3, the only options each prompt offers.
hydration_level = _prompt_int("Hydration Level (1: Adequate, 2: Insufficient, 3: Optimal): ", allowed={1, 2, 3})
nutrition_habits = _prompt_int("Nutrition Habits (1: Balanced, 2: Varied, 3: High Protein): ", allowed={1, 2, 3})
fitness_level = _prompt_int("Fitness Level (1: Low, 2: Moderate, 3: High): ", allowed={1, 2, 3})

Please provide the following information:


In [36]:
# Position labels and the integer codes offered in the prompt below.
position_mapping = {'Forward': 1, 'Midfielder': 2, 'Defender': 3, 'GoalKeeper': 4}
print("Select Position: (1) Forward, (2) Midfielder, (3) Defender, (4) GoalKeeper")

raw_position = input("Enter the number corresponding to your position: ")
try:
    position_input = int(raw_position)
except ValueError:
    # Non-numeric text is treated like any other invalid choice instead of
    # aborting the cell with a traceback.
    position_input = None

if position_input in position_mapping.values():
    position = position_input
else:
    # Keep the existing fallback to Forward, but say so: a silent default would
    # produce a prediction for a position the user never chose.
    position = 1
    print(f"Unrecognised position '{raw_position}'; defaulting to (1) Forward.")

Select Position: (1) Forward, (2) Midfielder, (3) Defender, (4) GoalKeeper


In [37]:
# Answers listed in the same order as the columns of X, so each value lands
# under the matching feature name.
answers = [
    age,
    previous_injuries,
    training_hours_per_week,
    sleep_hours_per_night,
    hydration_level,
    nutrition_habits,
    fitness_level,
    position,
]

# A single-row table: one inner list per player.
player_data = [answers]
player_df = pd.DataFrame(data=player_data, columns=X.columns)

In [38]:
# Pass the single-row frame through the imputer that was fitted on the training
# split, so the row receives the same treatment as the data the model was trained on.
player_df_imputed = imputer.transform(player_df)

In [39]:
# Predict for the single player row. The model was fitted on three target
# columns, so the result row holds one label per target.
prediction = model.predict(player_df_imputed)

In [40]:
# Labels for the three predicted targets, in the same order as the model's output columns.
output_labels = (
    "Predicted Injury Likelihood:",
    "Predictive Preventive Techniques:",
    "Predicted Type of Injury:",
)

# Only one player was submitted, so the first row is the whole result.
player_result = prediction[0]
for position_in_row, label in enumerate(output_labels):
    print(label, player_result[position_in_row])

Predicted Injury Likelihood: High
Predictive Preventive Techniques: Corrective stretching
Predicted Type of Injury: Cartilage Injury (High Risk)


In [41]:
# Predict all three targets for the held-out rows; column i of y_pred
# corresponds to column i of y_test.
y_pred = model.predict(X_test_imputed)

for i, target_name in enumerate(y_test.columns):
    print(f"\nClassification Report for {target_name}:")
    # Some labels are never predicted, which leaves their precision undefined.
    # zero_division=0 reports those scores as 0 without emitting a warning for
    # every affected metric.
    print(classification_report(y_test[target_name], y_pred[:, i], zero_division=0))


Classification Report for Injury_Likelihood:
              precision    recall  f1-score   support

        High       0.97      0.99      0.98       131
         Low       0.98      1.00      0.99       284
    Moderate       0.99      0.95      0.97       185

    accuracy                           0.98       600
   macro avg       0.98      0.98      0.98       600
weighted avg       0.98      0.98      0.98       600


Classification Report for Preventive_Techniques:
                                              precision    recall  f1-score   support

                    Active recovery sessions       0.24      0.25      0.24        56
           Balance and proprioception drills       0.17      0.19      0.18        32
              Balance and stability training       0.24      0.25      0.24        32
                    Core stability exercises       0.00      0.00      0.00         3
                        Corrective exercises       0.00      0.00      0.00        15
      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [47]:
# Persist the fitted forest to disk with joblib.
# NOTE(review): only `model` is saved. The fitted `imputer` and the category
# code tables are not, so whoever loads this file presumably has to recreate
# them; confirm against the consumer.
dump(model, "injury_prediction_model.joblib")

['injury_prediction_model.joblib']