In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.impute import SimpleImputer

In [8]:
data = pd.read_csv("injury_pred.csv")
data.head()

Unnamed: 0,Age,Position,Previous_Injuries,Type_of_Previous_Injury,Fitness_Level,Training_Hours_Per_Week,Sleep_Hours_Per_Night,Nutrition_Habits,Hydration_Level,Injury_Likelihood,Preventive_Techniques,Type_of_Injury_Predicted
0,25,Forward,2,Hamstring Strain,High,10,7,Balanced,Adequate,Low,"Regular stretching, warm-up, cooldown",Muscle Strain
1,28,Midfielder,1,Ankle Sprain,Moderate,8,6,Varied,Insufficient,Moderate,Injury-specific strength training,Ligament Sprain
2,23,Defender,0,-,High,12,8,High Protein,Optimal,Low,Proper biomechanics during training sessions,No Injury (Low Risk)
3,30,Goalkeeper,3,Concussion,Moderate,6,7,Balanced,Adequate,High,Goalkeeper-specific injury prevention drills,Head Injury (High Risk)
4,27,Midfielder,1,Groin Strain,High,9,7,Varied,Adequate,Moderate,Agility and quickness exercises,Muscle Strain


In [9]:
data['Nutrition_Habits'] = data['Nutrition_Habits'].map({'Balanced': 1, 'Varied': 2, 'High Protein': 3})
data['Hydration_Level'] = data['Hydration_Level'].map({'Adequate': 1, 'Insufficient': 2, 'Optimal': 3})
data['Fitness_Level'] = data['Fitness_Level'].map({'Low': 1, 'Moderate': 2, 'High': 3})

In [10]:
X = data[['Age', 'Previous_Injuries', 'Training_Hours_Per_Week', 'Sleep_Hours_Per_Night', 'Hydration_Level', 'Nutrition_Habits', 'Fitness_Level']]

In [11]:
y = data[['Injury_Likelihood', 'Preventive_Techniques', 'Type_of_Injury_Predicted']]

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [13]:
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)

In [14]:
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_imputed, y)

In [15]:
print("Please provide the following information:")
age = int(input("Age: "))
previous_injuries = int(input("Previous Injuries: "))
training_hours_per_week = int(input("Training Hours Per Week: "))
sleep_hours_per_night = int(input("Sleep Hours Per Night: "))
hydration_level = int(input("Hydration Level (1: Adequate, 2: Insufficient, 3: Optimal): "))
nutrition_habits = int(input("Nutrition Habits (1: Balanced, 2: Varied, 3: High Protein): "))
fitness_level = int(input("Fitness Level (1: Low, 2: Moderate, 3: High): "))

Please provide the following information:


In [16]:
player_data = [[age, previous_injuries, training_hours_per_week, sleep_hours_per_night, hydration_level, nutrition_habits, fitness_level]]
player_df = pd.DataFrame(player_data, columns=['Age', 'Previous_Injuries', 'Training_Hours_Per_Week', 'Sleep_Hours_Per_Night', 'Hydration_Level', 'Nutrition_Habits', 'Fitness_Level'])

In [17]:
player_df_imputed = imputer.transform(player_df)

In [18]:
prediction = model.predict(player_df_imputed)

In [19]:
print("Predicted Injury Likelihood:", prediction[0][0])
print("Predictive Preventive Techniques:", prediction[0][1])
print("Predicted Type of Injury:", prediction[0][2])

Predicted Injury Likelihood: Low
Predictive Preventive Techniques: Stretching and mobility drills
Predicted Type of Injury: No Injury (Low Risk)


In [20]:
y_pred = model.predict(X_test)



In [21]:
from sklearn.metrics import classification_report
for i, target_name in enumerate(y_test.columns):
    print(f"Classification Report for {target_name}:")
    print(classification_report(y_test.iloc[:, i], y_pred[:, i]))

Classification Report for Injury_Likelihood:
              precision    recall  f1-score   support

        High       1.00      0.96      0.98        27
         Low       0.98      1.00      0.99        54
    Moderate       0.97      0.97      0.97        30

    accuracy                           0.98       111
   macro avg       0.98      0.98      0.98       111
weighted avg       0.98      0.98      0.98       111

Classification Report for Preventive_Techniques:
                                              precision    recall  f1-score   support

                    Active recovery sessions       1.00      1.00      1.00         1
             Agility and quickness exercises       1.00      0.20      0.33         5
           Balance and proprioception drills       0.50      1.00      0.67         7
                       Cooling down properly       1.00      1.00      1.00         3
                    Core stability exercises       1.00      0.60      0.75        10
        

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [22]:
from joblib import dump, load

# Save the model
dump(model, "injury_prediction_model.joblib")

['injury_prediction_model.joblib']