In [None]:
import pandas as pd
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.tree import DecisionTreeClassifier
from imblearn.over_sampling import SMOTE

# Absolute, machine-specific locations used by this script:
#   DATA_FILE_PATH   - CSV read by get_clean_data()
#   MODEL_FILE_PATH  - pickle of the trained classifier written by main()
#   SCALER_FILE_PATH - pickle of the fitted StandardScaler written by main()
DATA_FILE_PATH = "/Users/abhinavmittal/Desktop/Injury-Predictor-Analysis/final_data.csv"
MODEL_FILE_PATH = "/Users/abhinavmittal/Desktop/Injury-Predictor-Analysis/model.pkl"
SCALER_FILE_PATH = "/Users/abhinavmittal/Desktop/Injury-Predictor-Analysis/scaler.pkl"

def get_clean_data():
    """Read the CSV at ``DATA_FILE_PATH`` into a DataFrame.

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` when the file is
        missing, empty, or cannot be parsed. In each failure case a message
        describing the problem is printed before returning.
    """
    try:
        frame = pd.read_csv(DATA_FILE_PATH)
    except FileNotFoundError:
        print(f"Error: The file at {DATA_FILE_PATH} was not found.")
    except pd.errors.EmptyDataError:
        print("Error: The CSV file is empty.")
    except pd.errors.ParserError:
        print("Error: The CSV file is corrupted or incorrectly formatted.")
    else:
        print("Data loaded successfully.")
        return frame

    # Every handled failure falls through to here.
    return None

def create_model(data):
    """Train and evaluate a decision-tree classifier on the injury data.

    The data is split into train/test sets *before* any fitting step. The
    scaler and the SMOTE oversampler are fitted on the training split only,
    so the held-out rows are real, unseen observations and the printed
    metrics are not inflated by synthetic or leaked samples.

    Args:
        data: DataFrame containing an ``injury`` label column. The feature
            matrix is taken positionally from columns 1 through 4.

    Returns:
        A ``(model, scaler)`` tuple: the fitted ``DecisionTreeClassifier``
        and the ``StandardScaler`` fitted on the training features. The same
        scaler must be applied to any input passed to the model later.
    """
    X = data.iloc[:, 1:5].values
    y = data['injury'].values

    # Hold out the test rows first so nothing below ever sees them.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=0
    )

    # Fit the scaler on real training rows only; the test split is merely
    # transformed with the training statistics.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Oversample the training split only. Doing it after scaling keeps
    # SMOTE's nearest-neighbour distances comparable across features, and
    # the fixed seed makes the run reproducible like the split and the tree.
    oversample = SMOTE(random_state=0)
    X_train, y_train = oversample.fit_resample(X_train, y_train)

    model = DecisionTreeClassifier(max_depth=30, min_samples_split=100, random_state=0)
    model.fit(X_train, y_train)

    # Evaluate on the untouched (original class distribution) test split.
    y_pred = model.predict(X_test)
    print("Model Accuracy:", accuracy_score(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))

    return model, scaler

def main():
    """Load the data, train the model, and pickle the model and scaler.

    Returns early without training when the data cannot be loaded. Any
    error raised while writing the pickle files is printed rather than
    propagated; a failure on the model file also skips the scaler file.
    Finally prints the first rows of the loaded data.
    """
    frame = get_clean_data()
    if frame is None:
        # get_clean_data() has already printed the reason.
        return

    fitted_model, fitted_scaler = create_model(frame)

    try:
        with open(MODEL_FILE_PATH, 'wb') as sink:
            pickle.dump(fitted_model, sink)
        print(f"Model saved to {MODEL_FILE_PATH}")

        with open(SCALER_FILE_PATH, 'wb') as sink:
            pickle.dump(fitted_scaler, sink)
        print(f"Scaler saved to {SCALER_FILE_PATH}")
    except Exception as exc:
        # Top-level boundary: report the failure and carry on to the preview.
        print(f"Error saving model or scaler: {exc}")

    print("\nSample Data Overview:")
    print(frame.head())

# Run the training pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()


Data loaded successfully.
Model Accuracy: 0.8696612665684831
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.87      0.87       693
           1       0.86      0.87      0.87       665

    accuracy                           0.87      1358
   macro avg       0.87      0.87      0.87      1358
weighted avg       0.87      0.87      0.87      1358

Model saved to /Users/abhinavmittal/Downloads/ML/Injury-Predictor-Analysis/model/model.pkl
Scaler saved to /Users/abhinavmittal/Downloads/ML/Injury-Predictor-Analysis/model/scaler.pkl

Sample Data Overview:
   injury  game_workload  groin_squeeze  hip_mobility  rest_period
0       0            402          284.0          35.0          4.0
1       0            365          250.0          41.0          3.0
2       1            457          331.0          33.0          3.0
3       1            405          260.0          38.0          5.0
4       0            407          378.0       