# Extracting the training and validating dataset with target value

In [13]:
import csv
import os

# Function to extract roiX, roiY, roiZ, and aclDiagnosis from the first 900 rows
def extract_data_from_csv(input_file_path, output_file_path):
    extracted_data = []
    row_count = 0

    with open(input_file_path, mode='r') as csvfile:
        csvreader = csv.DictReader(csvfile)
        for row in csvreader:
            if row_count >= 900:
                break
            data = {
                'roiX': row['roiX'],
                'roiY': row['roiY'],
                'roiZ': row['roiZ'],
                'aclDiagnosis': row['aclDiagnosis']
            }
            extracted_data.append(data)
            row_count += 1

    # Save extracted data to a new CSV file
    with open(output_file_path, mode='w', newline='') as csvfile:
        fieldnames = ['roiX', 'roiY', 'roiZ', 'aclDiagnosis']
        csvwriter = csv.DictWriter(csvfile, fieldnames=fieldnames)

        csvwriter.writeheader()
        for data in extracted_data:
            csvwriter.writerow(data)

# Paths to the input and output CSV files
input_csv_file_path = '../data/metadata.csv'
output_csv_file_path = '../data/training_dataset.csv'

# Extract data and save to new CSV file
extract_data_from_csv(input_csv_file_path, output_csv_file_path)

print(f"Extracted data saved to {output_csv_file_path}")

Extracted data saved to ../data/training_dataset.csv


In [14]:
import csv

# Function to extract roiX, roiY, roiZ, and aclDiagnosis from the last 18 rows
def extract_last_18_rows(input_file_path, output_file_path):
    extracted_data = []

    with open(input_file_path, mode='r') as csvfile:
        csvreader = list(csv.DictReader(csvfile))
        fieldnames = ['roiX', 'roiY', 'roiZ', 'aclDiagnosis']
        last_18_rows = csvreader[-18:]
        
        for row in last_18_rows:
            data = {
                'roiX': row['roiX'],
                'roiY': row['roiY'],
                'roiZ': row['roiZ'],
                'aclDiagnosis': row['aclDiagnosis']
            }
            extracted_data.append(data)

    # Save extracted data to a new CSV file
    with open(output_file_path, mode='w', newline='') as csvfile:
        csvwriter = csv.DictWriter(csvfile, fieldnames=fieldnames)
        csvwriter.writeheader()
        for data in extracted_data:
            csvwriter.writerow(data)

# Paths to the input and output CSV files
input_csv_file_path = '../data/metadata.csv'
output_csv_file_path = '../data/validating_dataset.csv'

# Extract data and save to new CSV file
extract_last_18_rows(input_csv_file_path, output_csv_file_path)

print(f"Extracted data saved to {output_csv_file_path}")

Extracted data saved to ../data/validating_dataset.csv


# Training the model using Random Forest Algorithm

In [19]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import joblib

# Ensure directories exist
data_dir = '../data'
results_dir = '../results'
os.makedirs(data_dir, exist_ok=True)
os.makedirs(results_dir, exist_ok=True)

# Load the training dataset
input_csv_file_path = os.path.join(data_dir, 'training_dataset.csv')
if not os.path.exists(input_csv_file_path):
    raise FileNotFoundError(f"The file {input_csv_file_path} does not exist.")

data = pd.read_csv(input_csv_file_path)

# Handle missing values if any
if data.isnull().values.any():
    data = data.dropna()

# Map aclDiagnosis to target categories: 0 for healthy, 1 for recovered, 2 for ACL injured
data['target'] = data['aclDiagnosis'].map({0: 0, 1: 1, 2: 2})

# Extract the features (roiX, roiY, roiZ) and the target (target)
X = data[['roiX', 'roiY', 'roiZ']]
y = data['target']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
rf_classifier.fit(X_train, y_train)

# Make predictions
y_pred = rf_classifier.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print("Confusion Matrix:")
print(conf_matrix)
print("Classification Report:")
print(class_report)

# Save the model
model_path = os.path.join(results_dir, '../models/random_forest_model.joblib')
joblib.dump(rf_classifier, model_path)
print(f"Model saved to {model_path}")

Accuracy: 0.6722222222222223
Confusion Matrix:
[[120  11   0]
 [ 36   1   0]
 [ 11   1   0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.92      0.81       131
           1       0.08      0.03      0.04        37
           2       0.00      0.00      0.00        12

    accuracy                           0.67       180
   macro avg       0.27      0.31      0.28       180
weighted avg       0.54      0.67      0.59       180

Model saved to ../results/../models/random_forest_model.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Use to model to test the on new dataset using single rows values as input

In [40]:
import pandas as pd
import joblib
from tabulate import tabulate

# Load the saved model
model_path = '../models/random_forest_model.joblib'
rf_classifier = joblib.load(model_path)

# Function to input values and make predictions
def predict_acl_diagnosis():
    # Get input values
    roiX = float(input("Enter roiX: "))
    roiY = float(input("Enter roiY: "))
    roiZ = float(input("Enter roiZ: "))
    
    # Create a DataFrame for the input values
    input_data = pd.DataFrame({
        'roiX': [roiX],
        'roiY': [roiY],
        'roiZ': [roiZ]
    })
    
    # Make prediction
    prediction = rf_classifier.predict(input_data)
    
    # Map prediction to diagnosis category
    if prediction[0] == 0:
        diagnosis = "Healthy"
    elif prediction[0] == 1:
        diagnosis = "Fully Recovered"
    elif prediction[0] == 2:
        diagnosis = "ACL Injured"
    else:
        diagnosis = "Unknown"
    
    print(f"The prediction is: {diagnosis}")
    
    # Handle actions based on the diagnosis
    if diagnosis == "ACL Injured":
        # Print ACL_Injury_Exercises.csv
        print("ACL Injury Exercises:")
        exercises_file = '../data/ACL_Injury_Exercises.csv'
        try:
            exercises_data = pd.read_csv(exercises_file)
            print(tabulate(exercises_data, headers='keys', tablefmt='pretty', showindex=False))
        except FileNotFoundError:
            print(f"The file {exercises_file} was not found.")

# Predict ACL diagnosis and suggest exercises
predict_acl_diagnosis()

Enter roiX: 125
Enter roiY: 91
Enter roiZ: 15
The prediction is: ACL Injured
ACL Injury Exercises:
+----------------------------------------------+-----------------------+---------------------------------------------------------------------------------------------------------------------------------------+---------------+
|                    Stage                     |       Exercise        |                                                              Description                                                              |   Reps/Sets   |
+----------------------------------------------+-----------------------+---------------------------------------------------------------------------------------------------------------------------------------+---------------+
|                 Acute Stage                  |      Ankle Pumps      |              Lie on your back with legs straight. Flex and point your toes to stimulate blood flow and reduce swelling.               |  10-15 reps   |
|

# Test data in either csv or xlsx format

In [41]:
import pandas as pd
import joblib

# Load the saved model
model_path = '../models/random_forest_model.joblib'
rf_classifier = joblib.load(model_path)

# Function to input file and make predictions
def predict_acl_diagnosis_from_file():
    try:
        # Get input file path
        input_file_path = input("Enter the path to the input file (CSV or Excel): ")
        
        # Determine the file type and load the data
        if input_file_path.endswith('.csv'):
            data = pd.read_csv(input_file_path)
        elif input_file_path.endswith('.xlsx') or input_file_path.endswith('.xls'):
            data = pd.read_excel(input_file_path)
        else:
            raise ValueError("Unsupported file format. Please provide a CSV or Excel file.")
        
        # Check if there are enough columns
        if data.shape[1] < 3:
            raise ValueError("The input file must have at least 3 columns for roiX, roiY, and roiZ.")
        
        # Rename columns to match the model's expected feature names
        data.rename(columns={
            'Axis X': 'roiX',
            'Axis Y': 'roiY',
            'Axis Z': 'roiZ'
        }, inplace=True)
        
        # Check if the required columns are present
        if not all(col in data.columns for col in ['roiX', 'roiY', 'roiZ']):
            raise ValueError("The input file must contain columns named roiX, roiY, and roiZ.")
        
        # Extract features (roiX, roiY, roiZ)
        X = data[['roiX', 'roiY', 'roiZ']]
        
        # Make predictions
        predictions = rf_classifier.predict(X)
        
        # Debug: Print the predictions
        print("Predictions:", predictions)
        
        # Add predictions to the DataFrame
        data['aclDiagnosis_pred'] = predictions
        
        # Debug: Print the DataFrame columns
        print("DataFrame columns:", data.columns)
        
        # Save the predictions to a new file
        if input_file_path.endswith('.csv'):
            output_file_path = '../results/predicted_acl_diagnosis_random_forest.csv'
            data.to_csv(output_file_path, index=False)
        elif input_file_path.endswith('.xlsx') or input_file_path.endswith('.xls'):
            output_file_path = '../results/predicted_acl_diagnosis_random_forest.xlsx'
            data.to_excel(output_file_path, index=False)
        
        print(f"Predictions saved to {output_file_path}")
        
        # Ensure the column 'aclDiagnosis_pred' is in the DataFrame
        if 'aclDiagnosis_pred' not in data.columns:
            raise KeyError("The column 'aclDiagnosis_pred' is missing from the DataFrame.")
        
        # Determine the final prediction using the voting method
        final_prediction = data['aclDiagnosis_pred'].mode()[0]
        
        # Map prediction to diagnosis category
        if final_prediction == 0:
            diagnosis = "Healthy"
        elif final_prediction == 1:
            diagnosis = "Fully Recovered"
        elif final_prediction == 2:
            diagnosis = "ACL Injured"
        else:
            diagnosis = "Unknown"
        
        print(f"The final prediction based on the voting method is: {diagnosis}")
        
        # Handle actions based on the diagnosis
        if diagnosis == "ACL Injured":
            # Print ACL_Injury_Exercises.csv
            print("ACL Injury Exercises:")
            exercises_file = '../data/ACL_Injury_Exercises.csv'
            try:
                exercises_data = pd.read_csv(exercises_file)
                print(exercises_data)
            except FileNotFoundError:
                print(f"The file {exercises_file} was not found.")
    
    except Exception as e:
        print(f"An error occurred: {e}")

# Predict ACL diagnosis from file
predict_acl_diagnosis_from_file()

Enter the path to the input file (CSV or Excel): ../data/carla_dataset.xlsx
Predictions: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
DataFrame columns: Index(['roiX', 'roiZ', 'roiY', 'aclDiagnosis_pred'], dtype='object')
Predictions saved to ../results/predicted_acl_diagnosis_random_forest.xlsx
The final prediction based on the voting method is: Healthy
