In [52]:
import pandas as pd

# Load the merged datasets
als_data = pd.read_csv('/Users/opethompson/Desktop/ALS PROCESSED/Diagnostics/ALS Diagnosis (ALS Natural History).csv')

In [53]:
# Define a threshold for dropping columns (50% missing values)
threshold = 0.5
columns_to_drop = als_data.columns[als_data.isnull().mean() > threshold]
als_data.drop(columns=columns_to_drop, axis=1, inplace=True)

In [54]:
# handling missing data: For numerical columns, we use the median; for categorical columns, we use the mode
for col in als_data.columns:
    if als_data[col].dtype == 'object':  # Categorical data
        als_data[col].fillna(als_data[col].mode()[0], inplace=True)
    else:  # Numerical data
        als_data[col].fillna(als_data[col].median(), inplace=True)

In [55]:
#Drop Rows with Missing Values
als_data.dropna(inplace=True)


In [56]:
# Replace every occurrence of 99 with 0
als_data = als_data.replace(99, 0)

In [57]:
# Replace every occurrence of 90 with 0
als_data = als_data.replace(90, 0)

In [58]:
# Save the cleaned data to a new file
als_data.to_csv('preprocessedALShx_data.csv', index=False)


In [67]:
#training and evaluation
#Load data set
df= pd.read_csv('/Users/opethompson/Desktop/ALS PROCESSED/preprocessedALShx_data.csv')

#Building and Evaluating the Model for ALS Diagnosis
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.metrics import classification_report, roc_auc_score

# Split the data
X = df.drop('elescrlr', axis=1)
y = df['elescrlr']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
modeldx = RandomForestClassifier()
modeldx.fit(X_train, y_train)

# Evaluate the model
predictions = modeldx.predict(X_test)
print("Accuracy:", accuracy_score(y_test, predictions))

# Calculate precision, recall, and F1-score
print(classification_report(y_test, predictions))

# Calculate ROC-AUC for binary classification tasks
# We must check if y is binary before applying roc_auc_score
if len(y.unique()) == 2:
    probs = modeldx.predict_proba(X_test)[:, 1]  # get the probabilities of the positive class
    roc_auc = roc_auc_score(y_test, probs)
    print("ROC-AUC:", roc_auc)


Accuracy: 0.6212121212121212
              precision    recall  f1-score   support

           1       0.58      0.70      0.64        30
           2       0.50      0.47      0.48        47
           3       0.57      0.38      0.45        45
           4       0.58      0.68      0.63       100
           5       0.75      0.71      0.73       108

    accuracy                           0.62       330
   macro avg       0.60      0.59      0.59       330
weighted avg       0.62      0.62      0.62       330



In [65]:
# Feature Importance Analysis

import numpy as np

# Extracting feature importances
importance_diagnosis = modeldx.feature_importances_

# Function to summarize feature importances
def summarize_feature_importances(importances, feature_names, top_n=10):
    indices = np.argsort(importances)[::-1]
    top_features = [(feature_names[i], importances[i]) for i in indices[:top_n]]
    return top_features

# Top features for ALS Diagnosis
top_features_diagnosis = summarize_feature_importances(importance_diagnosis, df.columns)

top_features_diagnosis


[('internal_subject_id', 0.1943334199959146),
 ('blbcumn', 0.07199887079510064),
 ('blbclmn', 0.05702231890619021),
 ('trnkclmn', 0.05362854871856702),
 ('trnkelmn', 0.04817391724854775),
 ('rleelmn', 0.048002079849190395),
 ('rueelmn', 0.04762260819523785),
 ('trnkcumn', 0.04750520348741263),
 ('blbelmn', 0.04701654473291307),
 ('lleelmn', 0.0468976284140643)]

In [69]:
# Function to recommend treatment based on ALS diagnosis
def recommend_treatment(diagnosis):
    """
    Recommends treatment based on ALS diagnosis.

    :param diagnosis: The diagnosis result (1, 2, 3, 4, 5)
    :return: Recommended treatment
    """
    if diagnosis == 5:
        return "Standard ALS treatment protocol"
    elif diagnosis == 4:
        return "Probable ALS treatment protocol"
    elif diagnosis == 2 or diagnosis == 3:
        return "Conservative observation and symptomatic treatment"
    else:
        return "Further diagnostic evaluation required"