# Influence Data Analysis

This notebook explores classifying Parkinson’s disease motor phenotypes, tremor-dominant (TD) versus postural instability and gait difficulty (PIGD), with logistic regression, Gaussian Naive Bayes, and random forest models, using screening age, depression score, and anxiety score as predictors.

In [None]:
# Data handling and plotting libraries.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# The three classifiers compared in this notebook.
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
# Evaluation metrics and target-label encoding.
from sklearn.metrics import classification_report, roc_curve, auc
from sklearn.preprocessing import LabelEncoder
import warnings
# NOTE(review): this silences *every* warning for the rest of the session,
# including any raised while the models are fitted or evaluated. Consider
# narrowing the filter to specific warning categories.
warnings.filterwarnings("ignore")

In [None]:
# Read the study data; the path is resolved relative to the working directory.
df = pd.read_csv(filepath_or_buffer="influence_data.csv")
# Keep the bare expression last so the notebook renders the first five rows.
df.head(n=5)

In [None]:
# Map each phenotype label to an integer code. The encoder is learned first
# and then applied, so `le` keeps the label <-> code mapping for later use.
le = LabelEncoder()
le.fit(df["phenotype"])
df["phenotype_encoded"] = le.transform(df["phenotype"])

# Show how many rows fall into each encoded class.
df["phenotype_encoded"].value_counts()

In [None]:
# Predictor columns: age at screening plus the two questionnaire totals.
feature_columns = ["screening_age", "gds_total", "a_persistent_anxiety_total"]

# X holds the predictors, y the integer-encoded phenotype to be predicted.
X = df[feature_columns]
y = df["phenotype_encoded"]

In [None]:
# Build the three classifiers with their default settings; only the random
# forest takes a seed so that its fitted trees are reproducible.
log_model = LogisticRegression()
nb_model = GaussianNB()
rf_model = RandomForestClassifier(random_state=42)

# Fit every model on the full feature matrix and target.
log_model.fit(X, y)
nb_model.fit(X, y)
rf_model.fit(X, y)

In [None]:
# Out-of-sample evaluation.
# Predicting on the same rows the models were fitted on measures training
# performance, which is optimistically biased (a random forest in particular
# can nearly memorise its training data). Instead, every row's prediction
# below comes from a model that did not see that row during fitting.
from sklearn.model_selection import StratifiedKFold, cross_val_predict

# Stratified folds keep the class ratio similar in every fold; the fixed seed
# makes the split identical for every call below and across reruns.
# NOTE(review): 5 folds assumes each class has at least 5 rows - confirm.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# cross_val_predict fits clones of the estimators it is given, so the models
# already fitted on the full data are left untouched.
log_pred = cross_val_predict(log_model, X, y, cv=cv)
nb_pred = cross_val_predict(nb_model, X, y, cv=cv)
rf_pred = cross_val_predict(rf_model, X, y, cv=cv)

# Out-of-fold probability of the class encoded as 1 (second predict_proba
# column), used for the ROC curves.
# NOTE(review): taking column 1 assumes a two-class target - confirm.
log_prob = cross_val_predict(log_model, X, y, cv=cv, method="predict_proba")[:, 1]
nb_prob = cross_val_predict(nb_model, X, y, cv=cv, method="predict_proba")[:, 1]
rf_prob = cross_val_predict(rf_model, X, y, cv=cv, method="predict_proba")[:, 1]

In [None]:
# Pair each report heading with the predictions it summarises, then print
# the per-class precision / recall / F1 table for every model in turn.
model_reports = (
    ("Logistic Regression Report", log_pred),
    ("Naive Bayes Report", nb_pred),
    ("Random Forest Report", rf_pred),
)

for heading, predictions in model_reports:
    print(heading)
    print(classification_report(y, predictions))

In [None]:
# ROC coordinates per model; roc_curve also returns the thresholds as a
# third element, which is not needed for plotting.
fpr_log, tpr_log = roc_curve(y, log_prob)[:2]
fpr_nb, tpr_nb = roc_curve(y, nb_prob)[:2]
fpr_rf, tpr_rf = roc_curve(y, rf_prob)[:2]

# One (false-positive rate, true-positive rate, legend label) entry per curve.
roc_curves = (
    (fpr_log, tpr_log, 'Logistic Regression'),
    (fpr_nb, tpr_nb, 'Naive Bayes'),
    (fpr_rf, tpr_rf, 'Random Forest'),
)

fig, ax = plt.subplots(figsize=(8, 6))
for false_pos_rate, true_pos_rate, curve_label in roc_curves:
    ax.plot(false_pos_rate, true_pos_rate, label=curve_label)

# Dashed diagonal: the curve a random-guess classifier would trace.
ax.plot([0, 1], [0, 1], 'k--')

ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("ROC Curve - Phenotype Prediction")
ax.legend()
ax.grid(True)
plt.show()