In [1]:
# 📦 Imports
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.metrics import accuracy_score

In [2]:
# 📂 Read the preprocessed hair-loss table from the project's data folder
df = pd.read_csv(filepath_or_buffer='../data/processed_hair_loss.csv')

In [3]:
# 🎯 Target is the 'Hair Loss' column; every other column is a feature
y = df['Hair Loss']
X = df.drop(columns='Hair Loss')

In [4]:
# 🔀 Hold out 20% of the rows for testing; the seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

Ridge Regression

In [6]:
# 📘 Ridge Regression
#
# Ridge is a regressor, so it is used here as a makeshift classifier: the
# continuous prediction is rounded to the nearest integer and compared with
# the true label.

# One-hot encode categorical columns for Ridge Regression
X_train_ridge = pd.get_dummies(X_train)
X_test_ridge = pd.get_dummies(X_test)

# Align columns in case some categories are missing in test/train.
# join='left' keeps exactly the training columns: dummy columns missing from
# the test split are added and filled with 0, test-only columns are dropped.
X_train_ridge, X_test_ridge = X_train_ridge.align(X_test_ridge, join='left', axis=1, fill_value=0)

ridge_model = Ridge()
ridge_model.fit(X_train_ridge, y_train)

# A linear model's output is unbounded, so a rounded prediction can fall
# outside the set of labels (e.g. -1 or 2). Clamp to the label range seen in
# training so every prediction is a value the target can actually take.
# NOTE(review): assumes 'Hair Loss' is integer-coded — confirm against the data.
y_pred_ridge = (
    ridge_model.predict(X_test_ridge)
    .round()
    .clip(y_train.min(), y_train.max())
    .astype(int)
)
acc_ridge = accuracy_score(y_test, y_pred_ridge)
print("Ridge Regression Accuracy:", acc_ridge)

Ridge Regression Accuracy: 0.53


Random Forest Classifier

In [8]:
# 🌲 Random Forest
# Trained on the one-hot encoded matrices built in the Ridge Regression cell
# (X_train_ridge / X_test_ridge), so that cell must be run first.
#
# random_state is pinned to the same seed as the train/test split: the forest
# is a randomized estimator, and without a fixed seed the reported accuracy
# changes on every run.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train_ridge, y_train)
y_pred_rf = rf_model.predict(X_test_ridge)
acc_rf = accuracy_score(y_test, y_pred_rf)
print("Random Forest Accuracy:", acc_rf)

Random Forest Accuracy: 0.475


Gaussian Naive Bayes

In [10]:
# 🧪 Gaussian NB — fitted on the same one-hot encoded matrices as the models above
gnb = GaussianNB().fit(X_train_ridge, y_train)  # fit() returns the estimator itself
y_pred_gnb = gnb.predict(X_test_ridge)

acc_gnb = accuracy_score(y_test, y_pred_gnb)
print("Gaussian NB Accuracy:", acc_gnb)

Gaussian NB Accuracy: 0.53


Multinomial Naive Bayes

In [12]:
# 🧪 Multinomial NB (ensure non-negative features)
# Numeric columns are identified once on the training frame; the same column
# list is then applied to both splits.
num_cols = X_train.select_dtypes(include=['number']).columns

# Work on copies so the original splits stay untouched, and floor only the
# numeric columns at zero (non-numeric columns are left as they are).
X_train_nb = X_train.copy()
X_train_nb[num_cols] = X_train[num_cols].clip(lower=0)

X_test_nb = X_test.copy()
X_test_nb[num_cols] = X_test[num_cols].clip(lower=0)

In [14]:
# One-hot encode both clipped frames for MultinomialNB and, in the same step,
# line the test columns up with the training columns: join='left' keeps the
# training layout, and dummy columns absent from the test split are filled with 0.
X_train_nb_enc, X_test_nb_enc = pd.get_dummies(X_train_nb).align(
    pd.get_dummies(X_test_nb),
    join='left',
    axis=1,
    fill_value=0,
)

mnb = MultinomialNB().fit(X_train_nb_enc, y_train)  # fit() returns the estimator itself
y_pred_mnb = mnb.predict(X_test_nb_enc)

acc_mnb = accuracy_score(y_test, y_pred_mnb)
print("Multinomial NB Accuracy:", acc_mnb)

Multinomial NB Accuracy: 0.535


In [16]:
# Gather each model's test accuracy, keyed by display name
results = {
    "Ridge Regression": acc_ridge,
    "Random Forest": acc_rf,
    "Gaussian NB": acc_gnb,
    "Multinomial NB": acc_mnb,
}

# One row per model; the sorted view (best accuracy first) is the cell's output,
# while results_df itself keeps the insertion order.
results_df = pd.DataFrame(
    {"Model": list(results.keys()), "Accuracy": list(results.values())}
)
results_df.sort_values("Accuracy", ascending=False)

Unnamed: 0,Model,Accuracy
3,Multinomial NB,0.535
0,Ridge Regression,0.53
2,Gaussian NB,0.53
1,Random Forest,0.475
