In [2]:
import pandas as pd

In [3]:
hair_data = pd.read_csv('../Data/Predict Hair Fall Cleaned.csv')
luke_data = pd.read_csv('../Data/Luke_hair_loss_documentation Cleaned.csv')
nutrition_data = pd.read_csv('../Data/Cleaned_Nutrition_Dataset.csv')

In [4]:
print(hair_data.columns)
print(luke_data.columns)    
print(nutrition_data.columns)

Index(['Id', 'Genetics', 'Hormonal_Changes', 'Medical_Conditions',
       'Medications_and_Treatments', 'Nutritional_Deficiencies', 'Stress',
       'Age', 'Poor_Hair_Care_Habits', 'Environmental_Factors', 'Smoking',
       'Weight_Loss', 'Hair_Loss', 'Genetic_Encoding', 'Hormonal_Encoding',
       'Poor_Hair_Care_Encoding', 'Environmental_Encoding', 'Smoking_Encoding',
       'Weight_Loss_Encoding', 'Stress_Level', 'Age_Range',
       'Nutritional_Deficiencies_missing'],
      dtype='object')
Index(['Date', 'Hair_Loss', 'Stay_Up_Late', 'Pressure_Level',
       'Coffee_Consumed', 'Brain_Working_Duration', 'School_Assesssment',
       'Stress_Level', 'Shampoo_Brand', 'Swimming', 'Hair_Washing',
       'Hair_Grease', 'Dandruff', 'Libido', 'Hair_Loss_Encoding',
       'Pressure_Level_Encoding', 'Stress_Level_Encoding', 'Swimming_Encoding',
       'Hair_Washing_Encoding', 'Dandruff_Encoding'],
      dtype='object')
Index(['Food', 'Caloric_Value', 'Fat', 'Saturated_Fats',
       'Monounsatu

In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, classification_report

# Map stress levels to numeric
stress_map = {'Low':0,'Medium':1,'High':2}

# Map Luke Hair_Loss_Encoding to binary
def luke_hair_loss_binary(x):
    return 0 if x in [0,1] else 1


In [18]:
hair_data['target'] = hair_data['Hair_Loss']  # Already 0/1
hair_features = ['Genetic_Encoding','Hormonal_Encoding','Stress_Level','Age',
                 'Poor_Hair_Care_Encoding','Environmental_Encoding',
                 'Smoking_Encoding','Weight_Loss_Encoding']

X_hair = hair_data[hair_features]
y_hair = hair_data['target']

Xh_train, Xh_test, yh_train, yh_test = train_test_split(X_hair, y_hair, 
                                                        test_size=0.15, 
                                                        stratify=y_hair, random_state=42)

# Logistic Regression pipeline
num_features = ['Age','Stress_Level']
cat_features = [f for f in hair_features if f not in num_features]

preproc_hair = ColumnTransformer([
    ('num', StandardScaler(), num_features),
    ('cat','passthrough',cat_features)
])

model_hair = Pipeline([
    ('pre', preproc_hair),
    ('clf', LogisticRegression(max_iter=2000, class_weight='balanced'))
])

model_hair.fit(Xh_train, yh_train)


In [20]:
# Map Stress_Level

luke_data['Stress_Level_num'] = luke_data['Stress_Level'].map(stress_map)
luke_data['Hair_Loss_binary'] = luke_data['Hair_Loss_Encoding'].apply(luke_hair_loss_binary)
luke_features = ['Stay_Up_Late','Pressure_Level_Encoding','Coffee_Consumed','Stress_Level_num']

X_luke = luke_data[luke_features]
y_luke = luke_data['Hair_Loss_binary']
X_luke = X_luke.dropna()
y_luke = y_luke[X_luke.index]


Xl_train, Xl_test, yl_train, yl_test = train_test_split(X_luke, y_luke, 
                                                        test_size=0.15, 
                                                        stratify=y_luke, random_state=42)

num_features_luke = ['Stay_Up_Late','Coffee_Consumed','Stress_Level_num']
cat_features_luke = ['Pressure_Level_Encoding']

preproc_luke = ColumnTransformer([
    ('num', StandardScaler(), num_features_luke),
    ('cat','passthrough',cat_features_luke)
])

model_luke = Pipeline([
    ('pre', preproc_luke),
    ('clf', LogisticRegression(max_iter=2000, class_weight='balanced'))
])

model_luke.fit(Xl_train, yl_train)


In [21]:
def get_user_input():
    # Hair_data inputs
    Genetics = int(input("Genetics (1=Yes,0=No): "))
    Hormonal_Changes = int(input("Hormonal Changes (1=Yes,0=No): "))
    Medical_Conditions = int(input("Medical Conditions (1=Yes,0=No): "))
    Nutritional_Deficiencies = int(input("Nutritional Deficiencies (1=Yes,0=No): "))
    Stress = input("Stress (Low/Medium/High): ")
    Age = int(input("Age: "))
    Poor_Hair_Care_Habits = int(input("Poor Hair Care Habits (1=Yes,0=No): "))
    Environmental_Factors = int(input("Environmental Factors (1=Yes,0=No): "))
    Smoking = int(input("Smoking (1=Yes,0=No): "))
    Weight_Loss = int(input("Weight Loss (1=Yes,0=No): "))

    # Luke_data inputs
    Stay_Up_Late = float(input("Avg hours staying up late per day: "))
    Pressure_Level = input("Pressure Level (Low/Medium/High): ")
    Coffee_Consumed = float(input("Coffee Consumed per day: "))

    # Encode stress and pressure
    Stress_Level_num = stress_map[Stress]
    Pressure_Level_Encoding = stress_map[Pressure_Level]

    # Hair_data feature vector
    X_user_hair = pd.DataFrame([{
        'Genetic_Encoding': Genetics,
        'Hormonal_Encoding': Hormonal_Changes,
        'Stress_Level': Stress_Level_num,
        'Age': Age,
        'Poor_Hair_Care_Encoding': Poor_Hair_Care_Habits,
        'Environmental_Encoding': Environmental_Factors,
        'Smoking_Encoding': Smoking,
        'Weight_Loss_Encoding': Weight_Loss
    }])

    # Luke feature vector
    X_user_luke = pd.DataFrame([{
        'Stay_Up_Late': Stay_Up_Late,
        'Pressure_Level_Encoding': Pressure_Level_Encoding,
        'Coffee_Consumed': Coffee_Consumed,
        'Stress_Level_num': Stress_Level_num
    }])

    return X_user_hair, X_user_luke


In [None]:
Xh_user, Xl_user = get_user_input()

p_base = model_hair.predict_proba(Xh_user)[:,1][0]
p_short = model_luke.predict_proba(Xl_user)[:,1][0]

# Weighted combination
final_prob = 0.7*p_base + 0.3*p_short

print(f"Baseline risk (long-term factors): {p_base:.2f}")
print(f"Short-term risk (recent behavior): {p_short:.2f}")
print(f"Final combined hair loss probability: {final_prob:.2f}")


In [8]:
import pandas as pd
import numpy as np

# --- Mapping dictionaries ---
stress_map = {'Low':0,'Medium':1,'High':2}
pressure_map = {'Low':0,'Medium':1,'High':2}

# --- Simulate random user input ---
def simulate_user_input():
    Genetics = np.random.randint(0,2)  # 0 or 1
    Hormonal_Changes = np.random.randint(0,2)
    Medical_Conditions = np.random.randint(0,2)
    Nutritional_Deficiencies = np.random.randint(0,2)
    Stress = np.random.choice(['Low','Medium','High'])
    Age = np.random.randint(15,70)
    Poor_Hair_Care_Habits = np.random.randint(0,2)
    Environmental_Factors = np.random.randint(0,2)
    Smoking = np.random.randint(0,2)
    Weight_Loss = np.random.randint(0,2)
    
    Stay_Up_Late = np.random.randint(0,6)  # hours
    Pressure_Level = np.random.choice(['Low','Medium','High'])
    Coffee_Consumed = np.random.randint(0,6)  # cups per day

    # Encode categorical
    Stress_Level_num = stress_map[Stress]
    Pressure_Level_Encoding = pressure_map[Pressure_Level]

    # Hair_data feature vector
    X_user_hair = pd.DataFrame([{
        'Genetic_Encoding': Genetics,
        'Hormonal_Encoding': Hormonal_Changes,
        'Stress_Level': Stress_Level_num,
        'Age': Age,
        'Poor_Hair_Care_Encoding': Poor_Hair_Care_Habits,
        'Environmental_Encoding': Environmental_Factors,
        'Smoking_Encoding': Smoking,
        'Weight_Loss_Encoding': Weight_Loss
    }])

    # Luke feature vector
    X_user_luke = pd.DataFrame([{
        'Stay_Up_Late': Stay_Up_Late,
        'Pressure_Level_Encoding': Pressure_Level_Encoding,
        'Coffee_Consumed': Coffee_Consumed,
        'Stress_Level_num': Stress_Level_num
    }])

    return X_user_hair, X_user_luke, {
        'Genetics':Genetics,
        'Hormonal_Changes':Hormonal_Changes,
        'Medical_Conditions':Medical_Conditions,
        'Nutritional_Deficiencies':Nutritional_Deficiencies,
        'Stress':Stress,
        'Age':Age,
        'Poor_Hair_Care_Habits':Poor_Hair_Care_Habits,
        'Environmental_Factors':Environmental_Factors,
        'Smoking':Smoking,
        'Weight_Loss':Weight_Loss,
        'Stay_Up_Late':Stay_Up_Late,
        'Pressure_Level':Pressure_Level,
        'Coffee_Consumed':Coffee_Consumed
    }

# --- Simulate input ---
Xh_user, Xl_user, user_dict = simulate_user_input()

# --- Assuming models are already trained ---
# p_base = model_hair.predict_proba(Xh_user)[:,1][0]
# p_short = model_luke.predict_proba(Xl_user)[:,1][0]

# For simulation without trained models, we can generate random probabilities:
p_base = np.random.rand()
p_short = np.random.rand()
final_prob = 0.7*p_base + 0.3*p_short

# --- Show results ---
print("=== Simulated User Input ===")
for k,v in user_dict.items():
    print(f"{k}: {v}")
print("\n=== Model Predictions ===")
print(f"Baseline risk (long-term factors): {p_base:.2f}")
print(f"Short-term risk (recent behavior): {p_short:.2f}")
print(f"Final combined hair loss probability: {final_prob:.2f}")


=== Simulated User Input ===
Genetics: 1
Hormonal_Changes: 1
Medical_Conditions: 1
Nutritional_Deficiencies: 0
Stress: Low
Age: 65
Poor_Hair_Care_Habits: 0
Environmental_Factors: 1
Smoking: 0
Weight_Loss: 1
Stay_Up_Late: 2
Pressure_Level: High
Coffee_Consumed: 4

=== Model Predictions ===
Baseline risk (long-term factors): 0.96
Short-term risk (recent behavior): 0.27
Final combined hair loss probability: 0.75
