<a href="https://colab.research.google.com/github/Amit-Jindar/AyurGenix/blob/main/AyurGenix.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Scalp Classification Model**

In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, GlobalAveragePooling2D, Input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.regularizers import l2
from google.colab import drive
from sklearn.model_selection import train_test_split

# Step 1: Mount Google Drive
# The dataset and the model checkpoint paths used below all live under
# /content/drive, so the mount has to happen before anything else runs.
drive.mount('/content/drive')

# Root directory that load_images_from_folder() walks; each directory that
# directly contains image files is treated as one class.
dataset_folder = "/content/drive/MyDrive/dataset"

# Step 2: Load and preprocess images efficiently
def load_images_from_folder(folder, img_size=(160, 160)):
    """Load every readable image under ``folder`` and label it by its directory.

    The tree is walked recursively. The name of the directory that directly
    contains an image (its basename) is used as the class name, and class
    indices are handed out in the order classes are first encountered.

    Args:
        folder: Root directory to walk.
        img_size: Target size passed to ``cv2.resize`` for every image.

    Returns:
        A tuple ``(images, labels, class_dict)``:
        ``images`` is a float32 array of the resized images in RGB channel
        order with the pixel values as decoded (not rescaled), ``labels`` is
        an int32 array of class indices aligned with ``images``, and
        ``class_dict`` maps class name -> class index.
    """
    images = []
    labels = []
    class_dict = {}

    for root, dirs, files in os.walk(folder):
        # os.walk yields entries in arbitrary filesystem order. Sorting the
        # directory list in place fixes the traversal order, so the
        # name -> index mapping is the same on every run.
        dirs.sort()
        if not files:
            continue
        class_name = os.path.basename(root)

        for filename in sorted(files):
            img_path = os.path.join(root, filename)
            img = cv2.imread(img_path)
            if img is None:
                print(f"Warning: Unable to read image {img_path}")
                continue

            # Register the class only once it has at least one decodable
            # image; otherwise a stray non-image file would create a class
            # with no samples and inflate len(class_dict).
            if class_name not in class_dict:
                class_dict[class_name] = len(class_dict)

            # cv2.imread decodes to BGR; convert so the channel order matches
            # the RGB order that ImageNet-pretrained weights expect.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, img_size)
            images.append(img)
            labels.append(class_dict[class_name])

    return np.array(images, dtype=np.float32), np.array(labels, dtype=np.int32), class_dict

# Load dataset
images, labels, class_dict = load_images_from_folder(dataset_folder)

# Fail early with a clear message instead of a confusing error further down
# (the stratified split cannot work on an empty array).
if len(images) == 0:
    raise ValueError(f"No readable images found under {dataset_folder}")

# Print class distribution
print("Class Distribution:", np.bincount(labels))

# Pixel values are intentionally left in the 0-255 range: the Keras
# EfficientNet models rescale their input internally, so dividing by 255
# here would normalize twice and feed near-zero inputs to the pretrained
# backbone.

# Free up memory
import gc
gc.collect()

# Step 3: Use tf.data for efficient processing
def preprocess_image(image, label):
    """Resize ``image`` to 160x160 via ``tf.image.resize``; ``label`` passes through untouched."""
    target_size = (160, 160)
    resized = tf.image.resize(image, target_size)
    return resized, label

# Split dataset properly using train_test_split
# Stratified 80/20 split with a fixed seed so the validation set is stable.
X_train, X_val, y_train, y_val = train_test_split(images, labels, test_size=0.2, stratify=labels, random_state=42)

# Convert to TensorFlow dataset
batch_size = 64

# Training pipeline: cache the decoded samples, then reshuffle them on every
# pass so each epoch sees different batch compositions. Without the shuffle
# every epoch replays exactly the same batches in the same order.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .map(preprocess_image)
    .cache()
    .shuffle(buffer_size=len(X_train), reshuffle_each_iteration=True)
    .batch(batch_size)
    .repeat()
    .prefetch(tf.data.AUTOTUNE)
)

# Validation pipeline: deterministic order, batches cached after first pass.
val_dataset = (
    tf.data.Dataset.from_tensor_slices((X_val, y_val))
    .map(preprocess_image)
    .batch(batch_size)
    .cache()
    .repeat()
    .prefetch(tf.data.AUTOTUNE)
)

# Step 4: Optimized Data Augmentation
# NOTE(review): this generator is configured but nothing in the notebook as
# shown feeds data through it, so no augmentation is applied during training.
data_augmentation = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.15,
    height_shift_range=0.15,
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.1,
    brightness_range=[0.8, 1.2],
    fill_mode='nearest'
)

# Step 5: Define CNN model with EfficientNetB3 (Optimized for Memory)
base_model = EfficientNetB3(weights='imagenet', include_top=False, input_shape=(160, 160, 3))
base_model.trainable = True

# Freeze first 100 layers
for layer in base_model.layers[:100]:
    layer.trainable = False

# Classification head: pooled backbone features -> two regularized dense
# layers with dropout -> one softmax unit per discovered class.
model = Sequential([
    Input(shape=(160, 160, 3)),
    base_model,
    GlobalAveragePooling2D(),
    BatchNormalization(),
    Dense(512, activation='relu', kernel_regularizer=l2(0.01)),
    Dropout(0.4),
    Dense(256, activation='relu', kernel_regularizer=l2(0.01)),
    Dropout(0.4),
    Dense(len(class_dict), activation='softmax')
])

# Step 6: Compile model with improved optimizer
# Labels are integer class indices, hence the sparse cross-entropy loss.
optimizer = SGD(learning_rate=0.001, momentum=0.9, nesterov=True)
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Enable automatic checkpoint saving
# Only the epoch with the lowest validation loss is kept on Drive.
checkpoint_path = "/content/drive/MyDrive/hair_loss_model_checkpoint.keras"
checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, save_best_only=True, monitor='val_loss', mode='min')

# Stop training once validation loss has not improved for 5 epochs and roll
# the model back to the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Reduce Learning Rate on Plateau
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.3, patience=3, min_lr=1e-6)

# Learning rate warm-up
def scheduler(epoch, lr):
    """Return the next learning rate: grow 20% per epoch for epochs 0-4, then decay 10% per epoch."""
    warmup_epochs = 5
    factor = 1.2 if epoch < warmup_epochs else 0.9
    return lr * factor

lr_warmup = LearningRateScheduler(scheduler)

# Enable TensorFlow mixed precision for efficiency
# NOTE(review): the global policy is set here, after `model` has already been
# constructed above, so it presumably does not change the dtype policy of the
# existing layers. To actually train in mixed precision this call needs to
# move ahead of the model definition (and the softmax output should then be
# kept in float32) - confirm against the Keras mixed-precision guide.
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy('mixed_float16')

# Train with optimized settings
# Ceiling division: floor division yields 0 steps when a split holds fewer
# samples than one batch, and otherwise leaves the final partial batch out of
# every validation pass.
steps_per_epoch = (len(X_train) + batch_size - 1) // batch_size
val_steps = (len(X_val) + batch_size - 1) // batch_size

model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=30,
    steps_per_epoch=steps_per_epoch,
    validation_steps=val_steps,
    callbacks=[checkpoint_callback, early_stopping, lr_scheduler, lr_warmup],
)

# Evaluate model accuracy
val_loss, val_accuracy = model.evaluate(val_dataset, steps=val_steps)
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")

print("Model training completed and best version saved!")

**HairFall Prediction Model**


In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
import pandas as pd
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.regularizers import l2
from google.colab import drive
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.ensemble import StackingClassifier, RandomForestClassifier
from imblearn.over_sampling import SMOTE
from imblearn.combine import SMOTETomek
import joblib
import optuna
import xgboost as xgb
import lightgbm as lgb

# Ensure necessary libraries are installed
!pip install tensorflow pandas numpy scikit-learn imbalanced-learn joblib optuna xgboost lightgbm

def mount_drive():
    """Mount Google Drive at ``/content/drive``, passing ``force_remount=True``."""
    mount_point = '/content/drive'
    drive.mount(mount_point, force_remount=True)

def load_image_model(model_path):
    """Load the saved image-classification model from ``model_path``.

    Returns the object produced by ``load_model`` when the path exists;
    otherwise prints a hint and returns ``None``.
    """
    # Guard clause: nothing to load if the checkpoint was never written.
    if not os.path.exists(model_path):
        print("Model checkpoint not found! Please train the image classification model first.")
        return None

    image_model = load_model(model_path)
    print("Loaded pre-trained image classification model.")
    return image_model

def optimize_hyperparams(trial, X_train, y_train, X_val, y_val):
    """Optuna objective: fit one LightGBM classifier and score it on the validation data.

    Args:
        trial: Optuna trial used to sample the hyper-parameters.
        X_train, y_train: Data the candidate model is fitted on.
        X_val, y_val: Data the candidate model is scored on.

    Returns:
        Accuracy of the candidate model on ``(X_val, y_val)``; the caller is
        expected to maximize it.
    """
    # Log-scaled ranges use suggest_float(..., log=True); suggest_loguniform
    # is deprecated in current Optuna releases. Names and bounds are unchanged,
    # so study.best_params keeps the same keys.
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 200, 400),
        'max_depth': trial.suggest_int('max_depth', 6, 12),
        'num_leaves': trial.suggest_int('num_leaves', 20, 50),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.2, log=True),
        'subsample': trial.suggest_float('subsample', 0.7, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-3, 1.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-3, 1.0, log=True),
    }
    model = lgb.LGBMClassifier(**params, n_jobs=-1)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    return accuracy_score(y_val, y_pred)

def _oversample_training_fold(X_train, y_train):
    """Balance one training fold with SMOTE, then SMOTETomek, else return it unchanged."""
    try:
        return SMOTE(random_state=42, sampling_strategy="auto").fit_resample(X_train, y_train)
    except ValueError:
        print("SMOTE failed, switching to SMOTETomek.")

    try:
        return SMOTETomek(random_state=42).fit_resample(X_train, y_train)
    except ValueError as err:
        # Best effort: an unbalanced fold is still usable for training.
        print(f"SMOTETomek failed as well ({err}); training on the original fold.")
        return X_train, y_train


def train_hair_fall_prediction(model_save_path, dataset_path):
    """Train, cross-validate and save the questionnaire-based hair-fall classifier.

    The CSV is one-hot encoded and standardized, then evaluated with 5-fold
    stratified cross-validation. In each fold a LightGBM model is tuned with
    Optuna and stacked with a random forest; the stacked model from the fold
    with the highest validation accuracy is written to ``model_save_path``.

    Args:
        model_save_path: Destination passed to ``joblib.dump``.
        dataset_path: CSV file containing the features and a ``Hair Loss``
            target column (column names are stripped of surrounding spaces).

    Returns:
        The stacked classifier from the best-scoring fold.

    Raises:
        KeyError: If the CSV has no ``Hair Loss`` column.
    """
    questionnaire_data = pd.read_csv(dataset_path)
    questionnaire_data.columns = questionnaire_data.columns.str.strip()

    if 'Hair Loss' not in questionnaire_data.columns:
        # Single formatted message; passing the columns as a second argument
        # made the exception carry a tuple instead of readable text.
        raise KeyError(
            "Column 'Hair Loss' not found in dataset. "
            f"Check column names: {list(questionnaire_data.columns)}"
        )

    X = questionnaire_data.drop(columns=['Hair Loss'])
    y = questionnaire_data['Hair Loss']
    X = pd.get_dummies(X, drop_first=True)
    y = LabelEncoder().fit_transform(y)

    # NOTE(review): the fitted scaler (and the dummy-column layout) is not
    # saved with the model, so inference code has to rebuild the same
    # preprocessing from this CSV.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    can_oversample = len(np.unique(y)) > 1
    if not can_oversample:
        print("Skipping SMOTE due to insufficient class variance.")

    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    best_model = None
    best_accuracy = 0

    # Split the ORIGINAL rows first and oversample only the training part of
    # each fold. Oversampling before the split puts synthetic points
    # interpolated from validation rows into the training data, which inflates
    # the reported accuracy.
    for train_index, val_index in skf.split(X_scaled, y):
        X_train, X_val = X_scaled[train_index], X_scaled[val_index]
        y_train, y_val = y[train_index], y[val_index]

        if can_oversample:
            X_train, y_train = _oversample_training_fold(X_train, y_train)

        # NOTE(review): hyper-parameters are tuned on the same validation fold
        # that is used for the accuracy printed below, so that figure is
        # still somewhat optimistic.
        study = optuna.create_study(direction='maximize')
        study.optimize(lambda trial: optimize_hyperparams(trial, X_train, y_train, X_val, y_val), n_trials=10)

        best_params = study.best_params
        lgb_model = lgb.LGBMClassifier(**best_params, n_jobs=-1)
        rf_model = RandomForestClassifier(n_estimators=200, random_state=42, n_jobs=-1)

        stack_model = StackingClassifier(
            estimators=[('lgb', lgb_model), ('rf', rf_model)],
            final_estimator=lgb.LGBMClassifier(n_jobs=-1)
        )

        stack_model.fit(X_train, y_train)
        y_pred = stack_model.predict(X_val)
        accuracy = accuracy_score(y_val, y_pred)

        # Always keep the first fold's model so that None is never pickled,
        # even if every fold scores 0.
        if best_model is None or accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model = stack_model

        print(f"Stacking Model Accuracy: {accuracy * 100:.2f}%")

    print(f"Best Model Accuracy: {best_accuracy * 100:.2f}%")
    joblib.dump(best_model, model_save_path)
    print(f"Hair Fall Prediction model saved at {model_save_path}")
    return best_model

# Entry point for this cell: mount Drive, try to load the image model trained
# in the previous cell, then train and save the questionnaire model.
if __name__ == "__main__":
    mount_drive()
    # Same checkpoint path the scalp-classification cell writes to.
    model_path = "/content/drive/MyDrive/hair_loss_model_checkpoint.keras"
    hair_fall_model_path = "/content/drive/MyDrive/hair_fall_prediction_model.pkl"
    dataset_path = "/content/drive/MyDrive/Predict Hair Fall.csv"

    # image_model is None when the checkpoint is missing; it is not used by
    # the questionnaire training below.
    image_model = load_image_model(model_path)
    hair_fall_model = train_hair_fall_prediction(hair_fall_model_path, dataset_path)


**Recommendation Engine**

In [None]:
import pandas as pd
import os

# Load questionnaire predictions
# NOTE(review): this is the same CSV path the hair-fall training cell reads;
# unlike that cell, the column names are not stripped of whitespace here.
csv_file = "/content/drive/MyDrive/Predict Hair Fall.csv"
df = pd.read_csv(csv_file)

# Define scalp classification folder structure
# Maps each top-level folder under scalp_folder to the sub-folders (scalp
# conditions) that get_scalp_condition() searches, in search order.
scalp_folder = "/content/drive/MyDrive/Scalp"
scalp_categories = {
    "Doctor Required": ["Doctor Required"],
    "Female": [
        "Fem Recede Edge", "Hair Parting & Thinning Patterns", "Patchy Hair Loss",
        "Scalp Thinning Conditions", "Scalp Thinning Conditions Stage 2", "Scalp Thinning Conditions Stage 3"
    ],
    "Male": [
        "Stage 1", "Stage 2", "Stage 3", "Stage 4", "Stage 5", "Stage 6", "Stage 7", "Random Patches"
    ]
}

# Define recommendations for each scalp category
# Keyed by the sub-category names from scalp_categories. Every entry has the
# same five fields: "food", "exercise", "youtube_links" and "products" are
# lists of strings, "tips" is a single string.
# NOTE(review): "Scalp Thinning Conditions" and its "Stage 2" / "Stage 3"
# variants are listed in scalp_categories but have no entry here, so those
# users receive default_recommendation.
recommendations = {
    "Stage 1": {
        "food": ["Spinach", "Carrots", "Salmon", "Almonds"],
        "exercise": ["Yoga", "Neck Stretching"],
        "youtube_links": ["https://www.youtube.com/watch?v=example1"],
        "products": ["Herbal Shampoo", "Scalp Nourishing Oil"],
        "tips": "Use mild shampoo, avoid excessive heat styling."
    },
    "Stage 2": {
        "food": ["Eggs", "Avocados", "Berries", "Walnuts"],
        "exercise": ["Scalp Massage", "Neck Tilts"],
        "youtube_links": ["https://www.youtube.com/watch?v=example2"],
        "products": ["Hydrating Hair Serum", "Anti-Hair Fall Shampoo"],
        "tips": "Avoid tight hairstyles, ensure adequate hydration."
    },
    "Stage 3": {
        "food": ["Sweet Potatoes", "Flaxseeds", "Greek Yogurt", "Pumpkin Seeds"],
        "exercise": ["Aerobic Exercises", "Acupressure Therapy"],
        "youtube_links": ["https://www.youtube.com/watch?v=example3"],
        "products": ["Deep Repair Hair Mask", "Nourishing Hair Oil"],
        "tips": "Increase protein intake, avoid excessive chemical treatments."
    },
    "Stage 4": {
        "food": ["Lentils", "Brazil Nuts", "Sunflower Seeds", "Milk"],
        "exercise": ["Strength Training", "Light Cardio"],
        "youtube_links": ["https://www.youtube.com/watch?v=example8"],
        "products": ["Scalp Stimulant Oil", "Revitalizing Hair Serum"],
        "tips": "Reduce stress, maintain a healthy sleep cycle."
    },
    "Stage 5": {
        "food": ["Oats", "Lean Meats", "Soy Protein", "Dark Chocolate"],
        "exercise": ["Headstand (with caution)", "Breathing Exercises"],
        "youtube_links": ["https://www.youtube.com/watch?v=example9"],
        "products": ["Keratin Boost Shampoo", "DHT Blocker Supplement"],
        "tips": "Limit alcohol consumption, increase hydration."
    },
    "Stage 6": {
        "food": ["Zinc-rich Foods", "Coconut Water", "Citrus Fruits"],
        "exercise": ["Relaxation Therapy", "Scalp Rolling"],
        "youtube_links": ["https://www.youtube.com/watch?v=example10"],
        "products": ["Hair Growth Oil", "Vitamin-Enriched Hair Mask"],
        "tips": "Reduce inflammation, consume more antioxidants."
    },
    "Stage 7": {
        "food": ["Whole Grains", "Omega-3 Foods", "Green Tea"],
        "exercise": ["Light Stretching", "Acupressure"],
        "youtube_links": ["https://www.youtube.com/watch?v=example11"],
        "products": ["Intensive Hair Regrowth Kit"],
        "tips": "Consult a specialist for advanced treatment options."
    },
    "Random Patches": {
        "food": ["Chia Seeds", "Green Tea", "Turmeric", "Almonds"],
        "exercise": ["Scalp Massage", "Breathing Exercises"],
        "youtube_links": ["https://www.youtube.com/watch?v=example4"],
        "products": ["Hair Rejuvenation Serum", "Anti-Hair Fall Kit"],
        "tips": "Increase iron intake, consult a dermatologist if worsening."
    },
    "Fem Recede Edge": {
        "food": ["Soybeans", "Avocados", "Nuts", "Eggs"],
        "exercise": ["Scalp Stimulation Exercises", "Neck Rolls"],
        "youtube_links": ["https://www.youtube.com/watch?v=example5"],
        "products": ["Hair Strengthening Serum", "Biotin Shampoo"],
        "tips": "Avoid harsh chemical treatments, ensure scalp hydration."
    },
    "Hair Parting & Thinning Patterns": {
        "food": ["Nuts", "Eggs", "Milk", "Leafy Greens"],
        "exercise": ["Scalp Massage", "Towel Scrubbing"],
        "youtube_links": ["https://www.youtube.com/watch?v=example12"],
        "products": ["Volume Boost Shampoo", "Thickening Hair Serum"],
        "tips": "Use sulfate-free shampoo, maintain a balanced diet."
    },
    "Patchy Hair Loss": {
        "food": ["Protein-rich Foods", "Dark Leafy Greens", "Vitamin C Foods"],
        "exercise": ["Gentle Hair Brushing", "Relaxation Therapy"],
        "youtube_links": ["https://www.youtube.com/watch?v=example6"],
        "products": ["Anti-Patchy Hair Serum", "Hair Growth Boost Tonic"],
        "tips": "Monitor scalp conditions, consult a specialist if severe."
    },
    "Doctor Required": {
        "food": ["Consult a specialist for personalized diet."],
        "exercise": ["Consult a specialist before exercises."],
        "youtube_links": ["https://www.youtube.com/watch?v=example7"],
        "products": ["Prescription-based solutions"],
        "tips": "Immediate medical attention is recommended."
    }
}

# Define a default recommendation for unknown scalp conditions
# Used by save_recommendations_to_drive() whenever the scalp condition has no
# key in `recommendations` (including the "Unknown" result of
# get_scalp_condition()). Same five-field shape as the entries above.
default_recommendation = {
    "food": ["Consult a specialist for personalized diet."],
    "exercise": ["Consult a specialist before exercises."],
    "youtube_links": ["https://www.youtube.com/watch?v=example7"],
    "products": ["General Hair Care Products"],  # Provide a generic product suggestion
    "tips": "Maintain a healthy lifestyle, consult a specialist if concerned."
}
# Function to generate additional risk factor recommendations
def generate_risk_factor_recommendations(row):
    """Return extra advice for every risk factor the questionnaire row answers "Yes" to.

    Args:
        row: Mapping-like questionnaire record (a ``dict`` or pandas
            ``Series``) providing ``get`` and ``items``.

    Returns:
        A list of advice strings, in the fixed order Genetics, Hormonal
        Changes, Stress, Smoking. Empty when no factor is answered "Yes".
    """
    advice_by_factor = (
        ("Genetics", "Consider genetic testing for personalized treatments."),
        ("Hormonal Changes", "Balance hormones with diet (rich in Omega-3 and Zinc)."),
        ("Stress", "Practice meditation and deep breathing for stress control."),
        ("Smoking", "Quit smoking to improve blood circulation to the scalp."),
    )

    # CSV headers can carry stray spaces (the training code strips them, the
    # frame loaded for recommendations does not), so also index the answers
    # by whitespace-stripped column name.
    stripped_answers = {}
    for key, value in row.items():
        stripped_answers.setdefault(str(key).strip(), value)

    risk_recommendations = []
    for factor, advice in advice_by_factor:
        # An exactly named column wins; the stripped lookup is the fallback.
        answer = row.get(factor, stripped_answers.get(factor))
        # Accept "Yes" regardless of case or surrounding whitespace.
        if str(answer).strip().lower() == "yes":
            risk_recommendations.append(advice)
    return risk_recommendations

# Function to determine scalp condition from folder structure
def get_scalp_condition(user_id, base_folder=None, categories=None):
    """Find the scalp sub-category whose folder contains a file for ``user_id``.

    Folders are searched as ``<base_folder>/<category>/<subcategory>`` in the
    iteration order of ``categories``; the first sub-category with a matching
    file name wins.

    Args:
        user_id: Identifier looked for inside the file names.
        base_folder: Root folder to search. Defaults to the module-level
            ``scalp_folder``.
        categories: Mapping of category folder -> list of sub-category
            folders. Defaults to the module-level ``scalp_categories``.

    Returns:
        The matching sub-category name, or ``"Unknown"`` if no file matches.
    """
    import re  # local import so the block does not depend on the cell's import list

    if base_folder is None:
        base_folder = scalp_folder
    if categories is None:
        categories = scalp_categories

    token = str(user_id)
    if token.isascii() and token.isdigit():
        # Match the id as a whole number (leading zeros allowed). A plain
        # substring test makes user 1 match "user_10.jpg" or "21.png".
        id_pattern = re.compile(rf"(?<!\d)0*{token}(?!\d)")

        def belongs_to_user(file_name):
            return id_pattern.search(file_name) is not None
    else:
        # Non-numeric ids keep the original substring behavior.
        def belongs_to_user(file_name):
            return token in file_name

    for category, subcategories in categories.items():
        category_path = os.path.join(base_folder, category)
        if not os.path.isdir(category_path):
            continue
        for subcategory in subcategories:
            subcategory_path = os.path.join(category_path, subcategory)
            # isdir rather than exists: listing a plain file would raise.
            if not os.path.isdir(subcategory_path):
                continue
            if any(belongs_to_user(name) for name in os.listdir(subcategory_path)):
                return subcategory
    return "Unknown"
# Generate recommendations for each user and save to a file
def save_recommendations_to_drive(df, filename="recommendations.txt", output_dir="/content/drive/MyDrive/"):
    """Write one recommendation section per row of ``df`` to a text file.

    For each row the scalp condition is looked up in ``recommendations``
    (falling back to ``default_recommendation``), and any advice returned by
    ``generate_risk_factor_recommendations`` is appended.

    Args:
        df: DataFrame with a ``Scalp_Condition`` column. A ``User_ID`` column
            is used for the heading when present; otherwise ``index + 1``.
        filename: Name of the output file.
        output_dir: Directory the file is written to. Defaults to the Google
            Drive root used elsewhere in this notebook.
    """
    output_path = os.path.join(output_dir, filename)  # Path to save the file

    # Explicit UTF-8 so the file content does not depend on the platform's
    # default encoding.
    with open(output_path, "w", encoding="utf-8") as f:
        for index, row in df.iterrows():
            # Compute the positional fallback only when it is needed:
            # evaluating `index + 1` up front raises TypeError for
            # non-numeric indexes even if User_ID is available.
            user_id = row["User_ID"] if "User_ID" in row else index + 1
            scalp_condition = row["Scalp_Condition"]

            recs = recommendations.get(scalp_condition, default_recommendation)
            risk_recs = generate_risk_factor_recommendations(row)

            f.write(f"\n--- Recommendations for User {user_id} ({scalp_condition}) ---\n")
            f.write(f"Food Suggestions: {', '.join(recs['food'])}\n")
            f.write(f"Exercise Suggestions: {', '.join(recs['exercise'])}\n")
            f.write(f"Exercise Videos: {', '.join(recs['youtube_links'])}\n")
            f.write(f"Recommended AyurGenix Products: {', '.join(recs['products'])}\n")
            f.write(f"Hair Care Tips: {recs['tips']}\n")

            if risk_recs:
                f.write("Additional Tips Based on Questionnaire:\n")
                for tip in risk_recs:
                    f.write(f"   - {tip}\n")

    print(f"Recommendations saved to: {output_path}")

# Driver for this cell: runs once all definitions above have been executed.

# Derive each row's scalp condition from the image folders, using the row's
# index + 1 as the user id, then write the recommendations file.
df["Scalp_Condition"] = df.index.map(lambda i: get_scalp_condition(i+1))
save_recommendations_to_drive(df)



Recommendations saved to: /content/drive/MyDrive/recommendations.txt
