In [8]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.23.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MultiLabelBinarizer, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import gradio as gr
import warnings
warnings.filterwarnings('ignore')

# Step 1: Load and Preprocess the Data
# Read the pose catalogue. On failure, print a readable hint and then re-raise:
# exit() would end the whole interpreter/kernel session (and report success when
# run as a script) while hiding the traceback needed to diagnose the problem.
try:
    df = pd.read_excel('Untitled spreadsheet.xlsx')
    print("Data loaded successfully!")
except FileNotFoundError:
    print("Error: 'Untitled spreadsheet.xlsx' not found. Please ensure the file exists.")
    raise
except Exception as e:
    print(f"An error occurred: {e}")
    raise

# Remove irrelevant columns (errors='ignore' tolerates sheets that lack some of them)
irrelevant_cols = ['AID', 'Breathing', 'Awareness', 'Suitability']
df = df.drop(columns=irrelevant_cols, errors='ignore')

# Handle missing values: an empty cell means "no such problem / contraindication"
for col in ['Contraindications', 'Targeted Mental Problems', 'Targeted Physical Problems']:
    df[col] = df[col].fillna('none')

# Normalise the separators of the comma-separated label lists to exactly ', '.
# Every later split uses the literal ', ', so "a,b" or "a ,  b" would otherwise
# yield bogus labels such as "a,b" or " b".
for col in ['Targeted Mental Problems', 'Targeted Physical Problems']:
    df[col] = df[col].str.replace(r'\s*,\s*', ', ', regex=True)

# Standardize text (trimmed and lowercase for consistency) so that values such as
# "Beginner " and "beginner" do not become two different categories.
for col in ['Level', 'Target Areas', 'Targeted Mental Problems', 'Targeted Physical Problems', 'Weight Goal Alignment']:
    df[col] = df[col].str.strip().str.lower()

# Encode categorical variables.
# Each single-valued column gets its own LabelEncoder; the column is overwritten
# in place with the integer codes produced by that encoder.
le_level = LabelEncoder()
le_target_areas = LabelEncoder()
le_weight_goal = LabelEncoder()
for column_name, encoder in [
    ('Level', le_level),
    ('Target Areas', le_target_areas),
    ('Weight Goal Alignment', le_weight_goal),
]:
    df[column_name] = encoder.fit_transform(df[column_name])

# Multi-label binarize mental and physical problems: each cell is split on ', '
# into a list of labels, and every distinct label becomes one 0/1 column.
mlb_mental = MultiLabelBinarizer()
mental_label_lists = df['Targeted Mental Problems'].str.split(', ')
mental_problems = mlb_mental.fit_transform(mental_label_lists)
mental_df = pd.DataFrame(data=mental_problems, columns=mlb_mental.classes_)

mlb_physical = MultiLabelBinarizer()
physical_label_lists = df['Targeted Physical Problems'].str.split(', ')
physical_problems = mlb_physical.fit_transform(physical_label_lists)
physical_df = pd.DataFrame(data=physical_problems, columns=mlb_physical.classes_)

# Combine features into a single DataFrame: the three encoded columns first,
# followed by the mental-problem indicators and then the physical-problem ones.
encoded_columns = ['Level', 'Target Areas', 'Weight Goal Alignment']
pose_features = pd.concat(
    [df[encoded_columns], mental_df, physical_df],
    axis=1,
)

def generate_training_data(df, pose_features, n_samples=1000, random_state=None):
    """Build a synthetic (features, label) set by pairing simulated users with every pose.

    For each of ``n_samples`` randomly drawn user profiles, one row is emitted per
    row of ``df``. A row is 12 user features followed by that pose's feature vector.

    Relies on the module-level fitted encoders ``mlb_mental``, ``mlb_physical``,
    ``le_weight_goal`` and ``le_target_areas``.

    Parameters
    ----------
    df : pandas.DataFrame
        Preprocessed pose table. Must contain the string columns
        'Targeted Mental Problems', 'Targeted Physical Problems' and
        'Contraindications', and the label-encoded columns 'Target Areas' and
        'Weight Goal Alignment'.
    pose_features : pandas.DataFrame
        Numeric pose features, row-aligned (by position) with ``df``.
    n_samples : int, default 1000
        Number of simulated user profiles.
    random_state : int or None, default None
        Seed for a private generator. ``None`` keeps using NumPy's global
        generator, so ``np.random.seed`` still controls the output.

    Returns
    -------
    X : numpy.ndarray
        One row per (user, pose) pair: 12 user features + pose feature vector.
    y : numpy.ndarray
        1 when the pose is judged suitable for the user, otherwise 0.
    """
    # np.random (module) and RandomState expose the same randint/choice API.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    X, y = [], []
    all_mental_problems = list(mlb_mental.classes_)
    all_physical_problems = list(mlb_physical.classes_)
    target_areas_options = ['arms', 'legs', 'core', 'back', 'flexibility', 'balance']

    # A LabelEncoder code is the position of the class in classes_, so the
    # lookups can be built once instead of calling transform() in the hot loop.
    weight_goal_names = list(le_weight_goal.classes_)
    both_encoded = weight_goal_names.index('both') if 'both' in weight_goal_names else None
    lose_encoded = {code for code, name in enumerate(weight_goal_names) if 'lose' in name}
    gain_encoded = {code for code, name in enumerate(weight_goal_names) if 'gain' in name}
    target_area_codes = {name: code for code, name in enumerate(le_target_areas.classes_)}

    # Parse every pose once. Enumerating positionally (rather than reusing the
    # index label from iterrows) keeps df and pose_features aligned even when
    # df does not carry a default 0..n-1 index.
    pose_matrix = pose_features.values
    poses = []
    for position, (_, row) in enumerate(df.iterrows()):
        poses.append({
            'mental': set(row['Targeted Mental Problems'].split(', ')),
            'physical': set(row['Targeted Physical Problems'].split(', ')),
            'contraindications': row['Contraindications'].lower(),
            'target_area': int(row['Target Areas']),
            'weight_goal': int(row['Weight Goal Alignment']),
            'vector': pose_matrix[position],
        })

    for _ in range(n_samples):
        # Simulate user profile (draw order kept stable so seeded runs are reproducible)
        height = rng.randint(150, 200)
        weight = rng.randint(50, 120)
        age = rng.randint(20, 80)
        problem_type = rng.choice(['mental', 'physical'])
        is_mental = problem_type == 'mental'
        specific_problem = rng.choice(all_mental_problems if is_mental else all_physical_problems)
        target_areas = rng.choice(target_areas_options, size=rng.randint(1, 4), replace=False)
        weight_goal = rng.choice(['lose weight', 'gain muscle'])

        # Everything below depends only on the user, not on the pose.
        wants_to_lose = weight_goal == 'lose weight'
        chosen_areas = set(target_areas)
        area_flags = [int(area in chosen_areas) for area in target_areas_options]
        user_area_codes = {target_area_codes[area] for area in chosen_areas
                           if area in target_area_codes}
        matching_goal_codes = lose_encoded if wants_to_lose else gain_encoded
        age_ok = age < 60
        weight_ok = weight < 100

        for pose in poses:
            in_mental = specific_problem in pose['mental']
            in_physical = specific_problem in pose['physical']

            # User features (12 values)
            user_features = [
                height,
                weight,
                age,
                int(is_mental),
                int(in_mental or in_physical),
                *area_flags,
                int(wants_to_lose),
            ]

            # Label: 1 if suitable, 0 if not.
            # The problem must be an exact entry of the pose's list for the
            # user's problem type; a substring test on the raw text would let
            # e.g. "pain" match "back pain".
            problem_match = in_mental if is_mental else in_physical
            # Contraindications are free text, so a substring test is used here.
            contra_ok = specific_problem not in pose['contraindications']
            target_area_match = pose['target_area'] in user_area_codes
            weight_goal_match = (
                (both_encoded is not None and pose['weight_goal'] == both_encoded)
                or pose['weight_goal'] in matching_goal_codes
            )

            label = int(age_ok and weight_ok and contra_ok and problem_match
                        and target_area_match and weight_goal_match)

            X.append(np.concatenate([user_features, pose['vector']]))
            y.append(label)

    return np.array(X), np.array(y)

# Step 2: Generate the synthetic training set.
# The generator draws from NumPy's global RNG, so seed it first; otherwise the
# data (and therefore the reported accuracy) changes on every run even though
# the split and the model below are seeded.
np.random.seed(42)
# Called once: a second identical call only repeats the work and discards the first result.
X, y = generate_training_data(df, pose_features)

# Step 3: Train the Model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate the model
# NOTE(review): a label is 1 only when six conditions hold at once, so positives
# are presumably rare and plain accuracy may look better than the model is —
# consider checking per-class metrics.
y_pred = model.predict(X_test)
print(f"Model Accuracy: {accuracy_score(y_test, y_pred):.2f}")

In [5]:
import joblib

# Persist the trained classifier and the fitted encoders, one file each, so
# they can be reloaded together later.
for artifact, filename in [
    (model, 'yoga_recommendation_model.pkl'),
    (le_level, 'le_level.pkl'),
    (le_target_areas, 'le_target_areas.pkl'),
    (le_weight_goal, 'le_weight_goal.pkl'),
    (mlb_mental, 'mlb_mental.pkl'),
]:
    joblib.dump(artifact, filename)

# Kept as the cell's final expression so the notebook still echoes the list of
# written paths returned by this last dump.
joblib.dump(mlb_physical, 'mlb_physical.pkl')

['mlb_physical.pkl']

In [9]:
# Write the current working DataFrame to 'data.xlsx' without the index column.
# NOTE(review): if the preprocessing cell has run in this session, `df` is the
# cleaned frame (irrelevant columns dropped, text lowercased, and Level /
# Target Areas / Weight Goal Alignment replaced by integer codes), not the raw sheet.
df.to_excel(excel_writer='data.xlsx', index=False)