In [17]:
import os
import sys
import pickle
from sklearn.mixture import GaussianMixture

# --- Configuration -----------------------------------------------------------
# Project root. Defaults to the original hard-coded location; set the
# PCOS_PROJECT_DIR environment variable to run the notebook on another machine.
PROJECT_DIR = os.environ.get("PCOS_PROJECT_DIR", "d:/Health_and_Wellness/")
DATA_PATH = "dataset/pcos_data.csv"        # relative to PROJECT_DIR
MODEL_PATH = "models/pcos_gmm_model.pkl"   # relative to PROJECT_DIR

# Set project directory (all relative paths below resolve against it)
os.chdir(PROJECT_DIR)

# The backend folder has to be on sys.path BEFORE anything is imported from it;
# otherwise the import only succeeds when an earlier cell already patched the
# path. An absolute entry keeps working if the working directory changes later,
# and the membership check stops re-runs from piling up duplicate entries.
backend_dir = os.path.abspath("backend")
if backend_dir not in sys.path:
    sys.path.append(backend_dir)

from preprocess import preprocess_data  # noqa: E402  (must follow the sys.path setup)

# --- Load dataset ------------------------------------------------------------
raw_df = preprocess_data(DATA_PATH)
raw_df.columns = raw_df.columns.str.strip()  # Remove spaces

# Debug: Print cleaned column names
print("Cleaned column names:", raw_df.columns.tolist())

# Define expected feature names
feature_names = ["Age (yrs)", "Weight (Kg)", "Height(Cm)", "BMI", "Reg.Exercise(Y/N)"]

# Ensure only available columns are selected, but say so when something is
# missing instead of silently training on fewer features.
existing_columns = [col for col in feature_names if col in raw_df.columns]
missing_columns = [col for col in feature_names if col not in raw_df.columns]
if missing_columns:
    print("Warning: expected feature columns not found:", missing_columns)
if not existing_columns:
    raise ValueError("None of the expected feature columns are present in the dataset.")

# Explicit copy: a cluster column is added below, and assigning into a slice of
# the preprocessed frame can trigger pandas' SettingWithCopyWarning.
df = raw_df[existing_columns].copy()

print("Final selected columns:", df.columns.tolist())

# --- Train GMM Model ---------------------------------------------------------
num_clusters = 3
gmm = GaussianMixture(n_components=num_clusters, random_state=42)

# ✅ Fit model using only original features (the cluster column is added after
# the feature selection on the right-hand side is evaluated)
df["Cluster_GMM"] = gmm.fit_predict(df[existing_columns])

print("Clusters assigned:", df["Cluster_GMM"].unique())

# --- Save model (model + feature names) --------------------------------------
# Create the output folder if needed so a fresh checkout does not fail here.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
with open(MODEL_PATH, "wb") as file:
    pickle.dump((gmm, existing_columns), file)  # Save only original feature names

print("Model training complete and saved.")

# --- Load model back as a round-trip check -----------------------------------
# NOTE: pickle.load executes arbitrary code from the file. That is acceptable
# here because the file was written a few lines above; never point this at an
# untrusted pickle.
with open(MODEL_PATH, "rb") as file:
    loaded_model, loaded_features = pickle.load(file)  # Unpack (model, feature names)

# Debug: Check loaded model type
print("Loaded model trained on features:", loaded_features)
print("Loaded model type:", type(loaded_model))

# ✅ Ensure model can predict
if isinstance(loaded_model, GaussianMixture):
    sample_input = df[loaded_features].iloc[:5]  # Select test data using correct features
    predictions = loaded_model.predict(sample_input)
    print("Predicted clusters:", predictions)
else:
    print("Error: Model did not load correctly.")


Cleaned column names: ['PCOS (Y/N)', 'Age (yrs)', 'Weight (Kg)', 'Height(Cm)', 'BMI', 'Blood Group', 'Pulse rate(bpm)', 'RR (breaths/min)', 'Hb(g/dl)', 'Cycle(R/I)', 'Cycle length(days)', 'Marraige Status (Yrs)', 'Pregnant(Y/N)', 'No. of abortions', 'I   beta-HCG(mIU/mL)', 'II    beta-HCG(mIU/mL)', 'FSH(mIU/mL)', 'LH(mIU/mL)', 'FSH/LH', 'Hip(inch)', 'Waist(inch)', 'Waist:Hip Ratio', 'TSH (mIU/L)', 'AMH(ng/mL)', 'PRL(ng/mL)', 'Vit D3 (ng/mL)', 'PRG(ng/mL)', 'RBS(mg/dl)', 'Weight gain(Y/N)', 'hair growth(Y/N)', 'Skin darkening (Y/N)', 'Hair loss(Y/N)', 'Pimples(Y/N)', 'Fast food (Y/N)', 'Reg.Exercise(Y/N)', 'BP _Systolic (mmHg)', 'BP _Diastolic (mmHg)', 'Follicle No. (L)', 'Follicle No. (R)', 'Avg. F size (L) (mm)', 'Avg. F size (R) (mm)', 'Endometrium (mm)', 'Unnamed: 44']
Final selected columns: ['Age (yrs)', 'Weight (Kg)', 'Height(Cm)', 'BMI', 'Reg.Exercise(Y/N)']
Clusters assigned: [1 2 0]
Model training complete and saved.
Loaded model trained on features: ['Age (yrs)', 'Weight (Kg)

  df = df.applymap(lambda x: str(x).replace(".", "", 1) if isinstance(x, str) else x)


In [13]:
import os

# Confirm that the pickled GMM artifact is present; the path is relative to
# the kernel's current working directory.
model_path = "models/pcos_gmm_model.pkl"
model_found = os.path.exists(model_path)
print("Model exists:", model_found)


Model exists: True


In [2]:
import os

# Report the kernel's current working directory.
current_dir = os.getcwd()
print(current_dir)  # ✅ This shows the current directory


d:\Health_and_Wellness\notebooks


In [3]:
import sys

# Add backend folder to the module search path.
# Guarded so that re-running this cell does not append the same entry again
# (an unguarded append leaves "./backend" listed multiple times).
BACKEND_PATH = "./backend"
if BACKEND_PATH not in sys.path:
    sys.path.append(BACKEND_PATH)

print(sys.path)  # ✅ Check if "./backend" is listed


['d:\\Apurva\\python310.zip', 'd:\\Apurva\\DLLs', 'd:\\Apurva\\lib', 'd:\\Apurva', '', 'd:\\Apurva\\lib\\site-packages', 'd:\\Apurva\\lib\\site-packages\\win32', 'd:\\Apurva\\lib\\site-packages\\win32\\lib', 'd:\\Apurva\\lib\\site-packages\\Pythonwin', './backend', './backend']


In [4]:
import os

# Check whether the preprocessing module can be found relative to the
# kernel's current working directory.
preprocess_script = "backend/preprocess.py"
script_found = os.path.exists(preprocess_script)
print(script_found)  # ✅ Should print True


False
