In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import pickle

In [7]:


# Train model with KNN
knn_model = KNeighborsClassifier(n_neighbors=5)  # You can tune `n_neighbors`
knn_model.fit(X_train, y_train)

# Make predictions
y_pred_knn = knn_model.predict(X_test)

# Calculate accuracy
accuracy_knn = accuracy_score(y_test, y_pred_knn)
print(f"KNN Model Accuracy: {accuracy_knn * 100:.2f}%")

# Save model
with open("diet_exercise_knn_model.pkl", "wb") as file:
    pickle.dump(knn_model, file)

print("KNN Model trained and saved successfully!")


KNN Model Accuracy: 36.00%
KNN Model trained and saved successfully!


In [16]:


# Load dataset
file_path = "diet_exercise_data.csv"  # Ensure this is the correct path
data = pd.read_csv(file_path)

# Encode categorical features
label_encoder_activity = LabelEncoder()
data['Physical_Activity_Level'] = label_encoder_activity.fit_transform(data['Physical_Activity_Level'])

label_encoder_diet = LabelEncoder()
data['Diet_Recommendation'] = label_encoder_diet.fit_transform(data['Diet_Recommendation'])

# Features & Labels
X = data[['Age', 'Weight_kg', 'Height_cm', 'Physical_Activity_Level']]  # Input features
y = data['Diet_Recommendation']  # Output (encoded diet plan)

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.9, random_state=99)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=18)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

# Save model for Spring Boot
with open("diet_exercise_model.pkl", "wb") as file:
    pickle.dump(model, file)

print("Model trained and saved successfully!")


Model Accuracy: 36.67%
Model trained and saved successfully!


In [15]:


# Train model with SVM
svm_model = SVC(kernel='rbf', random_state=18)
svm_model.fit(X_train, y_train)

# Make predictions
y_pred_svm = svm_model.predict(X_test)

# Calculate accuracy
accuracy_svm = accuracy_score(y_test, y_pred_svm)
print(f"SVM Model Accuracy: {accuracy_svm * 100:.2f}%")

# Save model
with open("diet_exercise_svm_model.pkl", "wb") as file:
    pickle.dump(svm_model, file)

print("SVM Model trained and saved successfully!")


SVM Model Accuracy: 33.67%
SVM Model trained and saved successfully!


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE  # To handle class imbalance
import pickle

# Load dataset
file_path = "diet_exercise_data.csv"  # Ensure correct path
data = pd.read_csv(file_path)

# Drop unnecessary columns
if {'Patient_ID', 'Preferred_Cuisine', 'Allergies'}.issubset(data.columns):
    data = data.drop(columns=['Patient_ID', 'Preferred_Cuisine', 'Allergies'])

# Check for missing values & drop rows with NaN values
data = data.dropna()

# Encode categorical features
label_encoder_activity = LabelEncoder()
data['Physical_Activity_Level'] = label_encoder_activity.fit_transform(data['Physical_Activity_Level'])

label_encoder_diet = LabelEncoder()
data['Diet_Recommendation'] = label_encoder_diet.fit_transform(data['Diet_Recommendation'])

# Features & Labels
X = data[['Age', 'Weight_kg', 'Height_cm', 'BMI', 'Cholesterol_mg/dL', 'Blood_Pressure_mmHg',
          'Glucose_mg/dL', 'Weekly_Exercise_Hours', 'Dietary_Nutrient_Imbalance_Score',
          'Physical_Activity_Level']]
y = data['Diet_Recommendation']  # Output (encoded diet plan)

# Handle Class Imbalance using SMOTE (Synthetic Minority Oversampling)
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Split into training and test sets (using 80-20 split for better generalization)
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Feature Scaling (Not necessary for RandomForest but useful for other models)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train an optimized RandomForest model with better hyperparameters
model = RandomForestClassifier(n_estimators=300, max_depth=20, min_samples_split=9, min_samples_leaf=2,
                               class_weight="balanced", random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate model performance
accuracy = accuracy_score(y_test, y_pred)
print(f"Optimized Model Accuracy: {accuracy * 100:.2f}%")
print(classification_report(y_test, y_pred))

# Save model for Spring Boot
with open("diet_exercise_model.pkl", "wb") as file:
    pickle.dump(model, file)

# Save scaler for consistent transformations in the Spring Boot app
with open("scaler.pkl", "wb") as file:
    pickle.dump(scaler, file)

print("Model and scaler trained and saved successfully!")


Optimized Model Accuracy: 55.47%
              precision    recall  f1-score   support

           0       0.59      0.68      0.63        47
           1       0.57      0.54      0.55        48
           2       0.49      0.43      0.46        42

    accuracy                           0.55       137
   macro avg       0.55      0.55      0.55       137
weighted avg       0.55      0.55      0.55       137

Model and scaler trained and saved successfully!


In [3]:
# Load dataset
import pandas as pd
file_path = "diet_exercise_data.csv"  # Ensure this is the correct path
data = pd.read_csv(file_path)

In [4]:
data['Diet_Recommendation'].unique()

array(['Balanced', 'Low_Carb', 'Low_Sodium'], dtype=object)

In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 20 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Patient_ID                        1000 non-null   object 
 1   Age                               1000 non-null   int64  
 2   Gender                            1000 non-null   object 
 3   Weight_kg                         1000 non-null   float64
 4   Height_cm                         1000 non-null   int64  
 5   BMI                               1000 non-null   float64
 6   Disease_Type                      796 non-null    object 
 7   Severity                          1000 non-null   object 
 8   Physical_Activity_Level           1000 non-null   object 
 9   Daily_Caloric_Intake              1000 non-null   int64  
 10  Cholesterol_mg/dL                 1000 non-null   float64
 11  Blood_Pressure_mmHg               1000 non-null   int64  
 12  Glucose

In [12]:
allergy = data['Allergies'].unique() # replace nan by 0, peanuts by 1. gluten by 2

In [13]:
Dietary = data['Dietary_Restrictions'].unique() # replace nan by 0, Low_Sugar by 1. Low_Sodium by 2

In [14]:
disease = data['Disease_Type'].unique() # replace nan by 0, Obesity by 1. Diabetes by 2, Hypertension by 3

In [8]:
data.isnull().sum()

Patient_ID                            0
Age                                   0
Gender                                0
Weight_kg                             0
Height_cm                             0
BMI                                   0
Disease_Type                        204
Severity                              0
Physical_Activity_Level               0
Daily_Caloric_Intake                  0
Cholesterol_mg/dL                     0
Blood_Pressure_mmHg                   0
Glucose_mg/dL                         0
Dietary_Restrictions                334
Allergies                           323
Preferred_Cuisine                     0
Weekly_Exercise_Hours                 0
Adherence_to_Diet_Plan                0
Dietary_Nutrient_Imbalance_Score      0
Diet_Recommendation                   0
dtype: int64

In [None]:
import numpy as np
data['Category'] = np.where((df['Category'] == 'A') & (df['Value'] > 20), 'X',
                 np.where((df['Category'] == 'B') & (df['Value'] > 20), 'Y', df['Category']))

print(df)
