<a href="https://colab.research.google.com/github/BRV12G/Final_year_Project/blob/main/svm%20on%20new%20diet%20dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the dataset
file_path = '/content/nutrition_dataset_with_fiber_water_intake_updated.csv'  # Update this path
data = pd.read_csv(file_path)

# Inspect column names
print("Dataset Columns:", data.columns.tolist())

# Drop the Person ID column as it's not useful for classification
data = data.drop(columns=['Person ID'])

# Encode categorical variables
label_encoders = {}
for column in data.select_dtypes(include=['object']).columns:
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

# Ensure all targets are encoded properly as categorical
output_features = [
    'Health Status', 'BMI Values', 'BMI Class', 'Calories (kcal)', 'Carbohydrates (g)',
    'Proteins (g)', 'Fats (g)', 'Vitamin A (mcg)', 'Vitamin C (mg)',
    'Vitamin D (mcg)', 'Sodium (mg)', 'Potassium (mg)', 'Magnesium (mg)',
    'Iron (mg)', 'Zinc (mg)', 'Fiber Intake (g)', 'Water Intake (L)'
]

for feature in output_features:
    if feature in data.columns and data[feature].dtype != 'int':
        label_encoders[feature] = LabelEncoder()
        data[feature] = label_encoders[feature].fit_transform(data[feature])

# Define input features
input_features = [
    'Gender', 'Age', 'Occupation', 'Sleep Duration', 'Quality of Sleep',
    'Activity Level', 'Stress Level', 'Blood Pressure Category',
    'Systolic', 'Diastolic', 'Heart Rate', 'Daily Steps',
    'Height (cm)', 'Weight (kg)'
]

# Split the data into features (X) and targets (y)
X = data[input_features]
y = data[output_features]

# Scale numerical features for SVM performance with Standard Scaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Train and evaluate models for each output feature
models = {}
results = {}
overall_accuracy_sum = 0  # To calculate the mean accuracy across all models

for target in output_features:
    # Define target-specific training and testing data
    y_train_target = y_train[target]
    y_test_target = y_test[target]

    # Train the SVM classifier
    model = SVC(kernel='rbf', C=1, gamma='scale', random_state=42)  # SVM Configuration
    model.fit(X_train, y_train_target)
    models[target] = model

    # Test the model
    y_pred = model.predict(X_test)

    # Evaluate the model
    accuracy = accuracy_score(y_test_target, y_pred)
    overall_accuracy_sum += accuracy
    results[target] = {
        'accuracy': accuracy,
        'classification_report': classification_report(y_test_target, y_pred, zero_division=0),
        'confusion_matrix': confusion_matrix(y_test_target, y_pred)
    }

    # Print evaluation metrics
    print(f"\nTarget: {target}")
    print(f"Accuracy: {accuracy}")
    print("Confusion Matrix:\n", results[target]['confusion_matrix'])

# Calculate and display overall model accuracy
overall_accuracy = overall_accuracy_sum / len(output_features)
print(f"\nOverall Model Accuracy (Mean Accuracy across targets): {overall_accuracy:.4f}")

# Function to predict multiple outputs for new user inputs
def predict_user_input():
    # Gather user input
    print("\nEnter the following details:")
    user_data = {
        'Gender': input("Gender (Male/Female): "),
        'Age': int(input("Age: ")),
        'Occupation': input("Occupation: "),
        'Sleep Duration': int(input("Sleep Duration (hours): ")),
        'Quality of Sleep': input("Quality of Sleep (Excellent/Good/Fair/Poor): "),
        'Activity Level': input("Activity Level (Low/Medium/High): "),
        'Stress Level': input("Stress Level (Low/Medium/High): "),
        'Blood Pressure Category': input("Blood Pressure Category (Normal/Prehypertension/Hypertension): "),
        'Systolic': int(input("Systolic Blood Pressure: ")),
        'Diastolic': int(input("Diastolic Blood Pressure: ")),
        'Heart Rate': int(input("Heart Rate: ")),
        'Daily Steps': int(input("Daily Steps: ")),
        'Height (cm)': int(input("Height (cm): ")),
        'Weight (kg)': int(input("Weight (kg): "))
    }

    # Convert input to DataFrame
    input_df = pd.DataFrame([user_data])

    # Encode categorical features
    for column, encoder in label_encoders.items():
        if column in input_df.columns:
            try:
                input_df[column] = encoder.transform(input_df[column])
            except ValueError:
                # Assign the most frequent category for unseen labels
                input_df[column] = encoder.transform([data[column].mode()[0]])

    # Scale the user input using the same scaler as training
    input_df_scaled = scaler.transform(input_df)

    # Predict outputs for each target feature
    predictions = {}
    for target, model in models.items():
        prediction = model.predict(input_df_scaled)
        if target in label_encoders:  # Decode categorical outputs
            predictions[target] = label_encoders[target].inverse_transform(prediction)[0]
        else:
            predictions[target] = prediction[0]

    # Display predictions
    print("\nPredicted Outputs:")
    for key, value in predictions.items():
        print(f"{key}: {value}")

# Run prediction function
predict_user_input()


Dataset Columns: ['Person ID', 'Gender', 'Age', 'Occupation', 'Sleep Duration', 'Quality of Sleep', 'Activity Level', 'Stress Level', 'Weight (kg)', 'Height (cm)', 'Blood Pressure Category', 'Systolic', 'Diastolic', 'Heart Rate', 'Daily Steps', 'Health Status', 'BMI Values', 'BMI Class', 'Calories (kcal)', 'Carbohydrates (g)', 'Proteins (g)', 'Fats (g)', 'Vitamin A (mcg)', 'Vitamin C (mg)', 'Vitamin D (mcg)', 'Sodium (mg)', 'Potassium (mg)', 'Magnesium (mg)', 'Iron (mg)', 'Zinc (mg)', 'Fiber Intake (g)', 'Water Intake (L)']

Target: Health Status
Accuracy: 0.8135
Confusion Matrix:
 [[  13  737]
 [   9 3241]]

Target: BMI Values
Accuracy: 0.013
Confusion Matrix:
 [[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]

Target: BMI Class
Accuracy: 0.942
Confusion Matrix:
 [[2356    0   29    0]
 [  35    0   70    0]
 [  51    0 1411    0]
 [  47    0    0    1]]

Target: Calories (kcal)
Accuracy: 0.00525
Confusion Matrix:
 