In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.multioutput import MultiOutputClassifier

# Load the data
# 'fruit.csv' is a relative path, so it is resolved against the notebook's
# current working directory. The frame is kept in `df` for the cells below.
df = pd.read_csv('fruit.csv')



In [2]:
# Show the column names of the loaded frame to confirm which columns are
# available as features and targets.
df.columns

Index(['UserID', 'Age', 'Gender', 'HealthCondition', 'RecommendedFruit',
       'RecommendedVegetable', 'Feedback'],
      dtype='object')

In [5]:
# List the distinct labels stored in the 'RecommendedVegetable' column.
# NOTE(review): the captured output of this cell consists almost entirely of
# fruit names (e.g. 'Banana', 'Apple', 'Mango'); verify the source data or the
# column mapping before trusting this column as a vegetable target.
df['RecommendedVegetable'].unique()

array(['Banana', 'Apple', 'Berries', 'Orange', 'Strawberry', 'Kiwi',
       'Papaya', 'Guava', 'Peach', 'Mango', 'Cherry', 'Grape', 'Pear',
       'Watermelon', 'Pineapple', 'Apricot', 'Fig', 'Plum', 'Lemon',
       'Lime', 'Mandarin', 'Nectarine', 'Pomegranate', 'Raspberry',
       'Blueberry', 'Blackberry', 'Date', 'Dragon Fruit', 'Durian',
       'Elderberry', 'Grapefruit', 'Honeydew', 'Jackfruit', 'Kumquat',
       'Lychee', 'Mulberry', 'Nance', 'Olallieberry', 'Passion Fruit',
       'Quince', 'Rambutan', 'Sapote', 'Soursop', 'Tamarind',
       'Ugli Fruit', 'Voavanga', 'Xigua', 'Yellow Passion Fruit',
       'Zucchini'], dtype=object)

In [2]:
# Preprocessing
# Encode into a copy so the raw `df` is never overwritten. This keeps the cell
# idempotent: re-running it always fits the encoders on the original string
# labels rather than on integer codes left behind by a previous run.
df_encoded = df.copy()

# Encode categorical variables
# One LabelEncoder per column; they stay at module level because the same
# fitted encoders are needed later to encode user input and decode predictions.
le_gender = LabelEncoder()
le_health = LabelEncoder()
le_fruit = LabelEncoder()
le_vegetable = LabelEncoder()

df_encoded['Gender'] = le_gender.fit_transform(df['Gender'])
df_encoded['HealthCondition'] = le_health.fit_transform(df['HealthCondition'])
df_encoded['RecommendedFruit'] = le_fruit.fit_transform(df['RecommendedFruit'])
df_encoded['RecommendedVegetable'] = le_vegetable.fit_transform(df['RecommendedVegetable'])

# Prepare features and targets
# Note: 'Age' in X is still unscaled here; scaling happens after the split.
X = df_encoded[['Age', 'Gender', 'HealthCondition']]
y = df_encoded[['RecommendedFruit', 'RecommendedVegetable']]

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the 'Age' feature
# Fit the scaler on the training rows only so that no statistics from the
# held-out rows leak into preprocessing; the test rows are transformed with
# the training mean/std. `assign` returns new frames, so the split results are
# not modified through a view of `X`.
scaler = StandardScaler()
X_train = X_train.assign(Age=np.ravel(scaler.fit_transform(X_train[['Age']])))
X_test = X_test.assign(Age=np.ravel(scaler.transform(X_test[['Age']])))

# Create and train the model
# MultiOutputClassifier fits one independent random forest per target column.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
multi_output_rf = MultiOutputClassifier(rf_model, n_jobs=-1)
multi_output_rf.fit(X_train, y_train)

# Make predictions
# y_pred has one column per target, in the same order as the columns of `y`.
y_pred = multi_output_rf.predict(X_test)

# Evaluate the model
# NOTE(review): the recorded run of this notebook scored 0.125 (fruit) and
# 0.075 (vegetable), so these three features carry little signal for the
# targets; treat the predictions accordingly.
print("Model Accuracy:")
print("Fruit Recommendations:", accuracy_score(y_test['RecommendedFruit'], y_pred[:, 0]))
print("Vegetable Recommendations:", accuracy_score(y_test['RecommendedVegetable'], y_pred[:, 1]))



Model Accuracy:
Fruit Recommendations: 0.125
Vegetable Recommendations: 0.075


In [3]:
# Function to get recommendations
def get_recommendations(age, gender, health_condition):
    """Predict a fruit and a vegetable recommendation for one person.

    Uses the module-level fitted objects ``scaler``, ``le_gender``,
    ``le_health``, ``le_fruit``, ``le_vegetable`` and ``multi_output_rf``.

    Parameters
    ----------
    age : int or float
        Raw (unscaled) age; it is scaled here with ``scaler``.
    gender : str
        Gender label, encoded with ``le_gender``.
    health_condition : str
        Health-condition label, encoded with ``le_health``.

    Returns
    -------
    tuple
        ``(fruit, vegetable)`` decoded back to their original labels.

    Raises
    ------
    ValueError
        Raised by scikit-learn's ``LabelEncoder.transform`` when ``gender`` or
        ``health_condition`` was not among the labels seen during fitting.
    """
    # Preprocess input
    # The scaler was fitted on a DataFrame with an 'Age' column, so pass the
    # value under the same column name instead of as a bare nested list.
    age_scaled = scaler.transform(pd.DataFrame({'Age': [age]}))[0][0]
    gender_encoded = le_gender.transform([gender])[0]
    health_encoded = le_health.transform([health_condition])[0]

    # Make prediction
    # Name the columns exactly as in training so the features are matched by
    # name rather than by position, and no feature-name warning is emitted.
    features = pd.DataFrame(
        [[age_scaled, gender_encoded, health_encoded]],
        columns=['Age', 'Gender', 'HealthCondition'],
    )
    prediction = multi_output_rf.predict(features)

    # Decode prediction
    # Column 0 is the fruit target and column 1 the vegetable target.
    fruit = le_fruit.inverse_transform(prediction[:, 0])[0]
    vegetable = le_vegetable.inverse_transform(prediction[:, 1])[0]

    return fruit, vegetable

In [4]:
# Example usage
# Sample profile passed through the trained recommender.
age, gender, health_condition = 60, 'Female', 'Diabetes'

fruit, vegetable = get_recommendations(
    age=age,
    gender=gender,
    health_condition=health_condition,
)

# Report the inputs alongside the decoded predictions.
print(f"\nRecommendations for a {age}-year-old {gender} with {health_condition}:")
print(f"Recommended Fruit: {fruit}")
print(f"Recommended Vegetable: {vegetable}")






Recommendations for a 60-year-old Female with Diabetes:
Recommended Fruit: Dragon Fruit
Recommended Vegetable: Zucchini


In [5]:
import joblib

In [7]:
# Persist the trained model together with every fitted preprocessing object.
# get_recommendations() uses all six of them, so they need to be saved and
# reloaded as a set. Paths are relative to the current working directory.
joblib.dump(multi_output_rf, 'model.sav')
# Encoders for the two categorical input features.
joblib.dump(le_gender, 'le_gender.sav')
joblib.dump(le_health, 'le_health.sav')
# Encoders used to decode the two predicted targets back to labels.
joblib.dump(le_fruit, 'le_fruit.sav')
joblib.dump(le_vegetable, 'le_vegetable.sav')
# Scaler for the 'Age' feature. As the last expression in the cell, this
# call's return value is what the notebook displays as the cell output.
joblib.dump(scaler, 'scaler.sav')

['scaler.sav']