In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree and print the full path of every file in it.
for dirname, _, filenames in os.walk('/kaggle/input'):
    file_paths = [os.path.join(dirname, filename) for filename in filenames]
    for file_path in file_paths:
        print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd

# Load datasets
# NOTE: all_diets_df is loaded here but is not referenced again in this cell.
all_diets_df = pd.read_csv('/kaggle/input/healthy-diet-recipes-a-comprehensive-dataset/All_Diets.csv')
mediterranean_df = pd.read_csv('/kaggle/input/healthy-diet-recipes-a-comprehensive-dataset/mediterranean.csv')
paleo_df = pd.read_csv('/kaggle/input/healthy-diet-recipes-a-comprehensive-dataset/paleo.csv')
vegan_df = pd.read_csv('/kaggle/input/healthy-diet-recipes-a-comprehensive-dataset/vegan.csv')
keto_df = pd.read_csv('/kaggle/input/healthy-diet-recipes-a-comprehensive-dataset/keto.csv')
dash_df = pd.read_csv('/kaggle/input/healthy-diet-recipes-a-comprehensive-dataset/dash.csv')

# Tag every per-diet frame with its diet label. The column is spelled
# 'Diet_type' (lower-case "t") because that is the name the rest of this
# notebook filters and encodes on; a differently-cased 'Diet_Type' column
# would never be read by that code.
mediterranean_df['Diet_type'] = 'Mediterranean'
paleo_df['Diet_type'] = 'Paleo'
vegan_df['Diet_type'] = 'Vegan'
keto_df['Diet_type'] = 'Keto'
dash_df['Diet_type'] = 'DASH'

# Combine all dataframes. ignore_index=True gives every row a unique label;
# without it each source frame keeps its own 0..n-1 labels, so a label-based
# lookup such as combined_df.loc[0] would return one row per diet.
combined_df = pd.concat(
    [mediterranean_df, paleo_df, vegan_df, keto_df, dash_df],
    ignore_index=True,
)

# Clean data: drop exact duplicate rows and renumber the index so it has no gaps.
combined_df = combined_df.drop_duplicates(ignore_index=True)

# Blank out missing values in non-numeric columns only. Filling a numeric
# column with '' would turn it into an object column and break arithmetic,
# comparisons and scaling on it.
text_columns = combined_df.select_dtypes(exclude='number').columns
combined_df = combined_df.fillna({column: '' for column in text_columns})

print(combined_df.head())


In [None]:
import pandas as pd

# Lift the column cap so wide frames are printed without truncated columns
pd.options.display.max_columns = None

# Read the all-diets CSV into the working frame
combined_df = pd.read_csv(
    '/kaggle/input/healthy-diet-recipes-a-comprehensive-dataset/All_Diets.csv'
)

# Preview the first rows with every column visible
first_rows = combined_df.head()
print(first_rows)


In [None]:
import pandas as pd

# Set display option to show all columns
pd.set_option('display.max_columns', None)

# Load datasets
paleo_df = pd.read_csv('/kaggle/input/healthy-diet-recipes-a-comprehensive-dataset/paleo.csv')
mediterranean_df = pd.read_csv('/kaggle/input/healthy-diet-recipes-a-comprehensive-dataset/mediterranean.csv')
vegan_df = pd.read_csv('/kaggle/input/healthy-diet-recipes-a-comprehensive-dataset/vegan.csv')
keto_df = pd.read_csv('/kaggle/input/healthy-diet-recipes-a-comprehensive-dataset/keto.csv')
dash_df = pd.read_csv('/kaggle/input/healthy-diet-recipes-a-comprehensive-dataset/dash.csv')

# Set the 'Diet_type' label on each dataframe. The assignment is
# unconditional: if a CSV already has a 'Diet_type' column, its values are
# overwritten with the label below.
mediterranean_df['Diet_type'] = 'Mediterranean'
paleo_df['Diet_type'] = 'Paleo'
vegan_df['Diet_type'] = 'Vegan'
keto_df['Diet_type'] = 'Keto'
dash_df['Diet_type'] = 'DASH'

# Combine all dataframes into one
combined_df = pd.concat([paleo_df, mediterranean_df, vegan_df, keto_df, dash_df], ignore_index=True)

# Drop exact duplicate rows and renumber the index, so that row labels stay
# equal to row positions (positional arrays built from this frame line up
# with its labels).
combined_df = combined_df.drop_duplicates(ignore_index=True)

# Blank out missing values in non-numeric columns only. Filling a numeric
# column with '' would turn it into an object column, which breaks the
# numeric comparisons and scaling applied to the nutrient columns later.
text_columns = combined_df.select_dtypes(exclude='number').columns
combined_df = combined_df.fillna({column: '' for column in text_columns})

# Display the first few rows of the DataFrame to check the combined dataset
print(combined_df.head())


In [None]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Encode 'Diet_type' and 'Cuisine_type' as integer codes
diet_type_encoder = LabelEncoder()
combined_df['Diet_type_Encoded'] = diet_type_encoder.fit_transform(combined_df['Diet_type'])

cuisine_type_encoder = LabelEncoder()
combined_df['Cuisine_type_Encoded'] = cuisine_type_encoder.fit_transform(combined_df['Cuisine_type'])

# Protein content (grams) above which a recipe is labelled "high protein"
HIGH_PROTEIN_THRESHOLD_G = 50

# Select numerical features.
# 'Protein(g)' is deliberately NOT a feature: the target below is a simple
# threshold on that column, so feeding it to the model would leak the label
# and make the reported accuracy meaningless (the model would only re-learn
# the threshold).
numerical_features = combined_df[['Carbs(g)', 'Fat(g)']]

# Standardize numerical features
# NOTE(review): the scaler is fit on all rows, before the train/test split
# done in a later cell; fit it on the training rows only if a strict
# hold-out evaluation is needed.
scaler = StandardScaler()
numerical_features_scaled = scaler.fit_transform(numerical_features)

# Combine all features into a single feature set
import numpy as np

encoded_categories = combined_df[['Diet_type_Encoded', 'Cuisine_type_Encoded']].values
features = np.hstack((numerical_features_scaled, encoded_categories))

# Define the target variable: High protein (1) vs. Low protein (0)
target = (combined_df['Protein(g)'] > HIGH_PROTEIN_THRESHOLD_G).astype(int)

print("Feature set and target variable prepared.")


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Build the 100-tree forest and fit it in one step (fit returns the estimator itself)
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the fitted model on the held-out rows
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")
print("Classification Report:\n", report)


In [None]:
import random

def get_random_recipe(df, diet_type=None):
    """
    Return one randomly chosen recipe row.

    Parameters:
    - df: DataFrame of recipes. Must contain a 'Diet_type' column when
      diet_type is given.
    - diet_type: Optional diet name, matched case-insensitively against the
      'Diet_type' column. A falsy value (None or '') means "any diet".

    Returns:
    - A pandas Series holding the selected row, or the string
      "No recipes found for the specified diet type." when no row matches.
    """
    if diet_type:
        filtered_df = df[df['Diet_type'].str.lower() == diet_type.lower()]
    else:
        filtered_df = df

    if filtered_df.empty:
        return "No recipes found for the specified diet type."

    # Pick by position rather than by index label: when labels repeat (e.g.
    # frames concatenated without ignore_index), .loc[label] returns every
    # row sharing that label instead of a single recipe.
    random_position = random.randrange(len(filtered_df))
    return filtered_df.iloc[random_position]

# Example usage: draw one vegan recipe first, then one from the whole dataset
vegan_recipe = get_random_recipe(combined_df, diet_type='Vegan')
print("Random Vegan Recipe:")
print(vegan_recipe)

any_diet_recipe = get_random_recipe(combined_df)
print("\nRandom Recipe from Any Diet:")
print(any_diet_recipe)


In [None]:
import joblib

# Persist the fitted classifier to a file in the current working directory
model_filename = 'diet_recipe_model.pkl'
joblib.dump(model, model_filename)
print("Model saved successfully.")


In [None]:
# Read the persisted classifier back from disk
loaded_model = joblib.load('diet_recipe_model.pkl')

# Re-score it on the held-out rows to confirm it matches the in-memory model
loaded_y_pred = loaded_model.predict(X_test)
loaded_accuracy = accuracy_score(y_test, loaded_y_pred)
print(f"Loaded Model Accuracy: {loaded_accuracy:.2f}")


In [None]:
import random

def provide_random_recipe(df, model=None, features=None, use_model=False):
    """
    Provide a random recipe, optionally restricted to model-predicted positives.

    When use_model is True and both model and features are supplied, the
    model's predictions are used as a row mask and the recipe is drawn only
    from rows predicted as class 1. Otherwise the recipe is drawn from the
    whole DataFrame.

    Parameters:
    - df: DataFrame containing recipes.
    - model: Trained model exposing predict(features) (optional).
    - features: Feature rows passed to model.predict; row i must describe
      row i of df, because the predictions are applied as a positional mask
      (optional).
    - use_model: Boolean flag to indicate whether to use the model for filtering.

    Returns:
    - A pandas Series holding the selected row, or the string
      "No high protein recipes found." when the model predicts no positives.

    Raises:
    - IndexError: if the frame to draw from is empty and the model is not used.
    """
    if use_model and model is not None and features is not None:
        # Predict high protein recipes using the model
        predictions = model.predict(features)
        candidates = df[predictions == 1]  # Keep rows predicted as class 1

        if candidates.empty:
            return "No high protein recipes found."
    else:
        candidates = df
        if len(candidates) == 0:
            raise IndexError("Cannot choose a recipe from an empty DataFrame.")

    # Pick by position rather than by index label: when labels repeat,
    # .loc[label] returns every row sharing that label instead of one recipe.
    random_position = random.randrange(len(candidates))
    return candidates.iloc[random_position]

# Example usage: unfiltered draw from the whole dataset
random_recipe = provide_random_recipe(df=combined_df)
print("Random Recipe:\n", random_recipe)

# Example usage with model: draw only from rows the classifier predicts as class 1
model_filter_args = dict(model=model, features=features, use_model=True)
random_high_protein_recipe = provide_random_recipe(combined_df, **model_filter_args)
print("\nRandom High Protein Recipe:\n", random_high_protein_recipe)
