# In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score
from google.colab import drive
from IPython.display import display
import joblib

# Step 1: Gathering, Preparing, and Processing data
# Step 1.1: Gathering Data
# Mount Google Drive so the dataset files are reachable from this runtime.
drive.mount('/content/drive')

# Locations of the recipe CSV and the image folder on Google Drive.
dataset_path = "/content/drive/MyDrive/dataset/Food Ingredients and Recipe Dataset with Image Name Mapping.csv"
images_path = "/content/drive/MyDrive/dataset/Food Images"

# Load the dataset with pandas; index_col=False stops the first column from
# being used as the index.
df = pd.read_csv(dataset_path, index_col=False)

# Remove unwanted index columns, i.e. every column whose name contains
# "unnamed" in any letter case (such as "Unnamed: 0").
df = df.drop(columns=df.columns[df.columns.str.contains('unnamed', case=False)])

# Show the raw dataset.
print("Data awal:")
display(df)

# Step 1.2: Preparing Data
# One pipeline, in this order:
#   1. drop the raw 'Ingredients' column,
#   2. promote 'Cleaned_Ingredients' to the name 'Ingredients',
#   3. discard rows containing null values,
#   4. discard duplicated rows,
#   5. put the columns in their final order,
#   6. renumber the rows from 0 so no stale index labels remain.
df = (
    df.drop(columns=['Ingredients'])
    .rename(columns={'Cleaned_Ingredients': 'Ingredients'})
    .dropna()
    .drop_duplicates()
    .loc[:, ['Title', 'Image_Name', 'Ingredients', 'Instructions']]
    .reset_index(drop=True)
)

# Show the dataset after preprocessing.
print("\nData setelah preprocessing:")
display(df)

def prepare_data():
    """Return a small, hard-coded sample of five recipes as a DataFrame.

    The frame has the columns 'Title', 'Image_Name', 'Ingredients' and
    'Instructions', plus a derived 'Text' column holding the title and the
    instructions joined by a single space.
    """
    column_names = ['Title', 'Image_Name', 'Ingredients', 'Instructions']
    sample_rows = [
        ('Chicken Curry', 'chicken_curry.jpg', 'chicken, curry powder', 'Cook chicken...'),
        ('Peanut Butter Sandwich', 'peanut_butter_sandwich.jpg', 'peanut butter, bread', 'Spread peanut butter...'),
        ('Milkshake', 'milkshake.jpg', 'milk, ice cream', 'Blend milk...'),
        ('Grilled Chicken', 'grilled_chicken.jpg', 'chicken, spices', 'Grill chicken...'),
        ('Vegan Salad', 'vegan_salad.jpg', 'lettuce, tomato', 'Mix veggies...'),
    ]
    recipes = pd.DataFrame(sample_rows, columns=column_names)
    # 'Text' is the free-text field the TF-IDF model is fitted on.
    recipes['Text'] = recipes['Title'].str.cat(recipes['Instructions'], sep=' ')
    return recipes

# Step 2: Building, Training, and Deploying model
def build_tfidf_model(df):
    """Fit a TF-IDF vectorizer on the 'Text' column of ``df``.

    English stop words are removed by the vectorizer.

    Returns:
        A ``(vectorizer, matrix)`` tuple: the fitted ``TfidfVectorizer`` and
        the TF-IDF matrix it produced for ``df['Text']`` (one row per row of
        ``df``).
    """
    vectorizer = TfidfVectorizer(stop_words='english')
    text_matrix = vectorizer.fit_transform(df['Text'])
    return vectorizer, text_matrix

def build_collaborative_filtering_model(df):
    """Fit a nearest-neighbour index over one-hot encoded ingredients.

    Each 'Ingredients' string is split on the exact delimiter ``', '`` and the
    resulting lists are binarized, so every distinct ingredient token becomes
    one column. Despite the function's name, only the ingredient lists of
    ``df`` are used; no user-interaction data is involved.

    Returns:
        A ``(mlb, knn_model)`` tuple: the fitted ``MultiLabelBinarizer`` and a
        brute-force, cosine-distance ``NearestNeighbors`` model fitted on the
        binarized ingredient matrix.
    """
    ingredient_lists = df['Ingredients'].str.split(', ')
    binarizer = MultiLabelBinarizer()
    one_hot_ingredients = binarizer.fit_transform(ingredient_lists)
    neighbours = NearestNeighbors(metric='cosine', algorithm='brute').fit(one_hot_ingredients)
    return binarizer, neighbours

def recommend_recipes_for_allergy(df, user_allergies, search_keyword, tfidf_vectorizer, tfidf_matrix, top_n=10):
    """Return the best keyword matches in ``df`` that contain no listed allergen.

    Recipes are ranked by cosine similarity between the TF-IDF vector of
    ``search_keyword`` and each row of ``tfidf_matrix``. Recipes whose
    'Ingredients' text contains any allergy term (case-insensitive, literal
    substring match) are removed, and the ``top_n`` best remaining recipes are
    returned.

    Args:
        df: DataFrame with an 'Ingredients' column; row ``i`` must correspond
            to row ``i`` of ``tfidf_matrix``.
        user_allergies: Iterable of allergen strings to exclude.
        search_keyword: Free-text query to rank the recipes against.
        tfidf_vectorizer: Fitted vectorizer used to transform the query.
        tfidf_matrix: TF-IDF matrix with one row per row of ``df``.
        top_n: Maximum number of recipes to return (default 10).

    Returns:
        A DataFrame slice of ``df`` ordered from best to worst match. It may
        hold fewer than ``top_n`` rows, and is empty when every recipe
        contains an allergen.

    Raises:
        ValueError: If ``tfidf_matrix`` does not have exactly one row per row
            of ``df``.
    """
    # The ranking below yields row positions of tfidf_matrix that are then used
    # to index df, so both must describe the same recipes in the same order.
    if tfidf_matrix.shape[0] != len(df):
        raise ValueError(
            "tfidf_matrix has %d rows but df has %d; they must describe the same recipes"
            % (tfidf_matrix.shape[0], len(df))
        )

    search_vector = tfidf_vectorizer.transform([search_keyword])
    cosine_similarities = cosine_similarity(search_vector, tfidf_matrix)

    # Rank every recipe (best first) before filtering, so that removing an
    # allergen-containing recipe lets the next-best safe one take its place.
    # NOTE(review): recipes with zero similarity to the keyword stay eligible.
    ranked_indices = cosine_similarities.argsort()[0][::-1]
    ranked_recipes = df.iloc[ranked_indices]

    ingredients = ranked_recipes['Ingredients'].str.lower()
    contains_allergen = np.zeros(len(ranked_recipes), dtype=bool)
    for allergy in user_allergies:
        term = allergy.strip().lower()
        if not term:
            # A blank term is a substring of everything and would drop all recipes.
            continue
        # regex=False: the term is matched literally, so characters such as
        # '(' or '+' are safe. na=False: a missing ingredient list counts as
        # "does not contain" instead of producing NaN in the mask.
        contains_allergen |= ingredients.str.contains(term, regex=False, na=False).to_numpy(dtype=bool)

    return ranked_recipes[~contains_allergen].head(top_n)

def train_and_deploy_model(df, user_allergies, search_keyword):
    """Fit both models, compute recommendations and write the model files.

    Steps: fit the TF-IDF model and the ingredient nearest-neighbour model on
    ``df``, then compute allergy-filtered recommendations for
    ``search_keyword``. When recommendations exist, the vectorizer is dumped
    with joblib to 'tfidf_model.pkl', a Keras model is saved to
    'collaborative_model.h5', and that file is converted to
    'collaborative_model.tflite'.

    Returns:
        ``(recommended_recipes, tfidf_model_path, collaborative_model_path)``,
        or ``(None, None, None)`` after printing a message when no recipe
        qualifies. In that case no file is written.
    """
    vectorizer, text_matrix = build_tfidf_model(df)
    binarizer, neighbours = build_collaborative_filtering_model(df)
    picks = recommend_recipes_for_allergy(df, user_allergies, search_keyword, vectorizer, text_matrix)

    # Nothing to recommend: report it and skip persisting any model.
    if picks.empty:
        print("No suitable recipes found for the given keyword and allergies.")
        return None, None, None

    # Output locations, relative to the current working directory.
    tfidf_model_path, collaborative_model_path = 'tfidf_model.pkl', 'collaborative_model.h5'

    # Persist the fitted TF-IDF vectorizer.
    joblib.dump(vectorizer, tfidf_model_path)

    # Build the Keras model and save it to disk.
    convert_sklearn_to_keras(neighbours, binarizer, df).save(collaborative_model_path)

    # Produce the TensorFlow Lite file from the saved Keras model.
    convert_to_tflite(collaborative_model_path, 'collaborative_model.tflite')

    return picks, tfidf_model_path, collaborative_model_path

def convert_sklearn_to_keras(knn_model, mlb, df):
    """Build a minimal Keras model sized to the binarizer's label set.

    The network is one sigmoid unit on an input with ``len(mlb.classes_)``
    features. It is compiled with Adam and binary cross-entropy, then fitted
    for a single epoch on one all-zero sample labelled 0.

    Note that ``knn_model`` and ``df`` are accepted but never read: nothing is
    transferred from the scikit-learn model into the Keras model.

    Returns:
        The compiled and fitted Keras ``Model``.
    """
    n_features = len(mlb.classes_)

    features_in = Input(shape=(n_features,))
    score_out = Dense(1, activation='sigmoid')(features_in)
    keras_model = Model(inputs=features_in, outputs=score_out)
    keras_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # One placeholder sample (all zeros, label 0) is all the model is fitted on.
    placeholder_features = np.zeros((1, n_features))
    placeholder_labels = np.array([0])
    keras_model.fit(placeholder_features, placeholder_labels, epochs=1, batch_size=1)

    return keras_model

def convert_to_tflite(model_path, tflite_model_path):
    """Convert a saved Keras model file to a TensorFlow Lite file.

    Args:
        model_path: Path of the saved Keras model to load.
        tflite_model_path: Path the converted model bytes are written to;
            the file is opened in binary write mode.
    """
    keras_model = tf.keras.models.load_model(model_path)
    tflite_bytes = tf.lite.TFLiteConverter.from_keras_model(keras_model).convert()
    with open(tflite_model_path, 'wb') as out_file:
        out_file.write(tflite_bytes)

# Step 3: Evaluating, Optimizing the training model
def evaluate_model(df, user_allergies, search_keyword='chicken'):
    """Estimate precision, recall and F1 of the recommender on a held-out split.

    ``df`` is split 80/20 with a fixed random state. The TF-IDF vectorizer is
    fitted on the training part only, and the held-out recipes are transformed
    with it so that similarity rows line up with the held-out rows. The
    recommender is then run on the held-out recipes, and its output is
    compared, recipe by recipe, against a proxy notion of relevance.

    A held-out recipe counts as relevant when its 'Text' contains
    ``search_keyword`` (case-insensitive, literal substring) and its
    'Ingredients' contain none of ``user_allergies``. This is a heuristic
    stand-in for real relevance labels, which ``df`` does not provide.

    Args:
        df: DataFrame with 'Text' and 'Ingredients' columns.
        user_allergies: Iterable of allergen strings to exclude.
        search_keyword: Query used for the evaluation (default 'chicken').

    Returns:
        ``(precision, recall, f1)`` for the "recommended" class. A metric with
        a zero denominator is reported as 1.
    """
    train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
    # Fresh 0..n-1 labels make it unambiguous which held-out rows were recommended.
    test_df = test_df.reset_index(drop=True)

    # Fit on the training split only, then project the held-out recipes into
    # the same TF-IDF space: row i of test_matrix describes row i of test_df.
    tfidf_vectorizer, _ = build_tfidf_model(train_df)
    test_matrix = tfidf_vectorizer.transform(test_df['Text'])

    recommended_recipes = recommend_recipes_for_allergy(
        test_df, user_allergies, search_keyword, tfidf_vectorizer, test_matrix
    )

    # Prediction vector: 1 where the held-out recipe was recommended.
    predicted = test_df.index.isin(recommended_recipes.index).astype(int)

    # Proxy ground truth: mentions the keyword and is free of every allergen.
    mentions_keyword = (
        test_df['Text'].str.lower()
        .str.contains(search_keyword.lower(), regex=False, na=False)
        .to_numpy(dtype=bool)
    )
    ingredients = test_df['Ingredients'].str.lower()
    contains_allergen = np.zeros(len(test_df), dtype=bool)
    for allergy in user_allergies:
        term = allergy.strip().lower()
        if not term:
            # A blank term is a substring of everything; ignore it.
            continue
        contains_allergen |= ingredients.str.contains(term, regex=False, na=False).to_numpy(dtype=bool)
    relevant = (mentions_keyword & ~contains_allergen).astype(int)

    # Both vectors have one entry per held-out recipe, as the metric functions require.
    precision = precision_score(relevant, predicted, zero_division=1)
    recall = recall_score(relevant, predicted, zero_division=1)
    f1 = f1_score(relevant, predicted, zero_division=1)
    return precision, recall, f1

# Step 4: Finalizing the trained model and Deployment
def finalize_training_and_deployment(df, user_allergies, search_keyword):
    """Fit the models, write the model files and export the recommendations.

    Fits the TF-IDF and ingredient nearest-neighbour models on ``df`` and
    computes allergy-filtered recommendations for ``search_keyword``. When
    recommendations exist, the following files are written:

    * 'tfidf_model.pkl' - the fitted vectorizer (joblib),
    * 'model.h5' - the Keras model,
    * 'collaborative_model.tflite' - the TensorFlow Lite conversion of it,
    * 'recommended_recipes.csv' - the recommendations, without the index.

    Returns:
        ``(tfidf_model_path, collaborative_model_path, data_filename)``, or
        ``(None, None, None)`` after printing a message when no recipe
        qualifies. In that case no file is written.
    """
    vectorizer, text_matrix = build_tfidf_model(df)
    binarizer, neighbours = build_collaborative_filtering_model(df)
    picks = recommend_recipes_for_allergy(df, user_allergies, search_keyword, vectorizer, text_matrix)

    # Without recommendations there is nothing to deploy or export.
    if picks.empty:
        print("No suitable recipes found for the given keyword and allergies.")
        return None, None, None

    # Output locations, relative to the current working directory.
    tfidf_model_path = 'tfidf_model.pkl'
    collaborative_model_path = 'model.h5'
    data_filename = 'recommended_recipes.csv'

    # Persist the fitted TF-IDF vectorizer.
    joblib.dump(vectorizer, tfidf_model_path)

    # Build the Keras model, save it, then derive the TensorFlow Lite file from it.
    keras_model = convert_sklearn_to_keras(neighbours, binarizer, df)
    keras_model.save(collaborative_model_path)
    convert_to_tflite(collaborative_model_path, 'collaborative_model.tflite')

    # Export the recommendations themselves.
    picks.to_csv(data_filename, index=False)

    return tfidf_model_path, collaborative_model_path, data_filename

# Example usage
# NOTE: df is rebound here to the small in-memory sample from prepare_data(),
# replacing the DataFrame that Step 1 loaded from Google Drive.
df = prepare_data()
user_allergies = ['milk', 'peanut']
search_keyword = 'chicken'
recommended_recipes, tfidf_model_path, collaborative_model_path = train_and_deploy_model(df, user_allergies, search_keyword)

# train_and_deploy_model prints the "No suitable recipes found" message itself
# before returning None, so it is not printed a second time here.
if recommended_recipes is not None:
    print("\nRecommended Recipes:")
    display(recommended_recipes)
    print("\nTF-IDF model saved as:", tfidf_model_path)
    print("Collaborative model saved as:", collaborative_model_path)
