In [None]:
# Step 0: Install required packages
!pip install pandas scikit-learn joblib --quiet

# Step 1: Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report
import joblib
from google.colab import files

# Step 2: Load dataset
file_path = '/content/recipes_10000_v2.csv'  # Specify the path to your CSV file

# Discard any DataFrame left over from an earlier run of this cell. The
# handlers below only print on failure, so without this a failed load would
# leave the old `df` in place and the later steps would silently train on
# stale data right after reporting that the file could not be read.
globals().pop('df', None)

try:
    # Read the CSV file, skipping bad lines and specifying encoding
    df = pd.read_csv(file_path, on_bad_lines='skip', encoding='latin-1')
except FileNotFoundError:
    print(f"Error: The file was not found at {file_path}. Please make sure the file is in the correct location.")
except Exception as e:
    # Best-effort notebook behaviour: report the problem and leave `df`
    # undefined so the downstream "'df' in locals()" guard skips training.
    print(f"An error occurred during file loading: {e}")

# Step 3: Filter rare cuisines
# The label column name keeps its trailing space on purpose: that is the exact
# name this script looks up in the loaded DataFrame.
LABEL_COLUMN = 'Cuisine '
TEXT_COLUMN = 'Ingredients'
MIN_RECIPES_PER_CUISINE = 3  # cuisines with fewer recipes are dropped

if not ('df' in locals() and LABEL_COLUMN in df.columns):
    print("DataFrame was not loaded or 'Cuisine ' column is missing. Cannot proceed with model training and evaluation.")
elif TEXT_COLUMN not in df.columns:
    # Checked up front so a missing feature column is reported like the other
    # precondition failures instead of raising KeyError halfway through.
    print(f"'{TEXT_COLUMN}' column is missing. Cannot proceed with model training and evaluation.")
else:
    # Rows lacking a label or ingredients text are unusable; TfidfVectorizer
    # rejects NaN documents. Drop them before counting so the rarity
    # threshold reflects the rows that are actually trained on.
    df = df.dropna(subset=[LABEL_COLUMN, TEXT_COLUMN])

    counts = df[LABEL_COLUMN].value_counts()
    frequent_cuisines = counts[counts >= MIN_RECIPES_PER_CUISINE].index
    df = df[df[LABEL_COLUMN].isin(frequent_cuisines)].copy()

    # Step 4: Prepare features and labels
    X = df[TEXT_COLUMN]   # text features
    y = df[LABEL_COLUMN]  # labels

    # Split into train and test sets, keeping cuisine proportions equal in both
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Step 5: Vectorize text using TF-IDF (fit on the training split only)
    vectorizer = TfidfVectorizer(stop_words='english', max_features=1000)
    X_train_vec = vectorizer.fit_transform(X_train)
    X_test_vec = vectorizer.transform(X_test)

    # Step 6: Train classifier (Multinomial Naive Bayes)
    clf = MultinomialNB()
    clf.fit(X_train_vec, y_train)

    # Step 7: Evaluate model
    y_pred = clf.predict(X_test_vec)
    print("Classification Report:\n")
    # zero_division=0 reports 0.0 for classes that were never predicted
    # without emitting an undefined-metric warning for each of them.
    print(classification_report(y_test, y_pred, zero_division=0))

    # Step 8: Save model and vectorizer
    joblib.dump(clf, 'cuisine_model.pkl')
    joblib.dump(vectorizer, 'vectorizer.pkl')
    print("Model and vectorizer saved as 'cuisine_model.pkl' and 'vectorizer.pkl'.")

    # Step 9: Automatically download the .pkl files
    # Best effort: a failed download is reported but does not abort the cell.
    try:
        files.download('cuisine_model.pkl')
        files.download('vectorizer.pkl')
    except Exception as e:
        print(f"Error downloading files: {e}")

    # Step 10: Function to predict cuisine for any recipe
    def predict_cuisine(ingredients_text):
        """Return the cuisine label predicted for one ingredients string.

        The text is transformed with the fitted ``vectorizer`` and classified
        with the trained ``clf``; both are read from the enclosing scope.
        """
        vec = vectorizer.transform([ingredients_text])
        return clf.predict(vec)[0]

    # Example usage
    example_recipe = "chicken, garlic, ginger, soy sauce"
    print(f"Predicted cuisine for example recipe: {predict_cuisine(example_recipe)}")

Error: The file was not found at /content/recipes_10000_v2.csv. Please make sure the file is in the correct location.
Classification Report:

               precision    recall  f1-score   support

     American       0.06      0.04      0.05        99
    Brazilian       0.00      0.00      0.00        96
    Caribbean       0.04      0.06      0.05       101
      Chinese       0.00      0.00      0.00        92
    Ethiopian       0.04      0.04      0.04       101
       French       0.05      0.07      0.06        99
        Greek       0.03      0.01      0.02        96
       Indian       0.00      0.00      0.00        94
      Italian       0.03      0.05      0.04       100
     Japanese       0.07      0.07      0.07       104
       Korean       0.05      0.03      0.04        97
     Lebanese       0.12      0.09      0.10       102
Mediterranean       0.06      0.05      0.06        92
      Mexican       0.00      0.00      0.00        95
     Moroccan       0.06      0.

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Predicted cuisine for example recipe: American
