# Frequencies

In [None]:
# Mount Google Drive under /content/drive (google.colab is only available on Colab).
# A later cell reads its dataset from /content/drive/MyDrive/project/.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#code for saving the frequencies

import pandas as pd
from collections import defaultdict
import csv

# Step 1: Load and preprocess the recipe data
def load_recipes(filepath):
    """Read the recipe CSV located at ``filepath``.

    Args:
        filepath: Path of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        Whatever ``pd.read_csv(filepath)`` returns (the parsed table).
    """
    recipes_table = pd.read_csv(filepath)
    return recipes_table

# Step 2: Calculate occurrence and co-occurrence frequencies
def calculate_occurrence(recipes):
    """Count how many times each ingredient is listed across all recipes.

    Args:
        recipes: Object indexable by ``'ingredients'`` (e.g. a DataFrame) that
            yields one ``", "``-separated ingredient string per recipe.

    Returns:
        ``defaultdict(int)`` mapping ingredient name -> number of times it was
        listed. An ingredient listed twice in one recipe is counted twice.
    """
    ingredient_freq = defaultdict(int)
    for recipe in recipes['ingredients']:
        # A missing cell is not a string (pandas yields a float NaN), so it has
        # no .split(); skip it instead of crashing the whole run.
        if not isinstance(recipe, str):
            continue
        for ingredient in recipe.split(", "):
            # Normalise stray whitespace so " sugar " and "sugar" share a count.
            ingredient = ingredient.strip()
            # Blank cells and a trailing ", " produce empty tokens; ignore them.
            if ingredient:
                ingredient_freq[ingredient] += 1
    return ingredient_freq

def calculate_cooccurrence(recipes):
    """Count in how many recipes each pair of distinct ingredients appears together.

    Args:
        recipes: Object indexable by ``'ingredients'`` (e.g. a DataFrame) that
            yields one ``", "``-separated ingredient string per recipe.

    Returns:
        ``defaultdict(int)`` mapping an alphabetically ordered
        ``(ingredient_a, ingredient_b)`` tuple -> number of recipes containing both.
    """
    co_occurrence_freq = defaultdict(int)
    for recipe in recipes['ingredients']:
        # A missing cell is not a string (pandas yields a float NaN); skip it.
        if not isinstance(recipe, str):
            continue
        # De-duplicate within the recipe: a repeated ingredient would otherwise
        # create a meaningless self-pair and count real pairs more than once.
        # Sorting once up front means every emitted pair is already ordered.
        names = sorted({part.strip() for part in recipe.split(", ")} - {""})
        for i, ing1 in enumerate(names):
            for ing2 in names[i + 1:]:
                co_occurrence_freq[(ing1, ing2)] += 1
    return co_occurrence_freq

# Step 4: Save ingredient frequencies to CSV
def save_ingredient_freq(ingredient_freq, filepath):
    """Write ingredient counts to a two-column CSV file.

    Args:
        ingredient_freq: Mapping of ingredient name -> count.
        filepath: Destination path; an existing file is overwritten.

    The file has the header ``Ingredient,Frequency`` followed by one row per
    mapping entry, in the mapping's iteration order.
    """
    # Pin UTF-8 so non-ASCII ingredient names are written the same way on every
    # platform instead of depending on the locale's default encoding.
    # newline='' lets the csv module control line endings itself.
    with open(filepath, 'w', newline='', encoding='utf-8') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(['Ingredient', 'Frequency'])
        writer.writerows(
            [ingredient, freq] for ingredient, freq in ingredient_freq.items()
        )

# Step 5: Save co-occurrence frequencies to CSV
def save_cooccurrence_freq(co_occurrence_freq, filepath):
    """Write ingredient-pair counts to a two-column CSV file.

    Args:
        co_occurrence_freq: Mapping of ``(ingredient_a, ingredient_b)`` -> count.
        filepath: Destination path; an existing file is overwritten.

    The file has the header ``Ingredient Pair,Frequency``. Each pair is written
    as the single field ``"a, b"`` (the csv module quotes it because of the
    comma), followed by its count.
    """
    # Pin UTF-8 so non-ASCII ingredient names are written the same way on every
    # platform instead of depending on the locale's default encoding.
    # newline='' lets the csv module control line endings itself.
    with open(filepath, 'w', newline='', encoding='utf-8') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(['Ingredient Pair', 'Frequency'])
        writer.writerows(
            [f"{pair[0]}, {pair[1]}", freq]
            for pair, freq in co_occurrence_freq.items()
        )

# Step 7: Run the frequency pipeline on the recipe dataset
# Each path is defined once so a save call and its message cannot drift apart.
RECIPES_CSV = "/content/final_data5000.csv"
INGREDIENT_FREQ_CSV = "/content/ingredient_freq.csv"
CO_OCCURRENCE_FREQ_CSV = "/content/co_occurrence_freq.csv"

recipes = load_recipes(RECIPES_CSV)

ingredient_freq = calculate_occurrence(recipes)
save_ingredient_freq(ingredient_freq, INGREDIENT_FREQ_CSV)
print(f"Ingredient frequencies saved to {INGREDIENT_FREQ_CSV}")

co_occurrence_freq = calculate_cooccurrence(recipes)
save_cooccurrence_freq(co_occurrence_freq, CO_OCCURRENCE_FREQ_CSV)
print(f"Co-occurrence frequencies saved to {CO_OCCURRENCE_FREQ_CSV}")



Ingredient frequencies saved to /content/ingredient_freq.csv
Co-occurrence frequencies saved to /content/co_occurrence_freq.csv


# Extracting Unique Ingredients

In [None]:
!pip install openai==0.28

Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
Successfully installed openai-0.28.0


In [None]:
import pandas as pd

# Load the dataset
file_path = '/content/drive/MyDrive/project/Copy of Totally_cleaned_dataset_10000.csv'
recipes_df = pd.read_csv(file_path)

# Collect every distinct ingredient name from the "ingredients" column
all_ingredients = set()

for recipe in recipes_df['ingredients']:
    # Empty cells are read as NaN (a float) and have no .split(); skip them.
    if not isinstance(recipe, str):
        continue
    # Ingredients are separated by ", "; strip stray whitespace so that
    # " sugar" and "sugar" are not treated as two ingredients, and drop blanks.
    all_ingredients.update(
        name for name in (part.strip() for part in recipe.split(", ")) if name
    )

# Sort before building the frame: set iteration order changes between runs,
# which would otherwise shuffle the rows of the output file each time.
ingredients_df = pd.DataFrame(sorted(all_ingredients), columns=['ingredient'])

# Save the DataFrame to a new CSV file
output_file_path = '/content/unique_ingredients.csv'
ingredients_df.to_csv(output_file_path, index=False)

# Print the path to the saved file
print(f"Unique ingredients saved to {output_file_path}")


Unique ingredients saved to /content/unique_ingredients.csv


# Classifying Each Ingredient into a Category

In [None]:
import openai
import pandas as pd

# Set your OpenAI API key.
# SECURITY: never hard-code the key in the notebook. The key that was previously
# committed on this line is exposed and must be revoked and rotated.
# The key is read from the OPENAI_API_KEY environment variable; if that is not
# set, it is prompted for without echoing it into the notebook output.
import os
from getpass import getpass

openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

def classify_ingredient(ingredient):
    """Ask the chat model which food category ``ingredient`` belongs to.

    Args:
        ingredient: Ingredient name; it is interpolated into the prompt.

    Returns:
        One of the names in ``categories`` when the reply mentions one (matched
        case-insensitively, ignoring punctuation). Otherwise the reply itself,
        stripped and lower-cased, so unexpected answers stay visible to the
        caller instead of being discarded.
    """
    # Define the categories
    categories = ["vegetable", "fruit", "spice", "herb", "grain", "dairy", "protein", "sweetener"]

    # Create the prompt
    prompt = (f"Classify the following ingredient into one of these categories: {', '.join(categories)}.\n"
              f"Ingredient: {ingredient}\n"
              f"Category:")

    # Make the API call
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",  # Use gpt-3.5-turbo model
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=10,
        # Temperature 0 keeps the labels as repeatable as the API allows, so
        # re-running the notebook does not reshuffle ingredients between categories.
        temperature=0
    )

    reply = response.choices[0].message['content'].strip().lower()

    # The reply is free text ("Spice.", "Category: spice", ...). Reduce it to bare
    # words and return the first one that is a known category, so the same
    # category is always spelled the same way for later grouping.
    words = ''.join(ch if ch.isalpha() else ' ' for ch in reply).split()
    for word in words:
        if word in categories:
            return word
    return reply

def classify_ingredients_in_csv(input_csv, output_csv):
    """Label every row of an ingredient CSV and write the result to a new CSV.

    Args:
        input_csv: Path of a CSV file that has an ``ingredient`` column.
        output_csv: Path the labelled table is written to (without the index).

    Raises:
        ValueError: If the input file has no ``ingredient`` column.
    """
    ingredients_table = pd.read_csv(input_csv)

    if 'ingredient' in ingredients_table.columns:
        # classify_ingredient is called once per row of the column.
        labels = ingredients_table['ingredient'].apply(classify_ingredient)
        ingredients_table['category'] = labels
        ingredients_table.to_csv(output_csv, index=False)
        return

    raise ValueError("CSV file must contain an 'ingredient' column")

# Example usage
input_csv = '/content/unique_ingredients.csv'
# Absolute path: the next cell loads '/content/classified_ingredients.csv', so the
# file must land there even if the working directory is not /content.
output_csv = '/content/classified_ingredients.csv'
classify_ingredients_in_csv(input_csv, output_csv)


# One Column per Category

In [None]:
import pandas as pd

# Load your CSV file
df = pd.read_csv('/content/classified_ingredients.csv')

# Rows with a missing ingredient or category cannot be placed in a column, so
# leave them out of the pivot (df itself is left untouched).
labelled_df = df.dropna(subset=['category', 'ingredient'])

# One row per category holding the list of its ingredients. Working with lists
# avoids joining the names into a ', ' string and splitting it again, which
# raises TypeError as soon as one ingredient is not a string.
pivot_df = labelled_df.groupby('category')['ingredient'].apply(list).reset_index()

# One column per category, one ingredient per cell; shorter columns are padded
# with missing values, which are written as empty cells.
expanded_df = pd.DataFrame(
    pivot_df['ingredient'].tolist(), index=pivot_df['category']
).T

# Save the result as a CSV file
expanded_df.to_csv('/content/categorized data.csv', index=False)

print("Transformed data has been saved.")


Transformed data has been saved.
