In [11]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [12]:
# Step 1: Load the CSV file into a DataFrame
def load_data_from_csv(file_path):
    # Load the CSV data into a pandas DataFrame
    data = pd.read_csv(file_path)
    return data

In [13]:
# Step 2: Preprocess the data (e.g., select relevant features for comparison)
def preprocess_data(df, feature_columns):
    # Extract only the columns relevant for calculating similarity
    feature_data = df[feature_columns]
    
    # Fill any missing values with 0 (or you can use other imputation techniques)
    feature_data = feature_data.fillna(0)
    
    # Normalize the data (optional but recommended for cosine similarity)
    scaler = StandardScaler()
    feature_data_scaled = scaler.fit_transform(feature_data)
    
    return feature_data_scaled

In [14]:
# Step 3: Calculate cosine similarity between the food products
def calculate_cosine_similarity(feature_data_scaled):
    # Compute the cosine similarity matrix between the items
    similarity_matrix = cosine_similarity(feature_data_scaled)
    return similarity_matrix

In [15]:
# Step 4: Recommend similar products
def recommend_similar_products(df, similarity_matrix, product_index, top_n=5):
    # Get the similarity scores for the target product
    similarity_scores = list(enumerate(similarity_matrix[product_index]))
    
    # Sort by similarity scores in descending order
    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
    
    # Get the indices of the most similar products (excluding the target product itself)
    similar_products = [index for index, score in similarity_scores if index != product_index][:top_n]
    
    # Return the top N similar products
    return df.iloc[similar_products]

In [23]:
# Main function to load data, preprocess, and recommend similar products
def main(csv_file, product_index, top_n=5):
    # Step 1: Load the data from CSV
    df = load_data_from_csv(csv_file)
    
    # Step 2: Preprocess the data
    # Specify the columns you want to use for comparison (e.g., nutritional values, ingredients)
    feature_columns = ['categories_tags', 'ingredients_tags', 'food_groups_tags']
    feature_data_scaled = preprocess_data(df, feature_columns)
    
    # Step 3: Calculate cosine similarity
    similarity_matrix = calculate_cosine_similarity(feature_data_scaled)
    
    # Step 4: Recommend similar products
    similar_products = recommend_similar_products(df, similarity_matrix, product_index, top_n)
    
    # Print the recommended products
    print(f"Top {top_n} similar products to '{df.iloc[product_index]['product_name']}':")
    print(similar_products[['product_name', 'categories_tags', 'ingredients_tags', 'food_groups_tags']])


In [24]:
# Example usage:
if __name__ == '__main__':
    csv_file_path = r'C:\Users\Arnab\OneDrive\Desktop\MajorProject\Dataset_Part2.csv'  # Path to your CSV file
    product_index_to_compare = 0  # Index of the product to compare with others (0 means first product)
    top_n_recommendations = 5  # Number of similar products to recommend
    
    main(csv_file_path, product_index_to_compare, top_n_recommendations)

KeyError: "['food_group_tags'] not in index"