In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Load your existing dataset with columns: 'customer_id', 'product_id', 'product_name', 'rating'
# Replace 'restaurant_data.csv' with the path to your dataset
data = pd.read_csv('restaurant_data.csv')

# Calculate the average rating for each product
product_ratings = data.groupby('product_id')['rating'].mean().reset_index()

# Create a TF-IDF vectorizer for product names
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(data['product_name'])

# Calculate the cosine similarity between product names
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Create a mapping between product names and product IDs
product_name_to_id = {product_name: product_id for product_name, product_id in zip(data['product_name'], data['product_id'])}

# Function to get recommendations for a customer based on their order history
def get_recommendations(customer_id):
    # Get all the products ordered by the customer
    customer_orders = data[data['customer_id'] == customer_id]['product_id'].unique()

    # Remove products the customer has already ordered
    all_products = data['product_id'].unique()
    available_products = [product for product in all_products if product not in customer_orders]

    # Calculate the average rating for each available product
    avg_ratings = product_ratings[product_ratings['product_id'].isin(available_products)]

    # Merge the average ratings with product names
    avg_ratings = pd.merge(avg_ratings, product_ratings, on='product_id')

    # Calculate a weighted score for recommendations
    avg_ratings['weighted_score'] = avg_ratings['rating_x'] * avg_ratings['rating_y']

    # Sort by the weighted score in descending order
    avg_ratings = avg_ratings.sort_values(by='weighted_score', ascending=False)

    # Get the top 3 recommended products (or fewer if not enough available products)
    top_recommendations = avg_ratings['product_id'].head(3)

    # Map product IDs to product names
    recommended_products = [data[data['product_id'] == product_id]['product_name'].iloc[0] for product_id in top_recommendations]

    return recommended_products

# For a customer (assuming 'customer_id'), get recommendations based on their order history
customer_id = 73  # Replace with the actual customer ID
recommendations = get_recommendations(customer_id)
print("Recommended Products for Customer", customer_id, ":", recommendations)


Recommended Products for Customer 73 : ['Soup', 'Sandwich', 'Coffee']


MAIN CODE

In [26]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Load your existing dataset with columns: 'customer_id', 'product_id', 'product_name', 'rating'
# Replace 'restaurant_data.csv' with the path to your dataset
data = pd.read_csv('restaurant_data.csv')

# Create a TF-IDF vectorizer for product names
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(data['product_name'])

# Calculate the cosine similarity between product names
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Function to get recommendations for a customer based on their order history and 5-star rated products
def get_recommendations(customer_id):
    # Get all the products ordered by the customer
    customer_orders = data[data['customer_id'] == customer_id]['product_id'].unique()

    # Remove products the customer has already ordered
    all_products = data['product_id'].unique()
    available_products = [product for product in all_products if product not in customer_orders]

    # Create a list of 5-star rated products by the customer
    five_star_products = data[(data['customer_id'] == customer_id) & (data['rating'] == 5)]['product_id'].unique()

    # Add the 5-star rated products to the available products list
    available_products.extend(five_star_products)

    # Calculate a weighted score for recommendations
    avg_ratings = data[data['product_id'].isin(available_products)].groupby('product_id')['rating'].mean().reset_index()
    avg_ratings = avg_ratings.rename(columns={'rating': 'avg_rating'})

    # Sort by the weighted score in descending order
    avg_ratings = avg_ratings.sort_values(by='avg_rating', ascending=False)

    # Get the top 3 recommended products (or fewer if not enough available products)
    top_recommendations = avg_ratings['product_id'].head(3)

    # Map product IDs to product names
    recommended_products = [data[data['product_id'] == product_id]['product_name'].iloc[0] for product_id in top_recommendations]

    return recommended_products

# For a customer (assuming 'customer_id'), get recommendations based on their order history and 5-star rated products
customer_id = 78  # Replace with the actual customer ID
recommendations = get_recommendations(customer_id)
print("Recommended Products for Customer", customer_id, ":", recommendations)


Recommended Products for Customer 78 : ['Soup', 'Sandwich', 'Coffee']


In [27]:
import pandas as pd

# Load your existing dataset with columns: 'product_id', 'product_name', 'rating'
# Replace 'restaurant_data.csv' with the path to your dataset
data = pd.read_csv('restaurant_data.csv')

# Top 10 highest-rated products
top_rated_products = data.groupby('product_name')['rating'].mean().sort_values(ascending=False).head(10)

# Top 10 most frequently purchased products
top_frequent_products = data['product_name'].value_counts().head(10)

print("Top 10 Highest-Rated Products:")
print(top_rated_products)

print("\nTop 10 Most Frequently Purchased Products:")
print(top_frequent_products)


Top 10 Highest-Rated Products:
product_name
Soup             3.857143
Sandwich         3.500000
Coffee           3.250000
Burger           3.000000
Chicken Wings    3.000000
Salad            3.000000
Sushi            2.666667
Fries            2.500000
Steak            2.500000
Ice Cream        2.333333
Name: rating, dtype: float64

Top 10 Most Frequently Purchased Products:
product_name
Chicken Wings    8
Soup             7
Burger           5
Tacos            4
Soda             4
Coffee           4
Ice Cream        3
Sushi            3
Pasta            3
Fries            2
Name: count, dtype: int64


In [28]:
import pandas as pd

# Load your existing dataset with columns: 'customer_id', 'product_id', 'rating'
# Replace 'restaurant_data.csv' with the path to your dataset
data = pd.read_csv('restaurant_data.csv')

# Find repetitive customers (ordered more than once) and their order frequency
customer_frequency = data['customer_id'].value_counts()
repetitive_customers = customer_frequency[customer_frequency > 1].index

# Create a DataFrame to store orders, ratings, and customer order frequency
repetitive_orders = data[data['customer_id'].isin(repetitive_customers)]
repetitive_orders['customer_frequency'] = repetitive_orders['customer_id'].map(customer_frequency)

# Display orders, ratings, and customer order frequency
print("Repetitive Customers with Their Orders, Ratings, and Order Frequency:")
print(repetitive_orders)


Repetitive Customers with Their Orders, Ratings, and Order Frequency:
    customer_id  product_id product_name  rating  customer_frequency
4            20           5        Sushi       2                   3
7            75           4        Salad       3                   2
12           30          15         Soup       3                   2
15            1           2       Burger       4                   2
22           30           2       Burger       1                   2
27           56           5        Sushi       4                   3
32           20           6        Steak       2                   3
33           56           2       Burger       4                   3
37           56          15         Soup       2                   3
41           75          10         Soda       3                   2
48            1          15         Soup       4                   2
49           20          15         Soup       5                   3


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  repetitive_orders['customer_frequency'] = repetitive_orders['customer_id'].map(customer_frequency)


In [29]:
import pandas as pd

# Load your existing dataset with columns: 'customer_id', 'product_id', 'rating'
# Replace 'restaurant_data.csv' with the path to your dataset
data = pd.read_csv('restaurant_data.csv')

# Find repetitive customers (ordered more than once) and their order frequency
customer_frequency = data['customer_id'].value_counts()
repetitive_customers = customer_frequency[customer_frequency > 1].index

# Create a DataFrame to store orders, ratings, customer order frequency, and products they ordered
repetitive_orders = data[data['customer_id'].isin(repetitive_customers)]
repetitive_orders['customer_frequency'] = repetitive_orders['customer_id'].map(customer_frequency)

# Function to get all the products ordered by each customer
def get_ordered_products(customer_id):
    customer_data = repetitive_orders[repetitive_orders['customer_id'] == customer_id]
    ordered_products = customer_data['product_id'].unique()
    # Convert ordered_products to strings before joining
    ordered_products = [str(product) for product in ordered_products]
    return ', '.join(ordered_products)

repetitive_orders['ordered_products'] = repetitive_orders['customer_id'].apply(get_ordered_products)

# Display orders, ratings, customer order frequency, and products they ordered
print("Repetitive Customers with Their Orders, Ratings, Order Frequency, and Ordered Products:")
print(repetitive_orders[['customer_id', 'rating', 'customer_frequency', 'ordered_products']])


Repetitive Customers with Their Orders, Ratings, Order Frequency, and Ordered Products:
    customer_id  rating  customer_frequency ordered_products
4            20       2                   3         5, 6, 15
7            75       3                   2            4, 10
12           30       3                   2            15, 2
15            1       4                   2            2, 15
22           30       1                   2            15, 2
27           56       4                   3         5, 2, 15
32           20       2                   3         5, 6, 15
33           56       4                   3         5, 2, 15
37           56       2                   3         5, 2, 15
41           75       3                   2            4, 10
48            1       4                   2            2, 15
49           20       5                   3         5, 6, 15


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  repetitive_orders['customer_frequency'] = repetitive_orders['customer_id'].map(customer_frequency)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  repetitive_orders['ordered_products'] = repetitive_orders['customer_id'].apply(get_ordered_products)
