In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import OneHotEncoder

In [None]:
# Load the cleaned Swiggy restaurant data from the working directory.
df = pd.read_csv("swiggy_cleaned_data.csv")
# Show (rows, columns) as the cell output.
df.shape

(148506, 6)

In [None]:
# The ten cities that appear most often in the dataset.
top_cities = df['city'].value_counts().index[:10]

# Keep only rows from those cities and renumber them 0..n-1 so that
# positional lookups (iloc) line up with the row labels.
swiggy_df = df.loc[df['city'].isin(top_cities)].reset_index(drop=True)

swiggy_df.shape

(85481, 6)

In [None]:
# Preview the first rows of the top-city subset.
swiggy_df.head()

Unnamed: 0,name,rating,cost,cuisine,rating_count_num,city
0,M.A.D By Tomato'S,4.3,1200.0,"Indian,Chinese",100.0,Ahmedabad
1,Tea Post,4.0,150.0,Fast Food,100.0,Ahmedabad
2,Shanghai Chicken Lolipops,3.894461,300.0,"Chinese,Fast Food",0.0,Ahmedabad
3,Ministry Of Momos,3.894461,300.0,Chinese,0.0,Ahmedabad
4,Sizzling - The Cake Room,3.894461,350.0,Desserts,0.0,Ahmedabad


In [None]:
# Inspect column dtypes to see which columns are categorical (object) vs numeric.
swiggy_df.dtypes

Unnamed: 0,0
name,object
rating,float64
cost,float64
cuisine,object
rating_count_num,float64
city,object


In [None]:
# Categorical columns fed to the one-hot encoder; the order here is the
# column order the encoder is fitted with.
cat_cols = ['cuisine', 'city']

In [None]:
# One-hot encode the categorical columns into a sparse matrix.
# drop='first' removes one indicator per column; handle_unknown='ignore'
# keeps transform() from raising on values not seen during fit.
encoder = OneHotEncoder(drop='first', handle_unknown='ignore', sparse_output=True)

# Fit on the top-city subset and encode it in one pass.
encoded_cat = encoder.fit_transform(swiggy_df[cat_cols])

In [None]:
import pickle

# Persist the encoded matrix and the fitted encoder. The context managers
# guarantee each file is flushed and closed, even if pickling raises;
# the previous bare open(...) calls left the handles open.
with open("encoded_city_cuisine.pkl", "wb") as encoded_file:
    pickle.dump(encoded_cat, encoded_file)

with open("encoder.pkl", "wb") as encoder_file:
    pickle.dump(encoder, encoder_file)

In [None]:
# NOTE(review): disabled PCA experiment. `encoded_dense` is not defined
# anywhere in the visible notebook, so re-enabling this cell as-is would
# raise NameError.
#from sklearn.decomposition import PCA

#pca = PCA(n_components=50, random_state=42)
#pca_data = pca.fit_transform(encoded_dense)

In [None]:
# NOTE(review): disabled along with the PCA experiment; `pca` and `pca_data`
# only exist if the commented-out PCA code is restored.
#import pickle

#pickle.dump(pca_data, open("pca_city_cuisine.pkl", "wb"))
#pickle.dump(pca, open("pca_model.pkl", "wb"))

In [None]:
# Sanity check: the encoded matrix must have one row per restaurant.
print("Major city cleaned rows :", len(swiggy_df))
print("Encoded rows :", encoded_cat.shape[0])

Major city cleaned rows : 85481
Encoded rows : 85481


In [None]:
# Renumber rows 0..n-1. swiggy_df was already created with
# reset_index(drop=True), so this is a harmless repeat and safe to re-run.
swiggy_df = swiggy_df.reset_index(drop=True)

In [None]:
# Show the first five restaurants next to their encoded rows to eyeball
# that frame rows and matrix rows are aligned.
print(swiggy_df.head(5))
print(encoded_cat[:5].toarray())

                        name    rating    cost            cuisine  \
0          M.A.D By Tomato'S  4.300000  1200.0     Indian,Chinese   
1                   Tea Post  4.000000   150.0          Fast Food   
2  Shanghai Chicken Lolipops  3.894461   300.0  Chinese,Fast Food   
3          Ministry Of Momos  3.894461   300.0            Chinese   
4   Sizzling - The Cake Room  3.894461   350.0           Desserts   

   rating_count_num       city  
0             100.0  Ahmedabad  
1             100.0  Ahmedabad  
2               0.0  Ahmedabad  
3               0.0  Ahmedabad  
4               0.0  Ahmedabad  
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [None]:
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
import pickle

In [None]:
from sklearn.decomposition import TruncatedSVD

# Reduce the one-hot matrix to 20 components. random_state is fixed so the
# projection is reproducible across runs.
svd = TruncatedSVD(n_components=20, random_state=42)
# Fit the decomposition and project every restaurant row in one step.
svd_features = svd.fit_transform(encoded_cat)

print("SVD output shape:", svd_features.shape)

SVD output shape: (85481, 20)


In [None]:
import pickle

# Persist the fitted SVD model. The context manager closes (and flushes) the
# file even if pickling raises; the previous bare open(...) leaked the handle.
with open("svd_model.pkl", "wb") as svd_file:
    pickle.dump(svd, svd_file)

In [None]:
# Write the SVD-reduced feature matrix to disk in NumPy's .npy format.
np.save("svd_features.npy", svd_features)

In [None]:
# Row position of the restaurant used as the example query.
query_index = 10
# Select that row as a 2-D (1, n_components) array, the shape
# cosine_similarity expects for a single sample.
query_vector = svd_features[[query_index]]

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Cosine similarity of the query against every restaurant, flattened from
# the (1, n_rows) result to a 1-D score vector.
similarity_scores = cosine_similarity(query_vector, svd_features).ravel()


In [None]:
import numpy as np

top_n = 5
# Rank all rows by descending similarity, then remove the query row itself
# by index. Dropping "position 0" is not reliable: restaurants with the same
# city and cuisine tie with the query at similarity 1.0, so the query is not
# guaranteed to sort first and could otherwise be recommended to itself.
top_indices = np.argsort(similarity_scores)[::-1]
top_indices = top_indices[top_indices != query_index][:top_n]



In [None]:
# Pick the display columns first, then pull the recommended rows by position.
recommendations = swiggy_df[
    ['name', 'city', 'cuisine', 'rating', 'cost', 'rating_count_num']
].iloc[top_indices]

print(recommendations)


                                 name       city           cuisine    rating  \
560                 Kathiyawad Ni Moj  Ahmedabad  Snacks,Beverages  3.894461   
2386  Faasos' Signature Wraps & Rolls  Ahmedabad  Snacks,Beverages  3.894461   
2384                      Apnaa Addaa  Ahmedabad  Snacks,Beverages  3.894461   
2370               Harmony Resto Cafe  Ahmedabad  Snacks,Beverages  4.400000   
1980        Kathiyavadi Bhajiya Point  Ahmedabad  Snacks,Beverages  3.894461   

       cost  rating_count_num  
560    99.0               0.0  
2386  350.0               0.0  
2384  200.0               0.0  
2370  300.0             100.0  
1980  150.0               0.0  


In [None]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd

def search_restaurant(city, cuisine, cost, rating, k=5):
    """Return the ``k`` restaurants most similar to a city/cuisine query.

    The query is one-hot encoded with the fitted ``encoder``, projected with
    the fitted ``svd``, and compared by cosine similarity against every row
    of ``svd_features``. Matching rows are fetched positionally from
    ``swiggy_df``.

    Parameters
    ----------
    city : str
        City of the query.
    cuisine : str
        Cuisine of the query. It is encoded as one categorical value, so it
        presumably has to match a dataset cuisine string exactly
        (e.g. "Indian,Chinese") -- TODO confirm.
    cost, rating : number
        Accepted for interface compatibility but NOT used: only ``cuisine``
        and ``city`` are encoded, so these values do not affect the result.
    k : int, default 5
        Number of restaurants to return. Values <= 0 yield an empty frame.

    Returns
    -------
    pandas.DataFrame
        Columns ``name, city, cuisine, rating, cost`` for the ``k`` most
        similar rows, ordered by rating then cost (both descending), not by
        similarity.
    """
    result_cols = ['name', 'city', 'cuisine', 'rating', 'cost']

    # Only the categorical fields take part in the similarity search. The
    # column order must match the order the encoder was fitted with.
    query_df = pd.DataFrame([{'cuisine': cuisine, 'city': city}])

    # NOTE(review): the encoder is built with handle_unknown='ignore', so an
    # unseen city/cuisine does not raise here; per scikit-learn it is encoded
    # as all zeros, which looks the same as the dropped first category.
    query_cat = encoder.transform(query_df[['cuisine', 'city']])

    # Project into the same reduced space as the dataset rows.
    query_vec = svd.transform(query_cat)

    similarity = cosine_similarity(query_vec, svd_features).flatten()

    # Slice from the front of the descending ranking. The previous `[-k:]`
    # form returned EVERY row for k == 0, because `[-0:]` is the whole array.
    k = max(k, 0)
    top_indices = similarity.argsort()[::-1][:k]

    result = swiggy_df.iloc[top_indices][result_cols]

    return result.sort_values(by=['rating', 'cost'], ascending=False)


In [None]:
# Example query: three Chennai restaurants most similar to "Chinese" cuisine.
# cost and rating are passed but search_restaurant only encodes cuisine and
# city, so they do not influence which rows are returned.
search_restaurant(
    city="Chennai",
    cuisine="Chinese",
    cost=300,
    rating=4.2,
    k=3
)

Unnamed: 0,name,city,cuisine,rating,cost
22761,Momo Sa-Khang by Kailash Kitchen,Chennai,Chinese,4.6,200.0
22754,PANDA POT,Chennai,Chinese,3.894461,300.0
28085,FOUR SQUARE CHAI & CHILL BRO,Chennai,Chinese,3.894461,300.0
