In [9]:
#import libraries
import json
import os

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [10]:
# Load the raw city guide (city -> category -> list of {name, location}).
# The encoding is given explicitly: JSON text is UTF-8, and relying on the
# platform default can fail on systems whose default is something else.
with open('data/pakistan.json', encoding='utf-8') as f:
    data = json.load(f)

# Show the parsed structure as the cell output.
data

{'lahore': {'food': [{'name': 'Butt Karahi',
    'location': 'Lakshmi Chowk, Lahore'},
   {'name': 'Phajja Siri Paye', 'location': 'Shah Alam Market, Lahore'},
   {'name': 'Haveli Restaurant',
    'location': 'Food Street, Fort Road, Lahore'}],
  'hotels': [{'name': 'Avari Lahore', 'location': 'Mall Road, Lahore'},
   {'name': 'Pearl Continental',
    'location': 'Shahrah-e-Quaid-e-Azam, Lahore'}],
  'places': [{'name': 'Badshahi Mosque', 'location': 'Walled City, Lahore'},
   {'name': 'Lahore Fort', 'location': 'Fort Road, Lahore'}]},
 'karachi': {'food': [{'name': 'Kolachi', 'location': 'Do Darya, Karachi'},
   {'name': 'Cafe Flo', 'location': 'Clifton, Karachi'},
   {'name': 'BBQ Tonight', 'location': 'Boat Basin, Karachi'}],
  'hotels': [{'name': 'Marriott Hotel',
    'location': 'Shahrah-e-Faisal, Karachi'},
   {'name': 'Avari Towers', 'location': 'Fatima Jinnah Road, Karachi'}],
  'places': [{'name': 'Clifton Beach', 'location': 'Clifton, Karachi'},
   {'name': 'Frere Hall', 'loc

In [11]:
# Flatten the nested city -> category -> items mapping into one record per
# item. The "text" field is a short descriptive sentence combining name, city,
# category and location; it is the column the vectorizer is fitted on later.
rows = [
    {
        "city": city,
        "category": category,
        "name": item["name"],
        "location": item["location"],
        "text": f"{item['name']} in {city} - {category} near {item['location']}",
    }
    for city, info in data.items()
    for category in ["food", "hotels", "places"]
    # A city may lack a category entirely; treat that as "no items".
    for item in info.get(category, [])
]

df = pd.DataFrame(rows)

# The output directory must exist before writing, otherwise to_csv raises on a
# fresh checkout (the later persistence cell creates it too late).
os.makedirs("model", exist_ok=True)
df.to_csv("model/data.csv", index=False)


In [12]:
# Fit a TF-IDF vectorizer on the "text" column of df. X is the matrix that
# fit_transform returns for those texts and is what queries are compared to.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df["text"])


In [13]:
# Example user query (stands in for a message typed into a chat)
user_query = "recommend me food in lahore"

# Encode the query with the already-fitted vectorizer (transform, not fit) so
# it is expressed in the same vocabulary as X
query_vec = vectorizer.transform([user_query])


In [14]:
# Similarity of the query to every row of X, flattened to a 1-D array with
# one score per DataFrame row
scores = cosine_similarity(query_vec, X).flatten()

# argsort is ascending, so reverse it and keep the first three positions:
# the three best-scoring rows, best first
top_indices = scores.argsort()[::-1][:3]

# Show the matching rows as the cell output
df.iloc[top_indices]


Unnamed: 0,city,category,name,location,text
3,lahore,hotels,Avari Lahore,"Mall Road, Lahore",Avari Lahore in lahore - hotels near Mall Road...
2,lahore,food,Haveli Restaurant,"Food Street, Fort Road, Lahore",Haveli Restaurant in lahore - food near Food S...
6,lahore,places,Lahore Fort,"Fort Road, Lahore","Lahore Fort in lahore - places near Fort Road,..."


In [15]:
def recommend(query, data_frame, vectorizer, top_n=3, doc_matrix=None):
    """Return the rows of ``data_frame`` most similar to a free-text query.

    The query and the frame's ``"text"`` column are encoded with
    ``vectorizer`` and compared by cosine similarity; the best-scoring rows
    are returned, best match first.

    Parameters
    ----------
    query : str
        Free-text request to match against the rows.
    data_frame : pandas.DataFrame
        Candidate rows. Must contain a ``"text"`` column unless
        ``doc_matrix`` is supplied.
    vectorizer : object
        Already-fitted vectorizer exposing ``transform``.
    top_n : int, default 3
        Maximum number of rows to return.
    doc_matrix : optional
        Pre-computed encoding of the rows, i.e. the result of
        ``vectorizer.transform(data_frame["text"])``. Pass it to avoid
        re-encoding the frame on every call. Its rows must line up with the
        rows of ``data_frame``.

    Returns
    -------
    pandas.DataFrame
        At most ``top_n`` rows of ``data_frame`` in descending score order.

    Raises
    ------
    ValueError
        If ``top_n`` is smaller than 1.
    """
    if top_n < 1:
        raise ValueError("top_n must be a positive integer")

    # Nothing to rank: return an empty frame with the same columns instead of
    # letting the similarity computation fail on zero rows.
    if len(data_frame) == 0:
        return data_frame.iloc[:0]

    if doc_matrix is None:
        # Score against the frame that was actually passed in rather than a
        # notebook-level matrix, so positions always match data_frame's rows.
        doc_matrix = vectorizer.transform(data_frame["text"])

    query_vec = vectorizer.transform([query])
    scores = cosine_similarity(query_vec, doc_matrix).flatten()

    # argsort is ascending; reverse it and keep the leading top_n positions.
    top_indices = scores.argsort()[::-1][:top_n]
    return data_frame.iloc[top_indices]

# Example call: rank the rows of df against a Karachi sightseeing query; the
# returned rows are shown as the cell output.
recommend("best places to visit in karachi", df, vectorizer)


Unnamed: 0,city,category,name,location,text
13,karachi,places,Frere Hall,"Saddar, Karachi","Frere Hall in karachi - places near Saddar, Ka..."
12,karachi,places,Clifton Beach,"Clifton, Karachi",Clifton Beach in karachi - places near Clifton...
8,karachi,food,Cafe Flo,"Clifton, Karachi","Cafe Flo in karachi - food near Clifton, Karachi"


In [16]:
import os
import pickle

# Create the output folder if needed; exist_ok makes re-running the cell safe.
os.makedirs("model", exist_ok=True)

# Serialize the fitted vectorizer so it can be reloaded without refitting.
with open("model/vectorizer.pkl", "wb") as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)

# Write the flattened rows next to it, without the index column.
df.to_csv("model/data.csv", index=False)
