In [14]:
import pandas as pd
import numpy as np
import os


In [15]:
# Location of the source CSV. The original machine-specific path is kept as the
# default so existing runs behave exactly as before; set the PLACES_CSV_PATH
# environment variable to point at the file on any other machine.
DATA_PATH = os.environ.get(
    "PLACES_CSV_PATH",
    r"C:\Users\UserK\Desktop\Python_Projects\Data Engineering-Weekend Getaway Ranker\dataset\Top Indian Places to Visit.csv",
)

df = pd.read_csv(DATA_PATH)
df.head()


Unnamed: 0.1,Unnamed: 0,Zone,State,City,Name,Type,Establishment Year,time needed to visit in hrs,Google review rating,Entrance Fee in INR,Airport with 50km Radius,Weekly Off,Significance,DSLR Allowed,Number of google review in lakhs,Best Time to visit
0,0,Northern,Delhi,Delhi,India Gate,War Memorial,1921,0.5,4.6,0,Yes,,Historical,Yes,2.6,Evening
1,1,Northern,Delhi,Delhi,Humayun's Tomb,Tomb,1572,2.0,4.5,30,Yes,,Historical,Yes,0.4,Afternoon
2,2,Northern,Delhi,Delhi,Akshardham Temple,Temple,2005,5.0,4.6,60,Yes,,Religious,No,0.4,Afternoon
3,3,Northern,Delhi,Delhi,Waste to Wonder Park,Theme Park,2019,2.0,4.1,50,Yes,Monday,Environmental,Yes,0.27,Evening
4,4,Northern,Delhi,Delhi,Jantar Mantar,Observatory,1724,2.0,4.2,15,Yes,,Scientific,Yes,0.31,Morning


In [16]:
# Normalise the headers to snake_case: trim surrounding whitespace,
# lower-case, and replace inner spaces with underscores.
normalized_columns = [
    column.strip().lower().replace(" ", "_")
    for column in df.columns
]
df.columns = normalized_columns

df.columns.tolist()


['unnamed:_0',
 'zone',
 'state',
 'city',
 'name',
 'type',
 'establishment_year',
 'time_needed_to_visit_in_hrs',
 'google_review_rating',
 'entrance_fee_in_inr',
 'airport_with_50km_radius',
 'weekly_off',
 'significance',
 'dslr_allowed',
 'number_of_google_review_in_lakhs',
 'best_time_to_visit']

In [17]:
# Names of the (normalised, snake_case) dataset columns used by the ranking code.
CITY_COL = "city"
STATE_COL = "state"
# NOTE(review): "latitude" / "longitude" are not in the normalised column list
# printed for this dataset, and nothing visible in this notebook reads these
# two constants — confirm whether they are still needed.
LAT_COL = "latitude"       
LON_COL = "longitude"      
# Rating column; the ranking function divides it by its maximum.
RATING_COL = "google_review_rating"
# Review-count column (in lakhs, per the column name), used as a popularity signal.
POPULARITY_COL = "number_of_google_review_in_lakhs"



In [18]:
# Keep only rows that have every field the ranking relies on, then renumber
# the index so it is contiguous again.
df = (
    df
    .dropna(subset=[CITY_COL, STATE_COL, RATING_COL, POPULARITY_COL])
    .reset_index(drop=True)
)

df.head()


Unnamed: 0,unnamed:_0,zone,state,city,name,type,establishment_year,time_needed_to_visit_in_hrs,google_review_rating,entrance_fee_in_inr,airport_with_50km_radius,weekly_off,significance,dslr_allowed,number_of_google_review_in_lakhs,best_time_to_visit
0,0,Northern,Delhi,Delhi,India Gate,War Memorial,1921,0.5,4.6,0,Yes,,Historical,Yes,2.6,Evening
1,1,Northern,Delhi,Delhi,Humayun's Tomb,Tomb,1572,2.0,4.5,30,Yes,,Historical,Yes,0.4,Afternoon
2,2,Northern,Delhi,Delhi,Akshardham Temple,Temple,2005,5.0,4.6,60,Yes,,Religious,No,0.4,Afternoon
3,3,Northern,Delhi,Delhi,Waste to Wonder Park,Theme Park,2019,2.0,4.1,50,Yes,Monday,Environmental,Yes,0.27,Evening
4,4,Northern,Delhi,Delhi,Jantar Mantar,Observatory,1724,2.0,4.2,15,Yes,,Scientific,Yes,0.31,Morning


In [19]:
def distance_score_proxy(source_row, target_row):
    """Score how close ``target_row`` is to ``source_row`` without coordinates.

    The rows are compared field by field, from the most specific level to the
    least specific, and the score of the first matching level is returned:

    * same city  -> 0 (the place is in the source city itself)
    * same state -> 1.0
    * same zone  -> 0.6
    * otherwise  -> 0.3

    Both arguments only need to support ``row[column_name]`` lookups.
    """
    # Ordered from most to least specific; the first match wins.
    tiers = (
        (CITY_COL, 0),
        (STATE_COL, 1.0),
        ("zone", 0.6),
    )
    for column, score in tiers:
        if source_row[column] == target_row[column]:
            return score

    # Nothing in common: different zone altogether.
    return 0.3


In [20]:
def recommend_destinations(source_city, top_n=5, weights=(0.4, 0.4, 0.2)):
    """Rank places outside ``source_city`` as weekend-getaway candidates.

    Each candidate row of the module-level ``df`` receives a final score that
    is a weighted sum of three components:

    * ``distance_score``   - result of ``distance_score_proxy`` against the
      first row found for the source city,
    * ``rating_score``     - rating divided by the highest rating among the
      candidates,
    * ``popularity_score`` - review count divided by the highest review count
      among the candidates.

    Parameters
    ----------
    source_city : str
        City to travel from. Matching is case-insensitive and ignores
        surrounding whitespace, but is otherwise an exact name match.
    top_n : int, default 5
        Number of highest-scoring rows to return.
    weights : tuple of three floats, default (0.4, 0.4, 0.2)
        Weights applied to the distance, rating and popularity scores,
        in that order.

    Returns
    -------
    pandas.DataFrame or str
        The ``top_n`` best rows (name, city, state, rating, popularity and
        ``final_score``), sorted by ``final_score`` descending. If the city is
        not present in the dataset, an explanatory message string is returned
        instead.

    Raises
    ------
    ValueError
        If ``weights`` does not contain exactly three values.
    """
    w_distance, w_rating, w_popularity = weights
    source_city = source_city.strip().lower()

    # Lower-case the city column once and reuse the mask for the existence
    # check, the source-row lookup and the candidate filter.
    is_source = df[CITY_COL].str.lower() == source_city

    if not is_source.any():
        return f"Source city '{source_city}' not found in dataset."

    source = df[is_source].iloc[0]
    recs = df.copy()

    recs["distance_score"] = recs.apply(
        lambda row: distance_score_proxy(source, row),
        axis=1
    )

    # Drop places located in the source city itself. The explicit copy makes
    # the filtered frame independent, so the column assignments below do not
    # trigger pandas' SettingWithCopyWarning.
    recs = recs[~is_source].copy()

    # Scale rating and popularity relative to the best remaining candidate.
    recs["rating_score"] = recs[RATING_COL] / recs[RATING_COL].max()
    recs["popularity_score"] = recs[POPULARITY_COL] / recs[POPULARITY_COL].max()

    recs["final_score"] = (
        w_distance * recs["distance_score"] +
        w_rating * recs["rating_score"] +
        w_popularity * recs["popularity_score"]
    )

    return recs.sort_values(
        by="final_score", ascending=False
    ).head(top_n)[[
        "name", CITY_COL, STATE_COL,
        RATING_COL, POPULARITY_COL, "final_score"
    ]]


In [22]:
# Top-5 (default top_n) getaway suggestions for a trip starting from Mumbai.
recommend_destinations("Mumbai")



Unnamed: 0,name,city,state,google_review_rating,number_of_google_review_in_lakhs,final_score
130,Mahalakshmi Temple,Kolhapur,Maharastra,4.8,0.9,0.816161
126,Sai Baba Temple,Shirdi,Maharastra,4.7,0.69,0.802322
123,Shaniwar Wada,Pune,Maharastra,4.4,1.2,0.791616
128,Ganapatipule Temple,Ratnagiri,Maharastra,4.7,0.1,0.786376
124,Ajanta Caves,Aurangabad,Maharastra,4.6,0.21,0.781186


In [23]:
# Top-5 suggestions for Delhi.
# NOTE(review): the output ranks "New Delhi" rows first. The source-city filter
# is an exact (case-insensitive) name match, so "New Delhi" is treated as a
# different city in the same state.
recommend_destinations("Delhi")


Unnamed: 0,name,city,state,google_review_rating,number_of_google_review_in_lakhs,final_score
305,Gurudwara Bangla Sahib,New Delhi,Delhi,4.8,1.05,0.820215
313,Jama Masjid,New Delhi,Delhi,4.5,0.49,0.78059
318,Rail Museum,New Delhi,Delhi,4.4,0.24,0.76567
315,Buddh International Circuit,Greater Noida,Uttar Pradesh,4.6,7.4,0.69551
92,Golden Temple (Harmandir Sahib),Amritsar,Punjab,4.9,1.9,0.691351


In [24]:
# Top-5 suggestions for Bengaluru.
# NOTE(review): the output includes rows whose city is spelled "Bangalore".
# The source-city filter only removes rows whose city equals "bengaluru", so
# the alternate spelling is ranked as a separate in-state destination.
recommend_destinations("Bengaluru")


Unnamed: 0,name,city,state,google_review_rating,number_of_google_review_in_lakhs,final_score
111,Mysore Palace,Mysore,Karnataka,4.6,2.5,0.843078
31,ISKCON Temple Bangalore,Bangalore,Karnataka,4.6,1.14,0.806321
28,Lalbagh Botanical Garden,Bangalore,Karnataka,4.4,1.5,0.799724
30,Vidhana Soudha,Bangalore,Karnataka,4.6,0.8,0.797132
119,Murudeshwar Temple,Murudeshwar,Karnataka,4.7,0.49,0.796917


In [25]:
# Write each city's recommendations to outputs/<city>_recommendations.csv.
os.makedirs("outputs", exist_ok=True)

for city in ["Mumbai", "Delhi", "Bengaluru"]:
    result = recommend_destinations(city)
    if isinstance(result, pd.DataFrame):
        result.to_csv(
            f"outputs/{city.lower()}_recommendations.csv",
            index=False
        )
    else:
        # recommend_destinations returns a message string when the city is not
        # in the dataset; report it instead of silently skipping the export.
        print(f"Skipped {city}: {result}")
