### Data cleaning


In [11]:
import pandas as pd

In [12]:
# Load the raw tourist-places dataset from a CSV file; the path is relative,
# so the file must sit in the notebook's working directory.
data = pd.read_csv("Data set of famous India tourist places along with there images.csv")

In [13]:
# Preview the first rows of the raw dataset to inspect the available columns
data.head()

Unnamed: 0,Zone,State,City,Name,Type,Establishment Year,time needed to visit in hrs,Google review rating,Entrance Fee in INR,Airport with 50km Radius,Weekly Off,Significance,DSLR Allowed,Number of google review in lakhs,Best Time to visit,Image URL
0,Western,Gujarat,Rann of Kutch,Rann Utsav,Cultural,Unknown,3.0,4.9,7500,Yes,,Cultural,Yes,0.1,Evening,https://www.rannutsav.com/blog/wp-content/uplo...
1,Northern,Punjab,Amritsar,Golden Temple (Harmandir Sahib),Religious Site,1604,1.5,4.9,0,Yes,,Spiritual,Yes,1.9,All,https://upload.wikimedia.org/wikipedia/commons...
2,Northern,Ladakh,Leh,Pangong Tso,Lake,Unknown,2.0,4.9,20,Yes,,Nature,Yes,0.15,Morning,https://dynamic-media-cdn.tripadvisor.com/medi...
3,Western,Maharastra,Mumbai,Siddhivinayak Temple,Temple,1881,2.0,4.8,0,Yes,,Religious,No,1.05,All,https://upload.wikimedia.org/wikipedia/commons...
4,Western,Gujarat,Somnath,Somnath Temple,Temple,1951,2.0,4.8,0,No,,Religious,No,0.39,Morning,https://upload.wikimedia.org/wikipedia/commons...


In [14]:
# Columns the recommender below never reads; they are discarded from the raw frame.
# NOTE(review): "Unnamed: 0" survives this step - it looks like a saved index
# column from the CSV export; confirm whether it should be dropped as well.
cols_to_remove = [
    "time needed to visit in hrs",
    "Entrance Fee in INR",
    "Airport with 50km Radius",
    "Weekly Off",
    "DSLR Allowed",
    "Number of google review in lakhs",
    "Establishment Year",
]

# `drop` returns a new frame, so the raw `data` frame is left untouched.
cleaned_data = data.drop(labels=cols_to_remove, axis="columns")

# Preview the trimmed dataset
cleaned_data.head()

Unnamed: 0,Zone,State,City,Name,Type,Google review rating,Significance,Best Time to visit,Image URL
0,Western,Gujarat,Rann of Kutch,Rann Utsav,Cultural,4.9,Cultural,Evening,https://www.rannutsav.com/blog/wp-content/uplo...
1,Northern,Punjab,Amritsar,Golden Temple (Harmandir Sahib),Religious Site,4.9,Spiritual,All,https://upload.wikimedia.org/wikipedia/commons...
2,Northern,Ladakh,Leh,Pangong Tso,Lake,4.9,Nature,Morning,https://dynamic-media-cdn.tripadvisor.com/medi...
3,Western,Maharastra,Mumbai,Siddhivinayak Temple,Temple,4.8,Religious,All,https://upload.wikimedia.org/wikipedia/commons...
4,Western,Gujarat,Somnath,Somnath Temple,Temple,4.8,Religious,Morning,https://upload.wikimedia.org/wikipedia/commons...


In [15]:
# Work on an independent copy: plain assignment would only create an alias, so
# adding the "features" column to `df` later would silently modify
# `cleaned_data` as well.
df = cleaned_data.copy()

In [16]:
# Sanity check: `df` should show the same columns as the cleaned dataset
df.head()

Unnamed: 0,Zone,State,City,Name,Type,Google review rating,Significance,Best Time to visit,Image URL
0,Western,Gujarat,Rann of Kutch,Rann Utsav,Cultural,4.9,Cultural,Evening,https://www.rannutsav.com/blog/wp-content/uplo...
1,Northern,Punjab,Amritsar,Golden Temple (Harmandir Sahib),Religious Site,4.9,Spiritual,All,https://upload.wikimedia.org/wikipedia/commons...
2,Northern,Ladakh,Leh,Pangong Tso,Lake,4.9,Nature,Morning,https://dynamic-media-cdn.tripadvisor.com/medi...
3,Western,Maharastra,Mumbai,Siddhivinayak Temple,Temple,4.8,Religious,All,https://upload.wikimedia.org/wikipedia/commons...
4,Western,Gujarat,Somnath,Somnath Temple,Temple,4.8,Religious,Morning,https://upload.wikimedia.org/wikipedia/commons...


In [17]:
# Build one text field per place by joining the descriptive columns with spaces.
# `na_rep=""` substitutes an empty string for missing values; with plain `+`
# concatenation a single NaN would turn the whole feature string into NaN,
# which the TF-IDF vectorizer cannot process.
df["features"] = df["Type"].str.cat(
    df[["Significance", "State", "City"]], sep=" ", na_rep=""
)

In [18]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Convert features to TF-IDF: fit the vectorizer on the combined feature text
# and encode each entry of df["features"] as a TF-IDF vector in one pass.
# stop_words="english" selects scikit-learn's built-in English stop-word list.
vectorizer = TfidfVectorizer(stop_words="english")
tfidf_matrix = vectorizer.fit_transform(df["features"])


In [19]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between all places. When only one matrix is
# given, cosine_similarity compares its rows against each other.
cosine_sim = cosine_similarity(tfidf_matrix)


In [20]:
def recommend_place(place_name, top_n=5):
    """Return the places most similar to ``place_name``.

    Similarity scores are read from the precomputed ``cosine_sim`` matrix,
    whose rows/columns are assumed to follow the row order of ``df``.

    Parameters
    ----------
    place_name : str
        Exact (case-sensitive) value of the ``Name`` column to look up. If
        several rows share the name, the first one is used.
    top_n : int, default 5
        Maximum number of recommendations to return. Values <= 0 yield an
        empty result.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows of ``df`` restricted to the columns ``Name``,
        ``City``, ``State``, ``Best Time to visit`` and ``Image URL``,
        ordered from most to least similar.

    Raises
    ------
    IndexError
        If no row of ``df`` has ``Name == place_name``.
    """
    # Locate the place by row *position*: both ``cosine_sim`` and ``df.iloc``
    # are positional, so index labels must not be used for the lookup.
    positions = (df["Name"] == place_name).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise IndexError(f"Place {place_name!r} not found in the 'Name' column")
    idx = int(positions[0])

    # Pair every *other* place with its similarity to the query. The query row
    # is excluded explicitly: dropping "the first sorted entry" is wrong when
    # another place ties with it at similarity 1.0.
    candidates = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[idx])
        if pos != idx
    ]

    # Stable sort, highest similarity first; ties keep ascending row order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    # Clamp so that a negative top_n cannot slice from the end of the list.
    place_indices = [pos for pos, _ in candidates[:max(top_n, 0)]]

    # Return recommended places with images
    return df.iloc[place_indices][["Name", "City", "State", "Best Time to visit", "Image URL"]]


In [21]:
# Example: the five places most similar to "Belur Math"
recommend_place("Belur Math", top_n=5)


Unnamed: 0,Name,City,State,Best Time to visit,Image URL
31,Dakshineswar Kali Temple,Kolkata,West Bengal,Morning,https://upload.wikimedia.org/wikipedia/commons...
205,Kalighat Kali Temple,Kolkata,West Bengal,Morning,https://www.templepurohit.com/wp-content/uploa...
207,Marble Palace,Kolkata,West Bengal,Afternoon,https://upload.wikimedia.org/wikipedia/commons...
81,Victoria Memorial,Kolkata,West Bengal,Morning,https://upload.wikimedia.org/wikipedia/commons...
83,Indian Museum,Kolkata,West Bengal,Morning,https://upload.wikimedia.org/wikipedia/commons...


In [22]:
import pickle

# Serialize the precomputed cosine-similarity matrix to disk (binary pickle)
# so it can be reloaded later without recomputing it.
with open("cosine_sim.pkl", mode="wb") as out_file:
    pickle.dump(cosine_sim, out_file)

print("Model saved as cosine_sim.pkl")


Model saved as cosine_sim.pkl
