In [1]:
import pandas as pd
import numpy as np

In [4]:
# The first CSV column appears to be a previously saved row index (read naively
# it surfaces as an "Unnamed: 0" data column), so load it as the index instead
# of carrying it around as a meaningless feature column.
df = pd.read_csv("Tourism.csv", index_col=0)

In [5]:
# Preview the first rows to sanity-check the columns that were loaded.
df.head()

Unnamed: 0,subcategories,subtype,name,numberOfReviews,rating,rawRanking,Region,District
0,"Zoos & Aquariums, Water & Amusement Parks, Fun...","Theme Parks, Aquariums, Game & Entertainment C...",Goosebumps Virtual Escape,82,5.0,4.418244,North Goa District,Baga
1,Sights & Landmarks,"Ancient Ruins, Architectural Buildings",Walk to blueCity heritagetour,112,5.0,4.416904,Jodhpur District,Jodhpur
2,Spas & Wellness,Spas,Kerala Ayurveda Kendra,1252,5.0,4.462176,Jaipur District,Jaipur
3,Fun & Games,Game & Entertainment Centers,Xcapade Adventures,701,5.0,4.450218,Ajmer District,Pushkar
4,Shopping,Gift & Specialty Shops,Jaipur Blue Pottery Art Centre,1217,5.0,4.452539,Jaipur District,Jaipur


In [6]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [7]:
def calculate_recommendation_score(row):
    """Return the number of reviews divided by the raw ranking for ``row``.

    ``row`` may be any object indexable by the keys ``'numberOfReviews'`` and
    ``'rawRanking'`` (for example a DataFrame row handed over by
    ``DataFrame.apply(..., axis=1)``).
    """
    review_count = row['numberOfReviews']
    raw_ranking = row['rawRanking']
    return review_count / raw_ranking

In [11]:
def recommend_places_with_similarity(dataset, region, subcategory, subtype=None):
    """Recommend places for a region and subcategory, best score first.

    Rows of ``dataset`` are selected by exact match on ``Region`` and
    ``subcategories`` (and additionally on ``subtype`` when a truthy
    ``subtype`` is passed). Each selected row is scored with
    ``calculate_recommendation_score`` and the rows are returned in
    descending score order.

    No text-similarity ranking is applied. Because rows are selected by exact
    equality, every selected row carries identical category text, so a
    TF-IDF / cosine comparison cannot tell them apart; and since every
    selected row is returned ordered by score, such a comparison could not
    influence the result either.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Needs the columns ``Region``, ``subcategories``, ``name``, ``rating``,
        ``numberOfReviews`` and ``rawRanking``; ``subtype`` is only read when
        a subtype filter is requested.
    region : str
        Value that ``Region`` must equal.
    subcategory : str
        Value that ``subcategories`` must equal.
    subtype : str, optional
        Value that ``subtype`` must equal. ``None`` (or any falsy value)
        disables this filter.

    Returns
    -------
    pandas.DataFrame or str
        The columns ``name``, ``rating``, ``numberOfReviews`` and
        ``recommendation_score`` of the matching rows, or a message string
        when nothing matches.
    """
    matches = (dataset['Region'] == region) & (dataset['subcategories'] == subcategory)
    if subtype:
        matches = matches & (dataset['subtype'] == subtype)

    # Work on a copy so adding the score column never touches the caller's frame.
    filtered_data = dataset[matches].copy()
    if filtered_data.empty:
        return "No recommendations found for the selected region and subcategory."

    filtered_data['recommendation_score'] = filtered_data.apply(
        calculate_recommendation_score, axis=1
    )

    top_recommendations = filtered_data.sort_values(
        by=['recommendation_score'], ascending=False
    )
    return top_recommendations[['name', 'rating', 'numberOfReviews', 'recommendation_score']]


In [12]:
# Example query narrowed down to one subtype within the subcategory.
region = 'Pune District'
subcategory = 'Water & Amusement Parks'
subtype = 'Water Parks'
recommended_places = recommend_places_with_similarity(
    df, region, subcategory, subtype
)
print(recommended_places)


                                      name  rating  numberOfReviews  \
324  Wet N Joy Water Park & Amusement Park     4.0              200   

     recommendation_score  
324             65.391495  


In [13]:
# Example query without a subtype filter: the function's default (None) applies.
region = 'Jaipur District'
subcategory = 'Sights & Landmarks'
recommended_places = recommend_places_with_similarity(df, region, subcategory)
print(recommended_places)

                                   name  rating  numberOfReviews  \
15                         Amber Palace     4.5            18542   
55                            Jal Mahal     4.0             3932   
59                        Monkey Temple     4.0             1959   
253                 Govind Devji Temple     4.5              709   
95                       Galtaji Temple     4.0              378   
159                 Royal Gaitor Tumbas     4.5              335   
62   Khole Ke Hanuman JI Temple, Jaipur     4.5              269   
240                Gatore Ki Chhatriyan     4.5               92   
147                        Patrika Gate     4.5               84   

     recommendation_score  
15            3708.400000  
55            1160.104874  
59             559.830522  
253            170.659909  
95             117.465928  
159             85.363957  
62              75.253954  
240             26.617689  
147             23.019665  
