# Using data from MongoDB

### Connecting to Mongo

In [1]:
import pymongo
import os
import pandas as pd
from dotenv import load_dotenv
load_dotenv()

mongo_uri = os.getenv('MONGO_URI')

client = pymongo.MongoClient(mongo_uri)
db = client["TripPlanner"]

### Retrieve data from collections

In [2]:
places_data = db["new_places"].find()
ratings_data = db["ratings"].find()

### Create dataframes using retrieved data

In [3]:
new_places_df = pd.DataFrame(list(places_data))
new_places_df

Unnamed: 0,_id,features__id,features__geometry__coordinates__001,features__geometry__coordinates__002,features__properties__name,features__properties__kinds
0,6404770215f2cc1ab48c7fb1,7982642,73.856285,18.519789,Navagraha Shani Mandir,"religion,hindu_temples,interesting_places"
1,6404770215f2cc1ab48c7fb2,4982515,73.855301,18.520477,Shaniwar Wada Amphitheatre,"architecture,historic_architecture,fortificati..."
2,6404770215f2cc1ab48c7fb3,1880288,73.855309,18.520626,Bajirao I statue,"historic,monuments_and_memorials,interesting_p..."
3,6404770215f2cc1ab48c7fb4,5102363,73.856308,18.519026,Prem Vitthal,"religion,other_temples,interesting_places"
4,6404770215f2cc1ab48c7fb5,11471114,73.857239,18.519030,Kasba Ganpati,"religion,hindu_temples,interesting_places"
...,...,...,...,...,...,...
2575,6411d121a19cc2a24775c9b1,8594529,72.830902,19.141336,Infinity Mall,"cultural,cinemas,theatres_and_entertainments,i..."
2576,6411d121a19cc2a24775c9b2,7082282,72.831009,19.141422,Cinemax Versova,"cultural,cinemas,theatres_and_entertainments,i..."
2577,6411d121a19cc2a24775c9b3,7982585,72.816658,19.129932,Someshwar Temple,"religion,hindu_temples,interesting_places"
2578,6411d121a19cc2a24775c9b4,5686420,72.950737,19.037353,sunni islakiya masjid,"religion,other_temples,interesting_places"


In [4]:
ratings_df = pd.DataFrame(list(ratings_data))
ratings_df

Unnamed: 0,_id,userid,placeid,rating,tags,__v
0,643f73eec8a7ffef36f07450,643565e1006177e939eb847f,1879958,1,"[trek, adventure]",0
1,643f7462c8a7ffef36f07456,643a1934a6a6939514c45086,1879958,3,"[trek, adventure]",0
2,643f7acf49e3d898fa3d263b,643a23abe856f5f594218dc8,14927602,4,[themePark],0
3,643f7acf49e3d898fa3d263d,643a23abe856f5f594218dc8,5686277,5,[religious],0
4,643f7c5f49e3d898fa3d2648,643a23abe856f5f594218dc8,6793257,5,"[nature, trek]",0
5,643f7c5f49e3d898fa3d264a,643a23abe856f5f594218dc8,15382572,2,[historic],0


# CBR

In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

### Creating TF*IDF matrix for place kinds

In [6]:
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=0, stop_words='english')
tfidf_matrix_places = tf.fit_transform(new_places_df['features__properties__kinds'])

### Creating dataframe for a particular user

In [8]:
userid = '643a23abe856f5f594218dc8'
from bson import ObjectId
userid = ObjectId(userid)
# Selecting all the rows of this userid
user_ratings_df = ratings_df[ratings_df['userid'] == userid]
user_places_df = pd.merge(user_ratings_df, new_places_df, left_on='placeid', right_on='features__id')
user_df = user_places_df[['userid', 'placeid', 'features__properties__name', 'features__properties__kinds', 'rating']]
user_df

Unnamed: 0,userid,placeid,features__properties__name,features__properties__kinds,rating
0,643a23abe856f5f594218dc8,14927602,Shivaji Park,"gardens_and_parks,cultural,urban_environment,i...",4
1,643a23abe856f5f594218dc8,5686277,Hanuman Mandir,"religion,hindu_temples,interesting_places",5
2,643a23abe856f5f594218dc8,6793257,Baner Hill,"mountain_peaks,geological_formations,natural,i...",5
3,643a23abe856f5f594218dc8,15382572,Metropolitan Building,"historic_architecture,architecture,interesting...",2


In [9]:
cosine_similarities = linear_kernel(tfidf_matrix_places, tfidf_matrix_places)
results = {}
cosine_similarities

array([[1.        , 0.00603317, 0.01658343, ..., 1.        , 0.12979324,
        0.01505941],
       [0.00603317, 1.        , 0.0291088 , ..., 0.00603317, 0.00598185,
        0.0264337 ],
       [0.01658343, 0.0291088 , 1.        , ..., 0.01658343, 0.01644236,
        0.07265856],
       ...,
       [1.        , 0.00603317, 0.01658343, ..., 1.        , 0.12979324,
        0.01505941],
       [0.12979324, 0.00598185, 0.01644236, ..., 0.12979324, 1.        ,
        0.0149313 ],
       [0.01505941, 0.0264337 , 0.07265856, ..., 0.01505941, 0.0149313 ,
        1.        ]])

In [10]:
for idx, row in new_places_df.iterrows():
    similar_indices = cosine_similarities[idx].argsort()[:-100:-1]
    similar_items = [(cosine_similarities[idx][i], new_places_df['features__id'][i]) for i in similar_indices]

    results[row['features__id']] = similar_items[1:]
    
print('done!')
results

done!


{7982642: [(0.9999999999999998, 2003776),
  (0.9999999999999998, 7990551),
  (0.9999999999999998, 5101862),
  (0.9999999999999998, 7982328),
  (0.9999999999999998, 5101847),
  (0.9999999999999998, 7982327),
  (0.9999999999999998, 11238122),
  (0.9999999999999998, 7989415),
  (0.9999999999999998, 7982310),
  (0.9999999999999998, 7982309),
  (0.9999999999999998, 1865262),
  (0.9999999999999998, 5167649),
  (0.9999999999999998, 5167651),
  (0.9999999999999998, 7989420),
  (0.9999999999999998, 1865144),
  (0.9999999999999998, 5101861),
  (0.9999999999999998, 7980728),
  (0.9999999999999998, 5167653),
  (0.9999999999999998, 1865174),
  (0.9999999999999998, 2003227),
  (0.9999999999999998, 7980118),
  (0.9999999999999998, 7980117),
  (0.9999999999999998, 11356407),
  (0.9999999999999998, 5168082),
  (0.9999999999999998, 2003219),
  (0.9999999999999998, 5705883),
  (0.9999999999999998, 5101845),
  (0.9999999999999998, 1866183),
  (0.9999999999999998, 1866148),
  (0.9999999999999998, 1864555),

In [11]:
def item(id):
    return new_places_df.loc[new_places_df['features__id'] == id]['features__properties__name'].tolist()[0]

In [12]:
# set to avoid duplicate recommedations
recs_set = set()
# print(type(set))

In [13]:
def recommend(item_id, num):
    recs = results[item_id][:num]
    for rec in recs:
        if rec[1] not in recs_set:
            recs_set.add(rec[1])
            print("Recommended: " +
                  item(rec[1]) + " (score:" + str(rec[0]) + ")" + 'id: ', (rec[1]))

In [14]:
for idx, row in user_df.iterrows():
    recs_set.add(row['placeid'])
    # Only making recommendations for places rated 4 or above by user
    if row['rating'] >= 4:
        print(f"PLACES RECOMMENDED FOR {row['features__properties__name']}:")
        recommend(row['placeid'], 5)
        print()

PLACES RECOMMENDED FOR Shivaji Park:
Recommended: Indraprastha Park (score:0.9999999999999999)id:  11502934
Recommended: Tathawade Garden, Pune (score:0.9999999999999999)id:  15841658
Recommended: Sunder Nursery (score:0.9999999999999999)id:  16077515
Recommended: Jantar Mantar (score:0.9999999999999999)id:  11687845

PLACES RECOMMENDED FOR Hanuman Mandir:
Recommended: Pipaleshwar Mahadev Temple (score:0.9999999999999998)id:  2003776
Recommended: Sri Ranganathaswamy Temple (score:0.9999999999999998)id:  7990551
Recommended: temple (score:0.9999999999999998)id:  5101862
Recommended: Dandu Maramma Temple (score:0.9999999999999998)id:  7982328
Recommended: Temple (score:0.9999999999999998)id:  5101847

PLACES RECOMMENDED FOR Baner Hill:
Recommended: Antop Hill (score:0.9999999999999999)id:  6604001
Recommended: Hanuman Tekdi (score:0.9999999999999999)id:  6604008
Recommended: Malhargad (score:0.9999999999999999)id:  11211230
Recommended: Sinhagad (score:0.9999999999999999)id:  6604083
Rec