In [1]:
pip install spacy

Note: you may need to restart the kernel to use updated packages.


In [12]:
import pandas as pd
import numpy as np
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem.porter import PorterStemmer
from nltk.stem import WordNetLemmatizer
import spacy

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Jkuma\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [33]:
# Location of the places dataset (machine-specific; edit this one constant to run elsewhere).
DATA_PATH = r"D:\merge\new_dataframe.csv"

# Read the CSV, decoding its bytes as latin-1.
df = pd.read_csv(DATA_PATH, encoding='latin-1')

In [34]:
# Tokenization and stopword removal using NLTK.
# Fetch the NLTK resources used by the preprocessing cells:
#   punkt / punkt_tab -> tokenizer data for word_tokenize (recent NLTK releases
#                        load 'punkt_tab'; older ones load 'punkt')
#   stopwords         -> the English stop-word list read just below
#   omw-1.4           -> Open Multilingual WordNet data (needed by some NLTK
#                        releases for WordNet access)
for resource in ('punkt', 'punkt_tab', 'stopwords', 'omw-1.4'):
    nltk.download(resource)

# A set gives constant-time membership tests when filtering tokens.
stop_words = set(stopwords.words('english'))


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Jkuma\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Jkuma\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\Jkuma\AppData\Roaming\nltk_data...


In [35]:
# Peek at the first five rows to check which columns were loaded.
df.head(n=5)

Unnamed: 0,Place,Rating,Best Time to Visit,Description,Tags,Latitude,Longitude
0,Agartala,3.8,Throughout the year,"Home to the Manikya Kings in the past, Agartal...","""Gateway to Tripuran Heritage""",23.831238,91.282382
1,Agra,4.2,October to March,Located on the banks of River Yamuna in Uttar ...,"""The city of Taj Mahal, the monument of etern...",27.175255,78.009816
2,Ahmedabad,4.1,October to March,"A rapidly growing metropolis, an industrial hu...","""Manchester of the East""",23.021624,72.579707
3,Ajanta and Ellora Caves,4.4,June to March,"Ajanta and Ellora caves, considered to be one ...","""World Heritage Ancient Rock-Cut Caves""",20.0268,20.5329
4,Ajmer,3.8,October to March,"Surrounded by Aravali ranges, the city of Ajme...","""The land of Ajmer Sharif Dargah""",26.4691,74.639


In [36]:
# Lower-case and tokenize each description, then drop English stop words.
# Missing descriptions (NaN) are replaced by empty text first, so `.lower()` is
# never called on a float; such rows simply end up with an empty token list.
df['Description'] = df['Description'].fillna('').apply(
    lambda text: [token for token in word_tokenize(text.lower()) if token not in stop_words]
)

In [37]:
# Lemmatization using NLTK
# Download the 'wordnet' package and create the lemmatizer object that the
# next cell applies to every token of every description.
nltk.download('wordnet')
lemmatizer = WordNetLemmatizer()

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Jkuma\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [38]:
# Replace each token by the result of lemmatizer.lemmatize (called with its default arguments).
df['Description'] = df['Description'].apply(
    lambda tokens: list(map(lemmatizer.lemmatize, tokens))
)

In [40]:
# Install the small English spaCy pipeline only when it is not already present.
# spacy.cli.download installs through the interpreter running this kernel, so the
# model lands in the same environment that spacy.load searches, and the guard
# avoids re-downloading the package on every run.
if not spacy.util.is_package('en_core_web_sm'):
    spacy.cli.download('en_core_web_sm')


Defaulting to user installation because normal site-packages is not writeable
Collecting en-core-web-sm==3.5.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0-py3-none-any.whl (12.8 MB)
     --------------------------------------- 12.8/12.8 MB 21.8 MB/s eta 0:00:00
Installing collected packages: en-core-web-sm
Successfully installed en-core-web-sm-3.5.0
[38;5;2m[+] Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')




In [41]:
# Lemmatization using spaCy
# Load the en_core_web_sm pipeline once; the next cell calls `nlp` on every description.
nlp = spacy.load('en_core_web_sm')

In [42]:
# Re-join each token list into one space-separated string, run it through the
# spaCy pipeline, and keep the lemma of every resulting token.
df['Description'] = df['Description'].apply(
    lambda tokens: [tok.lemma_ for tok in nlp(' '.join(tokens))]
)

In [47]:
# Preview the preprocessed descriptions (first five rows)
df['Description'].head(n=5)

0    [home, manikya, king, past, ,, agartala, perfe...
1    [locate, bank, river, yamuna, uttar, pradesh, ...
2    [rapidly, grow, metropolis, ,, industrial, hub...
3    [ajanta, ellora, cave, ,, consider, one, fine,...
4    [surround, aravali, range, ,, city, ajmer, fam...
Name: Description, dtype: object

In [52]:
# Create TF-IDF vectorizer
tfidf_vectorizer = TfidfVectorizer()
df['Place'] = df['Place'].astype(str)

# Turn each lemma list back into plain space-separated text. Casting the lists with
# astype(str) would store their repr instead ("['home', 'king', ...]"); repr escapes
# non-printable characters (e.g. as \x92), and those escape sequences can then be
# picked up by the vectorizer as bogus words. Values that are already strings
# (e.g. when this cell is re-run) pass through unchanged.
df['Description'] = df['Description'].apply(
    lambda lemmas: ' '.join(lemmas) if isinstance(lemmas, list) else str(lemmas)
)

# Fit and transform the lemmatized descriptions
tfidf_matrix = tfidf_vectorizer.fit_transform(df['Description'])

# Compute cosine similarity matrix; row i / column j follow the row order of df
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [55]:
# Function to recommend similar places based on description
def recommend_places(place_name, num_recommendations, places=None, similarity=None):
    """Return the places whose descriptions are most similar to ``place_name``.

    Parameters
    ----------
    place_name : str
        Name to look up in the ``'Place'`` column. An exact match is tried
        first, then a match that ignores case and surrounding whitespace.
    num_recommendations : int
        Maximum number of places to return; values below 1 give an empty result.
    places : pandas.DataFrame, optional
        Frame with a ``'Place'`` column. Defaults to the notebook-level ``df``.
    similarity : 2-D array-like, optional
        Square similarity matrix whose rows/columns follow the row order of
        ``places``. Defaults to the notebook-level ``cosine_sim``.

    Returns
    -------
    pandas.Series
        ``'Place'`` values of the best matches, most similar first. The
        queried place itself is never included.

    Raises
    ------
    IndexError
        If no row matches ``place_name``; the message lists close spellings
        when any are found.
    """
    if places is None:
        places = df
    if similarity is None:
        similarity = cosine_sim

    names = places['Place'].astype(str)

    # Work with row positions rather than index labels: the similarity matrix is
    # positional, so this stays correct even when the frame's index is not 0..n-1.
    hits = np.flatnonzero((names == place_name).to_numpy())
    if hits.size == 0:
        wanted = str(place_name).strip().casefold()
        hits = np.flatnonzero((names.str.strip().str.casefold() == wanted).to_numpy())
    if hits.size == 0:
        suggestions = difflib.get_close_matches(str(place_name), names.tolist(), n=3)
        hint = f" Did you mean: {', '.join(suggestions)}?" if suggestions else ""
        raise IndexError(f"Place {place_name!r} not found.{hint}")
    place_pos = int(hits[0])

    scores = np.asarray(similarity[place_pos], dtype=float)

    # Stable descending sort keeps ties in row order. The queried row is removed
    # explicitly instead of assuming it sorts first, which is not guaranteed when
    # another place has an identical description (similarity 1.0).
    ranked = np.argsort(-scores, kind='stable')
    ranked = ranked[ranked != place_pos]

    top_positions = ranked[:max(int(num_recommendations), 0)]
    return places['Place'].iloc[top_positions]

In [61]:
# Get user input for place name and number of recommendations.
# Surrounding whitespace is stripped so a stray space does not defeat the
# name comparison performed by recommend_places.
place_name = input("Enter a place name: ").strip()
num_recommendations = int(input("Enter the number of recommendations: "))


Enter a place name: Jaipur
Enter the number of recommendations: 5


In [62]:
# Look up the places most similar to the one entered above and show them
recommended_places = recommend_places(
    place_name=place_name,
    num_recommendations=num_recommendations,
)
print(recommended_places)

39    Jaisalmer
82      Pushkar
4         Ajmer
26        Delhi
92      Udaipur
Name: Place, dtype: object


In [69]:


# Function to recommend places based on a keyword
def recommend_places_by_keyword(keyword, num_recommendations, places=None):
    """Return places whose description or tags contain ``keyword``.

    Parameters
    ----------
    keyword : str
        Text to search for. It is matched literally (not as a regular
        expression) and case-insensitively.
    num_recommendations : int
        Maximum number of places to return; values below 1 give an empty result.
    places : pandas.DataFrame, optional
        Frame with ``'Place'``, ``'Description'`` and ``'Tags'`` columns.
        Defaults to the notebook-level ``df``.

    Returns
    -------
    pandas.Series
        ``'Place'`` values of the first matching rows, in the frame's row
        order (the matches are not ranked by relevance).
    """
    if places is None:
        places = df

    keyword = str(keyword)

    # regex=False: user input such as "(" or "." is searched for literally instead
    # of being compiled as a pattern. na=False: rows with a missing value count as
    # "no match", which keeps the combined mask strictly boolean.
    in_description = places['Description'].str.contains(keyword, case=False, regex=False, na=False)
    in_tags = places['Tags'].str.contains(keyword, case=False, regex=False, na=False)
    matching_places = places[in_description | in_tags]

    # Keep only the requested number of rows, then return their names.
    return matching_places['Place'].head(max(int(num_recommendations), 0))

# Get user input for the keyword and number of recommendations.
# Whitespace around the keyword is stripped so an accidental leading or trailing
# space does not prevent otherwise valid substring matches.
keyword = input("Enter a keyword: ").strip()
num_recommendations = int(input("Enter the number of recommendations: "))

# Call the recommend_places_by_keyword function and display the recommended places
recommended_places = recommend_places_by_keyword(keyword, num_recommendations)
print(recommended_places)


Enter a keyword: heritage
Enter the number of recommendations: 2
0    Agartala
1        Agra
Name: Place, dtype: object
