In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
import string
from sklearn.metrics.pairwise import cosine_similarity
import pickle

In [2]:
df = pd.read_excel('travel_destination_dataset.xlsx')

In [5]:
# Load the dataset
data = df  # Replace 'your_data.csv' with the actual file path
places_descriptions = data['description']

# Function to preprocess text data
def preprocess_text(text):
    # Tokenization
    tokens = word_tokenize(text)
    
    # Convert tokens to lowercase
    tokens = [token.lower() for token in tokens]
    
    # Remove punctuation
    tokens = [token for token in tokens if token not in string.punctuation]
    
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    
    # Join tokens back into a single string
    processed_text = ' '.join(tokens)
    
    return processed_text

# Preprocess the descriptions in the dataset
places_descriptions_processed = places_descriptions.apply(preprocess_text)
# print(places_descriptions_processed)

# # Vectorize the preprocessed text data using TF-IDF
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(places_descriptions_processed)
print(tfidf_matrix)

# Function to recommend places based on user input
def recommend_places(user_input, tfidf_matrix=tfidf_matrix,top_n=5):
    # Preprocess the user input
    user_input_processed = preprocess_text(user_input)
    
    # Vectorize the user input
    user_input_vector = tfidf.transform([user_input_processed])
    
    # Compute cosine similarity between user input and places descriptions
    similarity_scores = cosine_similarity(user_input_vector, tfidf_matrix)
    
    # Get indices of places sorted by similarity score
    top_indices = similarity_scores.argsort()[0][::-1][:top_n]
    
    # Recommend top N places
    recommended_places = data.iloc[top_indices]['City']  # Assuming 'City' column contains place names
    return recommended_places

# Example usage
user_input = "I want to go to Temple"
top_n = 15
recommended_places = recommend_places(user_input, top_n=top_n)
print(recommended_places)


  (0, 1037)	0.06663494155067302
  (0, 3466)	0.07539393575604472
  (0, 1790)	0.07027025260186838
  (0, 2690)	0.029594743145052116
  (0, 628)	0.04075915318646397
  (0, 2420)	0.0462971868222649
  (0, 6023)	0.04835086662023259
  (0, 1952)	0.07027025260186838
  (0, 6226)	0.0462971868222649
  (0, 6409)	0.052752264191125
  (0, 3615)	0.10348159982578267
  (0, 6179)	0.07027025260186838
  (0, 724)	0.06151125839649669
  (0, 1334)	0.05385178845989272
  (0, 4256)	0.043993269985753314
  (0, 5292)	0.07539393575604472
  (0, 4229)	0.07539393575604472
  (0, 1526)	0.07539393575604472
  (0, 4797)	0.06663494155067302
  (0, 2104)	0.059563324466692165
  (0, 1532)	0.059563324466692165
  (0, 1757)	0.05787594734530134
  (0, 4516)	0.07027025260186838
  (0, 3693)	0.07027025260186838
  (0, 809)	0.07539393575604472
  :	:
  (285, 4431)	0.16029350628360767
  (285, 1673)	0.07395722042443884
  (285, 3473)	0.05511108010146727
  (285, 4342)	0.05054054041052003
  (285, 1788)	0.06564278491777527
  (285, 6425)	0.07150086048

In [6]:
# Save the TF-IDF vectorizer and matrix to disk
with open('tfidf_vectorizer.pkl', 'wb') as f:
    pickle.dump(tfidf, f)

with open('tfidf_matrix.pkl', 'wb') as f:
    pickle.dump(tfidf_matrix, f)