In [1]:
# Importing Necessary Libraries
import numpy as np
import pandas as pd
from textblob import TextBlob
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import warnings

warnings.filterwarnings("ignore")

In [2]:
# Load the reviews workbook and capitalise the two lowercase column headers
# so every column name follows the same Title Case convention.
column_renames = {"category": "Category", "district": "District"}
df = pd.read_excel("Reviews Dataset.xlsx").rename(columns=column_renames)

# Quick sanity check of what was loaded, followed by a preview of the first rows.
print(f"Shape of Reviews Dataset: {df.shape}")
print(f"Columns of Review Dataset: {list(df.columns)}\n")
df.head()

Shape of Reviews Dataset: (1173, 4)
Columns of Review Dataset: ['Place Name', 'Review', 'Category', 'District']



Unnamed: 0,Place Name,Review,Category,District
0,Ansoo Lake,The tranquility of this place is unmatched. Pe...,Lake,Khyber Pakhtunkhwa
1,Ansoo Lake,Absolutely stunning views! The water's clarity...,Lake,Khyber Pakhtunkhwa
2,Ansoo Lake,A serene escape from the hustle and bustle of ...,Lake,Khyber Pakhtunkhwa
3,Ansoo Lake,"The journey was adventurous, and the destinati...",Lake,Khyber Pakhtunkhwa
4,Ansoo Lake,Early morning reflections on the lake are mesm...,Lake,Khyber Pakhtunkhwa


In [3]:
# Sentiment labelling with TextBlob
def calculate_sentiment(text):
    """Map a review's TextBlob polarity to a three-way sentiment label.

    Label encoding:
        Positive: 1,
        Neutral: 0,
        Negative: -1

    Args:
        text: Review text handed to ``TextBlob``.

    Returns:
        1 when the polarity is above zero, 0 when it is exactly zero,
        and -1 in every other case.
    """
    polarity = TextBlob(text).sentiment.polarity
    if polarity > 0:
        return 1
    if polarity == 0:
        return 0
    return -1

# Label every review, then show how many reviews fall into each sentiment class.
df["Sentiment"] = df["Review"].apply(calculate_sentiment)
sentiment_counts = df["Sentiment"].value_counts()
print(sentiment_counts)

 1    1030
 0      73
-1      70
Name: Sentiment, dtype: int64


In [4]:
# Encoding the Categorical Variables
# The column order (District, Category) is part of the contract: later cells call
# encoder.transform([[district, category]]) with values in exactly this order.
categorical_features = ["District", "Category"]
encoder = OrdinalEncoder()
# Fit on the raw value array (no column names), matching the plain nested lists
# that are passed to encoder.transform at prediction time.
x = encoder.fit_transform(df[categorical_features].values)

# Target: the place each review belongs to. Assigned once, as a pandas Series
# (the earlier duplicate `.values` assignment was overwritten and had no effect).
y = df["Place Name"]

# Splitting the Dataset into Training and Testing Sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
print(f"Shape of X-Train: {x_train.shape}")
print(f"Shape of X-Test: {x_test.shape}")
print(f"Shape of Y-Train: {y_train.shape}")
print(f"Shape of Y-Test: {y_test.shape}")

Shape of X-Train: (938, 2)
Shape of X-Test: (235, 2)
Shape of Y-Train: (938,)
Shape of Y-Test: (235,)


In [5]:
# Train a random forest that predicts the place name from the encoded
# (District, Category) pair; the fixed seed keeps the fit repeatable.
model = RandomForestClassifier(random_state=1)
model.fit(X=x_train, y=y_train)

In [6]:
# Score the held-out split: fraction of test reviews whose place is predicted exactly.
y_pred = model.predict(x_test)
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", test_accuracy)

Accuracy: 0.4851063829787234


### Recommending 1 Place

In [7]:
# Encode one (district, category) query and ask the forest for its single most likely place.
# NOTE(review): the district literal uses U+2212 (minus sign), not an ASCII hyphen;
# presumably this mirrors the spelling in the dataset - keep it exactly as is.
query = [["Gilgit−Baltistan", "Lake"]]
new_place = model.predict(encoder.transform(query))
print("Predicted Place:", new_place)

Predicted Place: ['Attabad Lake']


### Recommending 5 Places

In [8]:
# Get Unique Place Names as an Array
unique_place_names_array = df["Place Name"].unique()

# Convert the Array to a List
place_names = unique_place_names_array.tolist()

In [9]:
# Predict Probabilities for a New Instance
new_instance = encoder.transform([["Punjab", "Lake"]])
probabilities = model.predict_proba(new_instance)

# Select the Top 5 Places
top_5_places = np.argsort(probabilities[0])[-5:][::-1]

recommended_places = [place_names[i] for i in top_5_places]
print("Recommended Places:", recommended_places)

Recommended Places: ['Khewra Salt Mine', 'Ansoo Lake', 'Karambar Lake', 'Bhurban', 'Lake Saif-ul-Malook']


In [12]:
# Predict Probabilities for a New Instance
new_instance = encoder.transform([["Balochistan", "Coastal"]])
probabilities = model.predict_proba(new_instance)

# Select the Top 5 Places
top_5_places = np.argsort(probabilities[0])[-5:][::-1]

recommended_places = [place_names[i] for i in top_5_places]
print("Recommended Places:", recommended_places)

Recommended Places: ['Lasbela Beach', 'West Bay Beach', 'Sonmiani Beach', 'Gawadar', 'Kanhatti Gardens']
