<a href="https://colab.research.google.com/github/2303a51730/AIML-2025_B11/blob/main/2303A51730_16_11.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
import pandas as pd
import numpy as np
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk

In [24]:
# Sample travel reviews, written one record per row so each destination can be
# read together with its category, review text and rating.
_FIELDS = ('destination', 'category', 'review_text', 'rating')
_ROWS = [
    ('Tokyo', 'Urban', 'Great place to explore with temples and city life.', 5),
    ('Kyoto', 'Historical', 'Beautiful historical sites and temples.', 4),
    ('Seoul', 'Urban', 'Modern city with lots of entertainment.', 4),
    ('Beijing', 'Historical', 'Rich cultural heritage and historical monuments.', 5),
    ('Taipei', 'Nature', 'Amazing nature with lots of hiking and scenery.', 5),
    ('Hokkaido', 'Nature', 'Cold but scenic with beautiful mountains.', 4),
    ('Osaka', 'Urban', 'Vibrant city with plenty of nightlife and shopping.', 4),
    ('Jeju Island', 'Beach', 'A great beach destination with resorts.', 5),
    ('Hong Kong', 'Urban', 'Bustling city, a must-visit for shopping and food.', 4),
    ('Shanghai', 'Urban', 'Futuristic city with incredible architecture.', 5),
]

# Transpose the rows into the column-oriented mapping (field name -> list of
# values, in row order) that the rest of the notebook consumes.
data = {
    field: [row[position] for row in _ROWS]
    for position, field in enumerate(_FIELDS)
}


In [25]:
# Build the review table: each key of `data` becomes a column and each list
# position becomes a row.
df = pd.DataFrame.from_dict(data, orient='columns')

In [26]:
# 1. Top 5 travel destinations in East Asia, ranked by how many reviews
#    (rows) each destination has in the table.
destination_review_counts = df['destination'].value_counts()
top_destinations = destination_review_counts.head(5)
print(f"Top 5 Travel Destinations:\n{top_destinations}\n")

Top 5 Travel Destinations:
destination
Tokyo      1
Kyoto      1
Seoul      1
Beijing    1
Taipei     1
Name: count, dtype: int64



In [27]:
# 2. The category travelers like most, taken as the category whose reviews
#    have the highest mean rating.
ratings_by_category = df.groupby('category')['rating']
category_ratings = ratings_by_category.mean()
most_liked_category = category_ratings.idxmax()
print(f"The most liked category: {most_liked_category}\n")

The most liked category: Beach



In [28]:
# 3. Highest and lowest rating given within the most liked category.
#    Note: `max_reviews` / `min_reviews` hold rating values, not review counts.
in_most_liked_category = df['category'] == most_liked_category
most_liked_category_reviews = df.loc[in_most_liked_category]
most_liked_ratings = most_liked_category_reviews['rating']
max_reviews = most_liked_ratings.max()
min_reviews = most_liked_ratings.min()
print(f"Max and Min ratings for the most liked category ({most_liked_category}): Max={max_reviews}, Min={min_reviews}\n")



Max and Min ratings for the most liked category (Beach): Max=5, Min=5



In [29]:
# 4. Count of travelers who liked religious institutions the most.
#    There is no dedicated label for this, so a category containing
#    'Religious' or 'Historical' (case-insensitive) is used as a proxy, and
#    the "prediction" is simply the number of matching reviews.
is_religious_category = df['category'].str.contains('Religious|Historical', case=False)
religious_category_reviews = df.loc[is_religious_category]
religious_likes_count = religious_category_reviews.shape[0]
print(f"Predicted count of travelers who liked religious institutions the most: {religious_likes_count}\n")

Predicted count of travelers who liked religious institutions the most: 2



In [30]:

# 5. Travelers who liked beaches and picnic spots the most.
#    Keep the reviews whose category contains 'Beach' or 'Picnic'
#    (case-insensitive) and show only the destination and the review text.
is_beach_or_picnic = df['category'].str.contains('Beach|Picnic', case=False)
beach_or_picnic_reviews = df.loc[is_beach_or_picnic]
beach_or_picnic_likes = beach_or_picnic_reviews.loc[:, ['destination', 'review_text']]
print("Travelers who liked beaches and picnic spots the most:")
print(beach_or_picnic_likes, "\n")


Travelers who liked beaches and picnic spots the most:
   destination                              review_text
7  Jeju Island  A great beach destination with resorts. 



In [31]:

# 6. Sentiment Analysis for each review (to determine positive or negative sentiment)
# Download the VADER lexicon for sentiment analysis (if not already installed).
# This is a network/filesystem step: the recorded run shows the package being
# fetched into /root/nltk_data. The call is left as the cell's last expression,
# so its return value (True in the recorded run) is displayed as the cell output.
# NOTE(review): presumably the analyzer created in the next cell needs this
# resource, so this cell must run first on a fresh kernel — confirm.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


True

In [32]:

# Initialize the SentimentIntensityAnalyzer
# A single analyzer instance is created here and reused by the scoring cell
# below, which calls sia.polarity_scores(...) once per review.
# NOTE(review): presumably this relies on the 'vader_lexicon' resource
# downloaded in the previous cell — confirm.
sia = SentimentIntensityAnalyzer()

In [33]:

def compound_sentiment(text):
    """Return the 'compound' entry of VADER's polarity scores for one review.

    Args:
        text: The review text to score.

    Returns:
        The value stored under the 'compound' key of
        ``sia.polarity_scores(text)``.
    """
    return sia.polarity_scores(text)['compound']


# Score every review and store the result next to it in a new column.
df['sentiment_score'] = df['review_text'].apply(compound_sentiment)

print("Sentiment Scores for each review:")
print(df[['destination', 'review_text', 'sentiment_score']])

Sentiment Scores for each review:
   destination                                        review_text  sentiment_score
0        Tokyo  Great place to explore with temples and city l...           0.6249
1        Kyoto            Beautiful historical sites and temples.           0.5994
2        Seoul            Modern city with lots of entertainment.           0.4215
3      Beijing   Rich cultural heritage and historical monuments.           0.5574
4       Taipei    Amazing nature with lots of hiking and scenery.           0.5859
5     Hokkaido          Cold but scenic with beautiful mountains.           0.7469
6        Osaka  Vibrant city with plenty of nightlife and shop...           0.5267
7  Jeju Island            A great beach destination with resorts.           0.6249
8    Hong Kong  Bustling city, a must-visit for shopping and f...           0.0000
9     Shanghai      Futuristic city with incredible architecture.           0.0000
