In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# Apply seaborn's "whitegrid" theme globally so the figures below share one look.
sns.set_theme(style="whitegrid")

In [None]:
# Location of the source CSV. Set the TRAVEL_DATA_PATH environment variable to point the
# notebook at a copy of the file on another machine; when it is unset, the original
# local path is used, so existing behaviour is unchanged.
travel_path = os.environ.get(
    "TRAVEL_DATA_PATH",
    "/Users/jaideepsai/Desktop/DATA-ANALYTICS/Data Source/extracted-data/Travel/Top Indian Places to Visit.csv",
)
travel_data = pd.read_csv(travel_path)
# Preview the first five rows to confirm the file parsed as expected.
travel_data.head()

In [None]:
# Show ten randomly chosen rows. A fixed random_state keeps the displayed sample the
# same on every Restart & Run All.
travel_data.sample(10, random_state=42)

In [None]:
# Tally how often each place name occurs (most frequent first).
name_column = travel_data['Name']
name_column.value_counts()

In [None]:
# Print each column's name, non-null count and dtype, plus the frame's memory usage.
travel_data.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max); by default pandas reports
# these for the numeric columns only.
travel_data.describe()

In [None]:
# Number of missing values in each column.
travel_data.isna().sum()

In [None]:
# Frequency of each 'Weekly Off' value; missing entries are excluded by default.
weekly_off_column = travel_data['Weekly Off']
weekly_off_column.value_counts()

In [None]:
# Replace missing 'Weekly Off' entries with the literal string 'nan'.
# The filled column is assigned back rather than using fillna(inplace=True) on the
# selected column: the in-place form operates on an intermediate object, which pandas
# warns about and which leaves travel_data untouched when Copy-on-Write is enabled.
travel_data['Weekly Off'] = travel_data['Weekly Off'].fillna('nan')

In [None]:
# Re-count missing values per column to confirm the effect of the fill step.
travel_data.isna().sum()

In [None]:
# Remove the 'Unnamed: 0' column (presumably a leftover index column from the CSV
# export -- confirm against the file).
# Rebinding the result together with errors='ignore' keeps this cell safe to re-run:
# a second execution is a no-op instead of raising KeyError on the missing column.
travel_data = travel_data.drop(columns='Unnamed: 0', errors='ignore')

In [None]:
# Final per-column count of missing values after the clean-up steps.
travel_data.isna().sum()

# NUMBER OF LANDMARKS BY STATE 

In [None]:
# Count the non-null place names in each state, largest count first.
landmarks_by_state = (
    travel_data
    .groupby('State')['Name']
    .count()
    .sort_values(ascending=False)
)

# Draw the counts as a vertical bar chart on an explicitly created axes.
fig, ax = plt.subplots(figsize=(12, 8), dpi=200)
landmarks_by_state.plot.bar(ax=ax)
ax.set_title('Number of Landmarks by State')
ax.set_xlabel('State')
ax.set_ylabel('Number of Landmarks')
# Tilt the state names and anchor them on their right edge so each sits under its bar.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
plt.show()

# STATES WITH THE MOST PLACES TO VISIT

In [None]:
# Number of places per state. States are listed from most to fewest places so the
# leading states appear at the top of the chart.
state_order = travel_data['State'].value_counts().index
plt.figure(figsize=(10,10))
sns.countplot(y='State', data=travel_data, order=state_order)
# Title, axis labels and plt.show() match the other figure cells and suppress the
# Axes repr that a bare countplot call would otherwise print.
plt.title('Number of Places to Visit by State')
plt.xlabel('Number of Places')
plt.ylabel('State')
plt.show()

# GOOGLE REVIEW RATINGS OF THE 5 MOST COMMON PLACE TYPES

In [None]:
# Select the five Significance categories that occur most often in the data.
# NOTE(review): the ranking is by row count, not by rating, although the chart title
# says "Rated" -- confirm which was intended.
top_5_names = travel_data['Significance'].value_counts().index[:5]
is_top_5 = travel_data['Significance'].isin(top_5_names)
top_5_rating_place = travel_data.loc[is_top_5]

# For each Google review rating value, count the places in each selected category.
fig, ax = plt.subplots(figsize=(20, 10))
sns.countplot(data=top_5_rating_place, x='Google review rating', hue='Significance', ax=ax)
ax.set_title('Top 5 Rated Place Types')
plt.show()

# BEST TIME TO VISIT PLACES

In [None]:
# Take the names found in the first 20 rows and keep every row whose name is among them.
# NOTE(review): the variables are called top_5_* but hold 20 names -- naming kept as-is.
top_5_city = travel_data['Name'].iloc[:20].values
in_first_20 = travel_data['Name'].isin(top_5_city)
top_5_city_data = travel_data.loc[in_first_20]

# One bar per place, coloured by its 'Best Time to visit' value.
fig, ax = plt.subplots(figsize=(10, 5))
sns.countplot(data=top_5_city_data, y='Name', hue='Best Time to visit', ax=ax)
ax.set_title('Places with Best Time to Visit')
plt.show()

In [None]:
# Draw a random sample of up to 20 place names and keep every row with one of them.
# random_state pins the sample so the figure is identical on every Restart & Run All;
# min(...) avoids the ValueError that sample() raises when asked for more rows than exist.
city_name = travel_data['Name'].sample(n=min(20, len(travel_data)), random_state=42).values
city_name_data = travel_data[travel_data['Name'].isin(city_name)]

# One bar per sampled place, coloured by its 'Best Time to visit' value.
plt.figure(figsize=(10,5))
sns.countplot(y='Name', hue='Best Time to visit', data=city_name_data)
plt.title('Places with Best Time to Visit')
plt.show()


# TYPES OF PLACES WHERE DSLR CAMERAS ARE PERMITTED

In [None]:
# Count places per Significance category, split by the 'DSLR Allowed' value.
plt.figure(figsize=(10,5))
sns.countplot(y='Significance',hue='DSLR Allowed',data=travel_data)
# Title, axis labels and plt.show() match the other figure cells and suppress the
# Axes repr that a bare countplot call would otherwise print.
plt.title('DSLR Permission by Place Type')
plt.xlabel('Number of Places')
plt.ylabel('Significance')
plt.show()

# TIME REQUIRED TO VISIT DIFFERENT TYPES OF PLACES

In [None]:
plt.figure(figsize=(10, 7))

# Horizontal box plot: visit duration in hours on the x-axis, one box per
# 'Significance' category on the y-axis.
sns.boxplot(x='time needed to visit in hrs', y='Significance', data=travel_data, palette='Set3')
plt.title('Distribution of Time Needed to Visit by Significance')
# Axis labels follow the x/y mapping passed to boxplot: hours on x, category on y.
plt.xlabel('Time Needed to Visit (hours)')
plt.ylabel('Significance')
plt.grid(True)
plt.show()

In [None]:
# Distribution of Landmarks by Cultural Significance
# Overall picture: number of rows per Significance value across the whole dataset.
significance_counts = travel_data['Significance'].value_counts()

plt.figure(figsize=(12, 8))
sns.barplot(x=significance_counts, y=significance_counts.index)
plt.title('Distribution of Landmarks by Cultural Significance')
plt.xlabel('Number of Landmarks')
plt.ylabel('Cultural Significance')
plt.show()

# For a more detailed analysis, also look at the distribution of cultural significance
# within the top 3 zones (the zones with the most rows in the dataset).
top_3_zones = travel_data['Zone'].value_counts().head(3).index
for zone in top_3_zones:
    plt.figure(figsize=(10, 6), dpi=200)
    # Restrict to the rows of the current zone...
    zone_data = travel_data[travel_data['Zone'] == zone]
    # ...and count Significance values within that subset only. Counting on the full
    # travel_data here would draw the same chart for every zone.
    zone_significance_count = zone_data['Significance'].value_counts().head(10)
    sns.barplot(x=zone_significance_count, y=zone_significance_count.index)
    plt.title(f'Top 10 Cultural Significances in {zone} Zone')
    plt.xlabel('Number of Landmarks')
    plt.ylabel('Cultural Significance')
    plt.show()

In [None]:
# One bar per state; seaborn's barplot aggregates the ratings of each state with its
# default estimator (the mean), which is what the title refers to.
fig, ax = plt.subplots(figsize=(25, 6))
sns.barplot(data=travel_data, x='State', y='Google review rating', palette='pastel', ax=ax)
ax.set_title('Average Google Review Rating by State')
ax.set_xlabel('State')
ax.set_ylabel('Average Google Review Rating')
# Tilt the state names so the long labels do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()

# TOP 10 ESTABLISHMENT TYPES 

In [None]:
# The ten most frequent 'Type' values, most common first; also used as the bar order.
type_counts = travel_data['Type'].value_counts()
top_types = type_counts.nlargest(10).index
top_type_rows = travel_data.loc[travel_data['Type'].isin(top_types)]

# Count plot restricted to rows whose Type is one of those ten.
fig, ax = plt.subplots(figsize=(12, 6))
sns.countplot(data=top_type_rows, x='Type', order=top_types, palette='Set2', ax=ax)
ax.set_title('Top Ten Establishments by Type')
ax.set_xlabel('Establishment Type')
ax.set_ylabel('Count')
plt.setp(ax.get_xticklabels(), rotation=45)
plt.show()