# Data Visualizations - Top Indian Places to Visit

Comprehensive visualizations of the Indian places dataset including distributions, comparisons, and geographic insights.

In [None]:
# Import Required Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Shared look for every chart below: seaborn's "whitegrid" theme and a
# default figure size of 12 x 6.
sns.set_style("whitegrid")
plt.rcParams.update({'figure.figsize': (12, 6)})

# Read the places CSV (the path is relative to the working directory) and
# report its dimensions.
df = pd.read_csv('../data/Top Indian Places to Visit.csv')
print("Dataset loaded successfully!", f"Dataset shape: {df.shape}", sep="\n")

## 1. Google Review Ratings Distribution

In [None]:
# Histogram of the 'Google review rating' column in 20 bins, drawn through
# the Axes object returned by plt.subplots.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(
    df['Google review rating'],
    bins=20,
    color='steelblue',
    edgecolor='black',
    alpha=0.7,
)
ax.set_xlabel('Google Review Rating', fontsize=12)
ax.set_ylabel('Frequency', fontsize=12)
ax.set_title('Distribution of Google Review Ratings', fontsize=14, fontweight='bold')
# Only horizontal grid lines, kept faint so the bars stay readable.
ax.grid(axis='y', alpha=0.3)
fig.tight_layout()
plt.show()

## 2. Entrance Fee Distribution

In [None]:
# Histogram of the 'Entrance Fee in INR' column in 30 bins.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(
    df['Entrance Fee in INR'],
    bins=30,
    color='coral',
    edgecolor='black',
    alpha=0.7,
)
ax.set_xlabel('Entrance Fee (INR)', fontsize=12)
ax.set_ylabel('Frequency', fontsize=12)
ax.set_title('Distribution of Entrance Fees', fontsize=14, fontweight='bold')
# Only horizontal grid lines, kept faint so the bars stay readable.
ax.grid(axis='y', alpha=0.3)
fig.tight_layout()
plt.show()

## 3. Time Needed to Visit Distribution

In [None]:
# Histogram of the 'time needed to visit in hrs' column in 25 bins.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(
    df['time needed to visit in hrs'],
    bins=25,
    color='lightgreen',
    edgecolor='black',
    alpha=0.7,
)
ax.set_xlabel('Time Needed (hours)', fontsize=12)
ax.set_ylabel('Frequency', fontsize=12)
ax.set_title('Distribution of Time Needed to Visit Places', fontsize=14, fontweight='bold')
# Only horizontal grid lines, kept faint so the bars stay readable.
ax.grid(axis='y', alpha=0.3)
fig.tight_layout()
plt.show()

## 4. Top 15 Places by Number of Google Reviews

In [None]:
# Take the 15 rows with the largest review counts, keep only the name and
# count columns, and sort ascending so the largest value ends up at the top
# of the horizontal bar chart.
top_reviews = (
    df.nlargest(15, 'Number of google review in lakhs')
    [['Name', 'Number of google review in lakhs']]
    .sort_values('Number of google review in lakhs')
)

fig, ax = plt.subplots(figsize=(12, 8))
ax.barh(
    top_reviews['Name'],
    top_reviews['Number of google review in lakhs'],
    color='skyblue',
    edgecolor='black',
)
ax.set_xlabel('Number of Reviews (in lakhs)', fontsize=12)
ax.set_title('Top 15 Places by Number of Google Reviews', fontsize=14, fontweight='bold')
# Bars run horizontally, so only vertical grid lines are drawn.
ax.grid(axis='x', alpha=0.3)
fig.tight_layout()
plt.show()

## 5. Places Distribution by Zone

In [None]:
# Number of rows per 'Zone' value, sorted ascending by count.
zone_counts = df['Zone'].value_counts().sort_values()

fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(zone_counts.index, zone_counts.values, color='mediumpurple', edgecolor='black')
ax.set_xlabel('Number of Places', fontsize=12)
ax.set_title('Distribution of Places by Zone', fontsize=14, fontweight='bold')
ax.grid(axis='x', alpha=0.3)

# Write each count just to the right of its bar (0.3 data units past the end).
for row, count in enumerate(zone_counts.values):
    ax.text(count + 0.3, row, str(count), va='center')

fig.tight_layout()
plt.show()

## 6. Top 10 Place Types by Count

In [None]:
# The ten most frequent 'Type' values (value_counts sorts descending).
type_counts = df['Type'].value_counts().head(10)

fig, ax = plt.subplots(figsize=(12, 6))
bars = ax.bar(
    range(len(type_counts)),
    type_counts.values,
    color='teal',
    edgecolor='black',
    alpha=0.7,
)
# Bars sit at integer positions; label each position with its type name.
ax.set_xticks(range(len(type_counts)))
ax.set_xticklabels(type_counts.index, rotation=45, ha='right')
ax.set_ylabel('Number of Places', fontsize=12)
ax.set_title('Top 10 Place Types by Count', fontsize=14, fontweight='bold')
ax.grid(axis='y', alpha=0.3)

# Print each count slightly above its bar.
for pos, count in enumerate(type_counts.values):
    ax.text(pos, count + 0.5, str(count), ha='center', va='bottom')

fig.tight_layout()
plt.show()

## 7. Best Time to Visit Distribution

In [None]:
# Number of rows per 'Best Time to visit' value.
best_time = df['Best Time to visit'].value_counts()

fig, ax = plt.subplots(figsize=(10, 6))
# Five fixed bar colours.
# NOTE(review): the palette assumes about five categories — confirm against
# the data if the column has more distinct values.
colors_time = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#FFA07A', '#98D8C8']
bars = ax.bar(
    best_time.index,
    best_time.values,
    color=colors_time,
    edgecolor='black',
    alpha=0.8,
)
ax.set_ylabel('Number of Places', fontsize=12)
ax.set_title('Best Time to Visit Distribution', fontsize=14, fontweight='bold')
ax.grid(axis='y', alpha=0.3)

# Annotate every bar with its integer height, centred on the bar's top edge.
for rect in bars:
    top = rect.get_height()
    centre_x = rect.get_x() + rect.get_width() / 2.
    ax.text(centre_x, top, f'{int(top)}', ha='center', va='bottom')

fig.tight_layout()
plt.show()

## 8. Correlation Heatmap of Numerical Features

In [None]:
# Pairwise correlations between all numeric columns of the DataFrame.
numeric_cols = df.select_dtypes(include=[np.number]).columns
correlation_matrix = df[numeric_cols].corr()

fig, ax = plt.subplots(figsize=(10, 8))
# Diverging colour map centred on zero; each cell is annotated with its
# coefficient rounded to two decimals.
sns.heatmap(
    correlation_matrix,
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    center=0,
    square=True,
    linewidths=1,
    cbar_kws={"shrink": 0.8},
    ax=ax,
)
ax.set_title('Correlation Heatmap of Numerical Features', fontsize=14, fontweight='bold')
fig.tight_layout()
plt.show()

## 9. Scatter Plot: Google Rating vs Number of Reviews

In [None]:
# Rating (x) against review count (y); marker colour encodes the entrance fee.
fig, ax = plt.subplots(figsize=(10, 6))
scatter = ax.scatter(
    df['Google review rating'],
    df['Number of google review in lakhs'],
    c=df['Entrance Fee in INR'],
    cmap='viridis',
    s=100,
    alpha=0.6,
    edgecolors='black',
)
ax.set_xlabel('Google Review Rating', fontsize=12)
ax.set_ylabel('Number of Reviews (in lakhs)', fontsize=12)
ax.set_title('Google Rating vs Number of Reviews', fontsize=14, fontweight='bold')

# Colour bar that explains the fee-to-colour mapping of the markers.
cbar = fig.colorbar(scatter, ax=ax)
cbar.set_label('Entrance Fee (INR)', fontsize=11)

ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

## 10. Box Plot: Google Rating by Place Type (Top 8 Types)

In [None]:
# Restrict the data to the eight most frequent 'Type' values.
top_8_types = df['Type'].value_counts().head(8).index
df_top_types = df[df['Type'].isin(top_8_types)]

fig, ax = plt.subplots(figsize=(12, 6))
# seaborn 0.13 deprecates passing `palette` without `hue` (removal is
# announced for 0.14). Assigning the x variable to `hue` keeps one Set2
# colour per type; `dodge=False` keeps each box centred on its tick on
# seaborn versions that would otherwise offset hue levels.
sns.boxplot(data=df_top_types, x='Type', y='Google review rating',
            hue='Type', dodge=False, palette='Set2', ax=ax)
# Some seaborn versions add a legend for `hue`; it would only repeat the
# x tick labels, so drop it when present.
legend = ax.get_legend()
if legend is not None:
    legend.remove()
ax.set_xlabel('Place Type', fontsize=12)
ax.set_ylabel('Google Review Rating', fontsize=12)
ax.set_title('Distribution of Ratings by Top 8 Place Types', fontsize=14, fontweight='bold')
plt.xticks(rotation=45, ha='right')
ax.grid(axis='y', alpha=0.3)
fig.tight_layout()
plt.show()

## 11. Top 10 States by Number of Places

In [None]:
# The ten most frequent 'State' values, re-sorted ascending by count.
top_states = df['State'].value_counts().head(10).sort_values()

fig, ax = plt.subplots(figsize=(12, 7))
# One colour per bar, sampled evenly across the Spectral colour map.
colors_states = plt.cm.Spectral(np.linspace(0, 1, len(top_states)))
ax.barh(top_states.index, top_states.values, color=colors_states, edgecolor='black')
ax.set_xlabel('Number of Places', fontsize=12)
ax.set_title('Top 10 States by Number of Places', fontsize=14, fontweight='bold')
ax.grid(axis='x', alpha=0.3)

# Write each count just to the right of its bar (0.3 data units past the end).
for row, count in enumerate(top_states.values):
    ax.text(count + 0.3, row, str(count), va='center')

fig.tight_layout()
plt.show()

## 12. Average Entrance Fee by Significance Category

In [None]:
# Mean entrance fee per 'Significance' category, highest first.
avg_fee_by_sig = df.groupby('Significance')['Entrance Fee in INR'].mean().sort_values(ascending=False)

fig, ax = plt.subplots(figsize=(11, 6))
bars = ax.bar(range(len(avg_fee_by_sig)), avg_fee_by_sig.values,
              color='gold', edgecolor='black', alpha=0.8)
# Bars sit at integer positions; label each position with its category name.
ax.set_xticks(range(len(avg_fee_by_sig)))
ax.set_xticklabels(avg_fee_by_sig.index, rotation=45, ha='right')
ax.set_ylabel('Average Entrance Fee (INR)', fontsize=12)
ax.set_title('Average Entrance Fee by Significance Category', fontsize=14, fontweight='bold')
ax.grid(axis='y', alpha=0.3)

for i, v in enumerate(avg_fee_by_sig.values):
    # '\u20b9' is the Indian rupee sign. The label previously held the
    # mis-decoded byte sequence of that character; the escape keeps the
    # source ASCII-only so it cannot be garbled by a re-encoding again.
    ax.text(i, v + 2, f'\u20b9{v:.0f}', ha='center', va='bottom', fontweight='bold')

fig.tight_layout()
plt.show()

## Summary

This visualization notebook provides comprehensive insights into the Indian places dataset including:

- **Distribution Analysis**: Ratings, entrance fees, and visit duration patterns
- **Popularity Metrics**: Most-reviewed places and how review counts relate to ratings
- **Geographic Insights**: Distribution across zones and states
- **Category Analysis**: Types and significance of places
- **Visit Timing**: Distribution of the best times to visit
- **Correlation Analysis**: Relationships between different numerical features

All visualizations help understand visitor preferences, pricing patterns, and geographic coverage of tourist destinations in India.