# Exploratory Data Analysis - Top Indian Places to Visit

This notebook performs exploratory analysis on the dataset of top places to visit in India.

In [None]:
# Import Required Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
# The CSV location is a relative path, so it is resolved against the kernel's
# current working directory.
csv_path = '../data/Top Indian Places to Visit.csv'
df = pd.read_csv(csv_path)

# Confirm the load and report the (rows, columns) shape of the frame.
print("Dataset loaded successfully!")
print(f"Dataset shape: {df.shape}")

## Dataset Overview
Let's examine the structure and content of the dataset.

In [None]:
# Display first few rows
print("First 5 rows of the dataset:")
print(df.head())

# Column names, non-null counts and dtypes.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- doing so emits a stray "None" line after
# the report.
print("\nDataset Info:")
df.info()

In [None]:
# Check for missing values
# Count the nulls per column once and derive the percentage from that result.
missing_counts = df.isnull().sum()
missing_pct = (missing_counts / len(df) * 100).round(2)

print("Missing Values:")
print(missing_counts)
print("\nMissing Value Percentage:")
print(missing_pct)

## Statistical Summary
Summary statistics for the numerical features

In [None]:
# Statistical summary of numeric columns
# describe() is called with its default arguments; the result is rounded to
# two decimals purely for display.
numeric_summary = df.describe().round(2)

print("\nDescriptive Statistics:")
print(numeric_summary)

## Categorical Features Distribution
Count of unique values in categorical columns

In [None]:
# Unique counts for categorical columns
# Columns with object dtype are the ones treated as categorical here.
categorical_cols = df.select_dtypes(include='object').columns

print("Unique value counts in categorical columns:")
for col in categorical_cols:
    column_values = df[col]
    # Header line with the number of distinct values for this column ...
    print(f"\n{col}: {column_values.nunique()} unique values")
    # ... followed by its most frequent values (head() default: first five).
    most_frequent = column_values.value_counts().head()
    print(most_frequent)

## Geographic Distribution
Places distribution by Zone, State, and City

In [None]:
# Geographic distribution
# Zone is shown in full; State and City are limited to their ten most
# frequent values, each preceded by a separator line and a heading.
section_separator = "\n" + "="*50

print("Places by Zone:")
print(df['Zone'].value_counts())

top10_sections = (
    ('State', "\nPlaces by State (Top 10):"),
    ('City', "\nPlaces by City (Top 10):"),
)
for geo_column, geo_heading in top10_sections:
    print(section_separator)
    print(geo_heading)
    print(df[geo_column].value_counts().head(10))

## Place Types Analysis
Distribution of different types of places in the dataset

In [None]:
# Place types distribution
place_types = df['Type']
type_counts = place_types.value_counts()

# Frequency of each place type, followed by the number of distinct types.
print("Distribution of Place Types:")
print(type_counts)
print(f"\nTotal unique types: {place_types.nunique()}")

## Ratings & Reviews Analysis
Google review ratings and number of reviews

In [None]:
# Ratings and reviews analysis
# Both columns get the same four summary statistics, printed in the order
# mean, min, max, median with two decimal places.
ratings = df['Google review rating']
review_counts = df['Number of google review in lakhs']

print("Google Review Ratings Statistics:")
rating_stats = (
    ("Mean", ratings.mean()),
    ("Min", ratings.min()),
    ("Max", ratings.max()),
    ("Median", ratings.median()),
)
for stat_label, stat_value in rating_stats:
    print(f"{stat_label} Rating: {stat_value:.2f}")

print("\n" + "="*50)
print("\nNumber of Google Reviews (in lakhs) Statistics:")
review_stats = (
    ("Mean", review_counts.mean()),
    ("Min", review_counts.min()),
    ("Max", review_counts.max()),
    ("Median", review_counts.median()),
)
for stat_label, stat_value in review_stats:
    print(f"{stat_label} Reviews: {stat_value:.2f}")

## Entrance Fee & Duration Analysis
Entrance fees and time needed to visit

In [None]:
# Entrance fee analysis
# Summary statistics are printed in the order mean, median, min, max for the
# entrance fee and then for the visit duration.
entrance_fee = df['Entrance Fee in INR']
visit_hours = df['time needed to visit in hrs']

print("Entrance Fee Statistics (in INR):")
fee_stats = (
    ("Mean", entrance_fee.mean()),
    ("Median", entrance_fee.median()),
    ("Min", entrance_fee.min()),
    ("Max", entrance_fee.max()),
)
for stat_label, stat_value in fee_stats:
    print(f"{stat_label} Fee: ₹{stat_value:.2f}")
# A place counts as free when its entrance fee is exactly 0.
print(f"Free Places: {entrance_fee.eq(0).sum()}")

print("\n" + "="*50)
print("\nTime Needed to Visit (hours):")
duration_stats = (
    ("Mean", visit_hours.mean()),
    ("Median", visit_hours.median()),
    ("Min", visit_hours.min()),
    ("Max", visit_hours.max()),
)
for stat_label, stat_value in duration_stats:
    print(f"{stat_label} Time: {stat_value:.2f} hours")

## Significance Categories & Best Time Analysis
Place significance and recommended visit times

In [None]:
# Significance distribution
print("Places by Significance:")
print(df['Significance'].value_counts())

# The remaining columns are reported the same way: a separator line, a
# heading, then the frequency of each value in the column.
divider = "\n" + "="*50
remaining_sections = (
    ("\nBest Time to Visit Distribution:", 'Best Time to visit'),
    ("\nDSLR Photography Allowed:", 'DSLR Allowed'),
)
for section_heading, section_column in remaining_sections:
    print(divider)
    print(section_heading)
    print(df[section_column].value_counts())