In [None]:
# Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Mount Google Drive at /content/drive; the dataset path used by the
# "Load Data" cell points under this mount.
# NOTE(review): `google.colab` is presumably only importable inside a Colab
# runtime, so this cell is expected to fail elsewhere — confirm.
from google.colab import drive
drive.mount('/content/drive')


In [None]:
# Load Data
import os

# Folder (under the Drive mount) and workbook that hold the listings.
data_path = '/content/drive/MyDrive/Airbnb Hotel data set/'
excel_file = os.path.join(data_path, '1730285881-Airbnb_Open_Data.xlsx')

df = pd.read_excel(excel_file)

# Report the frame's dimensions and column names, then preview the first rows.
print(
    f"Dataset loaded successfully! Shape: {df.shape}",
    f"\nColumns: {list(df.columns)}",
    sep='\n',
)
df.head()

In [None]:
# 1. Booking Patterns Analysis
# Counts listings by the calendar month of their 'last review' date and plots
# the monthly totals as a line chart.
if 'last review' not in df.columns:
    print("'last review' not found.")
else:
    # Unparseable values become NaT and are excluded from the trend below.
    df['last review'] = pd.to_datetime(df['last review'], errors='coerce')
    booking_data = df[df['last review'].notna()]

    if booking_data.empty:
        print("No valid date data.")
    else:
        # Bucket by monthly Period rather than resample('M'): the 'M' *offset*
        # alias is deprecated since pandas 2.2, while 'M' as a *period*
        # frequency is accepted by both older and newer pandas.
        review_months = booking_data['last review'].dt.to_period('M')
        month_range = pd.period_range(
            review_months.min(), review_months.max(), freq='M'
        )
        # Reindex over the full span so months without any review show as 0,
        # matching what resample(...).size() produced.
        monthly_counts = review_months.value_counts().reindex(
            month_range, fill_value=0
        )

        fig, ax = plt.subplots(figsize=(10, 6))
        monthly_counts.plot(ax=ax)
        ax.set_title('Monthly Booking Trends')
        ax.set_xlabel('Month')
        ax.set_ylabel('Reviews')
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.show()
        print(f"Total: {len(booking_data)}")

In [None]:
# 2. Pricing Strategies
# Plots the distribution of listing prices and, when a 'neighbourhood' column
# exists, the 20 neighbourhoods with the highest average price.
if 'price' not in df.columns:
    print("'price' column not found.")
else:
    # Work on a numeric view of the column without modifying `df`.
    # If prices were loaded as text (e.g. '$1,000 '), keep only digits, '.'
    # and '-' before parsing; anything still unparseable becomes NaN.
    # A column that is already numeric is used unchanged.
    price = df['price']
    if not pd.api.types.is_numeric_dtype(price):
        price = pd.to_numeric(
            price.astype(str).str.replace(r'[^0-9.\-]', '', regex=True),
            errors='coerce',
        )

    if not price.notna().any():
        print("No numeric price data.")
    else:
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.histplot(price.dropna(), bins=50, ax=ax)
        ax.set_title('Distribution of Listing Prices')
        ax.set_xlabel('Price')
        ax.set_ylabel('Count')
        plt.show()

        if 'neighbourhood' in df.columns:
            # Ascending sort + tail(20) keeps the 20 highest averages, with the
            # most expensive neighbourhood drawn last (right-most bar).
            top_neighbourhoods = (
                price.groupby(df['neighbourhood'])
                .mean()
                .sort_values()
                .tail(20)
            )
            fig, ax = plt.subplots(figsize=(12, 6))
            top_neighbourhoods.plot(kind='bar', ax=ax)
            ax.set_title('Top 20 Neighbourhoods by Average Price')
            ax.set_ylabel('Average Price')
            plt.xticks(rotation=45)
            plt.tight_layout()
            plt.show()

In [None]:
# 3. Guest Preferences
# Shows how many listings fall under each room type, as a bar chart followed
# by the same counts printed as text.
if 'room type' not in df.columns:
    print("'room type' column not found.")
else:
    room_type_counts = df['room type'].value_counts()

    fig, ax = plt.subplots(figsize=(8, 5))
    room_type_counts.plot(kind='bar', ax=ax)
    ax.set(
        title='Guest Room Type Preferences',
        xlabel='Room Type',
        ylabel='Number of Listings',
    )
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

    print("\nRoom Type Counts:")
    print(room_type_counts)

In [None]:
# 4. Host Performance
# Sums 'number of reviews' per 'host id', charts the 20 hosts with the largest
# totals and prints the first five of that ranking.
required_columns = {'host id', 'number of reviews'}
if not required_columns.issubset(df.columns):
    print("Required columns not found.")
else:
    host_reviews = (
        df.groupby('host id')['number of reviews']
        .sum()
        .sort_values(ascending=False)
    )
    top_hosts = host_reviews.head(20)

    fig, ax = plt.subplots(figsize=(10, 5))
    top_hosts.plot(kind='bar', ax=ax)
    ax.set(
        title='Top 20 Hosts by Total Reviews',
        ylabel='Total Reviews',
        xlabel='Host ID',
    )
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

    print(f"\nTop 5 Hosts by Reviews:")
    print(host_reviews.head())

In [None]:
# 5. Summary Statistics Table
# Builds a dict of headline figures (listing count, price mean/median, most
# common room type, mean review count, neighbourhood count) and prints it.
# Any figure whose source column is missing is reported as None.
import warnings


def _as_numeric(series):
    """Return `series` as numbers.

    A numeric Series is returned unchanged. Otherwise values are converted to
    text, everything except digits, '.' and '-' is removed (so '$1,000'
    parses as 1000), and values that still cannot be parsed become NaN.
    """
    if pd.api.types.is_numeric_dtype(series):
        return series
    cleaned = series.astype(str).str.replace(r'[^0-9.\-]', '', regex=True)
    return pd.to_numeric(cleaned, errors='coerce')


def _most_common(series):
    """Return the first modal value of `series`, or None if it has none.

    `Series.mode()` returns an empty Series when there are no non-null
    values, so indexing it with [0] directly would raise.
    """
    modes = series.mode()
    return None if modes.empty else modes.iloc[0]


# Suppress warnings only while the figures are computed; a bare
# warnings.filterwarnings('ignore') would silence every later warning in the
# kernel session as well.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    price_values = _as_numeric(df['price']) if 'price' in df.columns else None
    summary = {
        'Total Listings': df.shape[0],
        'Average Price': price_values.mean() if price_values is not None else None,
        'Median Price': price_values.median() if price_values is not None else None,
        'Most Common Room Type': _most_common(df['room type']) if 'room type' in df.columns else None,
        'Average Reviews': df['number of reviews'].mean() if 'number of reviews' in df.columns else None,
        'Total Neighbourhoods': df['neighbourhood'].nunique() if 'neighbourhood' in df.columns else None
    }

rule = "="*50
print(rule)
print("SUMMARY STATISTICS")
print(rule)
for key, value in summary.items():
    print(f"{key}: {value}")
print(rule)