# 🏠 Airbnb Data Analysis (Exploratory Data Analysis)

A simple project that analyzes an Airbnb dataset with Python, showcasing the main steps of Exploratory Data Analysis (EDA).

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder

In [None]:
# Load the Airbnb listings CSV and coerce the analysis columns to numeric types.
df = pd.read_csv('Airbnb_Open_Data.csv', encoding='utf-8')

# Strip '$' and ',' from price, then parse. errors='coerce' turns any value
# that is still not a number into NaN instead of raising, which matches how
# 'number of reviews' is parsed just below. The trailing astype(float) keeps
# the column's dtype as float even when every value happens to be integral.
df['price'] = pd.to_numeric(
    df['price'].replace(r'[\$,]', '', regex=True), errors='coerce'
).astype(float)
df['number of reviews'] = pd.to_numeric(df['number of reviews'], errors='coerce')

# Drop rows missing any of the three fields, including values that failed
# numeric parsing above and became NaN.
df = df.dropna(subset=['price', 'number of reviews', 'room type'])

In [None]:
# Show the first five rows as a quick look at the cleaned frame
df.head(n=5)

In [None]:
# Overwrite the first row's 'room type' with the column's most frequent value.
# mode() can return several values on a tie; the first one is used.
room_mode = df['room type'].mode().iloc[0]
room_type_pos = df.columns.get_loc('room type')
df.iloc[0, room_type_pos] = room_mode
print(f"First value in 'room type' replaced with mode: {room_mode}")

In [None]:
# Map each distinct 'room type' label to an integer code
le = LabelEncoder()
room_type_labels = df['room type']
le.fit(room_type_labels)
df['room type encoded'] = le.transform(room_type_labels)

# Show the original labels next to their codes
df.loc[:, ['room type', 'room type encoded']].head()

In [None]:
# Histogram of listing prices, drawn on the current axes
hist_ax = plt.gca()
hist_ax.hist(df['price'], bins=30, color='black', alpha=0.8)
hist_ax.set(xlabel='Price', ylabel='Frequency', title='Price Distribution')
plt.show()

In [None]:
# Pearson correlation (the Series.corr default) of price against review count
price_col = df['price']
review_col = df['number of reviews']
correlation = price_col.corr(review_col, method='pearson')
print(f"Correlation coefficient between price and number of reviews: {correlation:.2f}")

In [None]:
# Boxplot of price, drawn on the current axes, to make outliers visible
box_ax = plt.gca()
box_ax.boxplot(df['price'])
box_ax.set(ylabel='Price', title='Price Outliers')
plt.show()

In [None]:
# Count listings per room type and draw the counts as a bar chart
room_counts = df['room type'].value_counts()
bar_ax = room_counts.plot.bar(color='gray')
bar_ax.set_xlabel('Room Type')
bar_ax.set_ylabel('Count')
bar_ax.set_title('Room Type Distribution')
plt.show()

In [None]:
# 2x2 dashboard: price histogram, price-vs-reviews scatter, price boxplot,
# and a correlation heatmap of the two numeric columns.
fig, axes = plt.subplots(2, 2, figsize=(10, 8))
# axes.flat walks the grid row by row: top-left, top-right, bottom-left, bottom-right
hist_ax, scatter_ax, box_ax, heat_ax = axes.flat

# Top-left: distribution of prices
hist_ax.hist(df['price'], bins=30, color='black', alpha=0.8)
hist_ax.set(xlabel='Price', ylabel='Frequency', title='Price Distribution')

# Top-right: price against number of reviews
scatter_ax.scatter(df['price'], df['number of reviews'], color='black', alpha=0.5)
scatter_ax.set(xlabel='Price', ylabel='Number of Reviews', title='Price vs. Reviews')

# Bottom-left: price boxplot
box_ax.boxplot(df['price'])
box_ax.set(ylabel='Price', title='Boxplot')

# Bottom-right: pairwise correlation of price and review count
corr_matrix = df[['price', 'number of reviews']].corr()
sns.heatmap(corr_matrix, annot=True, fmt='.2g', cmap='Blues', ax=heat_ax)
heat_ax.set_title('Correlation Heatmap')

fig.tight_layout()
plt.show()