In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px


In [None]:
# Source CSV: the listings file under the New York City / 2023-09-05 path.
url = "http://data.insideairbnb.com/united-states/ny/new-york-city/2023-09-05/visualisations/listings.csv"

# pandas downloads and parses the CSV straight from the URL.
df = pd.read_csv(filepath_or_buffer=url)

# Show the first five rows as this cell's output.
df.head(n=5)


In [None]:
# Column dtypes and non-null counts (info() prints directly).
df.info()

# A notebook cell only renders its final expression, so the summary statistics
# must be displayed explicitly or they are computed and silently dropped.
display(df.describe())

# Missing values per column (rendered as the cell's output).
df.isnull().sum()


In [None]:
# Drop rows with missing price. The explicit .copy() makes the filtered frame
# independent of the original, so the column assignment below does not write
# into a slice (which would raise pandas' SettingWithCopyWarning).
df = df[df['price'].notna()].copy()

# Convert price to numeric: strip "$" and "," before casting to float.
# The pattern is a raw string because '\$' is an invalid escape sequence in a
# regular string literal; the resulting regex is unchanged.
df['price'] = df['price'].replace(r'[\$,]', '', regex=True).astype(float)

# Remove outliers: keep only listings with price below 1000 (optional cut-off)
df = df[df['price'] < 1000]


In [None]:
# Use whichever neighbourhood column the loaded file actually provides.
# NOTE(review): the summary listings.csv under visualisations/ appears to name
# this column 'neighbourhood', while 'neighbourhood_cleansed' belongs to the
# detailed listings export -- confirm against the Inside Airbnb data dictionary.
neighbourhood_col = (
    'neighbourhood_cleansed'
    if 'neighbourhood_cleansed' in df.columns
    else 'neighbourhood'
)

# Ten neighbourhoods with the highest mean price, highest first.
top_neighborhoods = (
    df.groupby(neighbourhood_col)['price']
    .mean()
    .sort_values(ascending=False)
    .head(10)
)

plt.figure(figsize=(10, 6))
# Passing `palette` without `hue` is deprecated in seaborn 0.13, so the
# category is also assigned to `hue`; dodge=False keeps one full-width bar
# per neighbourhood.
ax = sns.barplot(
    x=top_neighborhoods.values,
    y=top_neighborhoods.index,
    hue=top_neighborhoods.index,
    palette='rocket',
    dodge=False,
)
# The hue legend would only repeat the y-axis labels; drop it if one was drawn.
legend = ax.get_legend()
if legend is not None:
    legend.remove()
plt.title('Top 10 Most Expensive Neighborhoods in NYC')
plt.xlabel('Average Price ($)')
plt.ylabel('Neighborhood')
plt.show()


In [None]:
# Box plot of price per room type; the y-axis is limited to 0-500.
sns.boxplot(x='room_type', y='price', data=df).set(
    ylim=(0, 500),
    title="Price Distribution by Room Type",
    ylabel="Price ($)",
)
plt.show()


In [None]:
# Plot a random subset of at most 1000 listings. The size is capped at the
# number of available rows (sample() raises when asked for more rows than
# exist), and random_state is fixed so re-running the notebook draws the same
# listings.
map_sample = df.sample(n=min(1000, len(df)), random_state=42)

# Map of the sampled listings; both marker colour and marker size encode price.
fig = px.scatter_mapbox(
    map_sample,
    lat="latitude",
    lon="longitude",
    color="price",
    size="price",
    hover_name="name",
    mapbox_style="carto-positron",
    title="Airbnb Listings in NYC by Price",
    zoom=10,
    height=600
)
fig.show()


In [None]:
# Scatter of price against availability_365; alpha=0.5 keeps overlapping
# points distinguishable.
sns.scatterplot(x='availability_365', y='price', data=df, alpha=0.5).set(
    title="Availability vs Price",
    xlabel="Availability (days/year)",
    ylabel="Price ($)",
)
plt.show()
