STEP 1: IMPORT DEPENDENCIES

In [None]:
# notebook_setup.py

# Data analysis
import pandas as pd
import numpy as np

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# SQL
from sqlalchemy import create_engine, text
%load_ext sql


# Date/time handling
import datetime as dt

# Pandas display options: never truncate the column list, and render every
# float with exactly two decimal places.
pd.options.display.max_columns = None
pd.options.display.float_format = lambda value: '%.2f' % value

# Plot defaults: apply the seaborn theme first, then set the figure size.
# NOTE(review): the figure size is set after set_theme() as in the original;
# presumably set_theme() would otherwise override it — confirm before reordering.
sns.set_theme(style="whitegrid")
plt.rc('figure', figsize=(10, 6))

print("✅ Common data analysis libraries loaded: pandas, numpy, seaborn, matplotlib, sqlalchemy")


STEP 2: CREATING A CONNECTION TO POSTGRES

In [None]:
import os
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

# Load environment variables from .env so credentials stay out of the notebook
load_dotenv()

# Read connection settings; os.getenv returns None for any variable that is unset
username = os.getenv("DB_USER")
password = os.getenv("DB_PASSWORD")
host = os.getenv("DB_HOST")
port = os.getenv("DB_PORT")
database = os.getenv("DB_NAME")

# Warn early about unset or empty settings. Without this, a missing .env only
# shows up later as an opaque driver error (or the driver quietly falls back
# to its own defaults). This is a warning, not an error, because some setups
# legitimately omit a value (e.g. password-less local authentication).
db_settings = {
    "DB_USER": username,
    "DB_PASSWORD": password,
    "DB_HOST": host,
    "DB_PORT": port,
    "DB_NAME": database,
}
missing_settings = [name for name, value in db_settings.items() if not value]
if missing_settings:
    print("⚠️ Missing environment variables:", ", ".join(missing_settings))

# Build connection URL (handles special characters like # automatically).
# The port is passed as an integer; an unset or empty DB_PORT becomes None so
# the driver's default port is used instead of failing on int("").
connection_url = URL.create(
    "postgresql+psycopg2",
    username=username,
    password=password,
    host=host,
    port=int(port) if port else None,
    database=database,
)

# Create engine (no connection is attempted until engine.connect() below)
engine = create_engine(connection_url)

# Test connection: open one connection and release it again via the context
# manager. Any failure is reported instead of raised so the notebook keeps running.
try:
    with engine.connect() as conn:
        print("✅ Connected to PostgreSQL successfully!")
except Exception as e:
    print("❌ Connection failed:", e)


STEP 3: ACCESSING THE TABLE

In [None]:
# List all schemas and tables you can see (system schemas are filtered out)
query = """
SELECT table_schema, table_name
FROM information_schema.tables
WHERE table_type='BASE TABLE'
AND table_schema NOT IN ('pg_catalog','information_schema','pgagent');
"""
# Actually run the catalog query. A notebook cell only renders its final
# expression, so intermediate results have to be printed explicitly.
print(pd.read_sql(query, engine))

# Preview your Airbnb data (replace airbnb_listings.ab_nyc with your own
# schema.table if it differs)
print(pd.read_sql("SELECT * FROM airbnb_listings.ab_nyc LIMIT 5;", engine))

# Row count (final expression of the cell, rendered by the notebook)
pd.read_sql("SELECT COUNT(*) FROM airbnb_listings.ab_nyc;", engine)


ANALYSIS

In [None]:
# Load the complete listings table into a DataFrame for in-memory analysis
airbnb_df = pd.read_sql(sql="SELECT * FROM airbnb_listings.ab_nyc;", con=engine)

# Quick summary: column dtypes / non-null counts, then descriptive statistics
airbnb_df.info()
summary_stats = airbnb_df.describe()
print(summary_stats)

# Example: Average price per neighbourhood_group
# (kept as the final expression so the notebook renders the result)
prices_by_group = airbnb_df.groupby("neighbourhood_group")["price"]
prices_by_group.mean()


First few rows to confirm content and formatting

In [None]:
# Report the DataFrame dimensions as a (rows, columns) tuple
row_count, column_count = airbnb_df.shape
print((row_count, column_count))

# Show the first five rows (final expression, rendered by the notebook)
airbnb_df.head(n=5)