# Database Viewer

This notebook provides an organized way to view and explore the SQLite database.


In [1]:
import sqlite3
import pandas as pd
from pathlib import Path

# Locate the project root and derive the database path from it.
# The notebook lives in sql/view/, but the kernel may have been started from
# the project root, from sql/, or from sql/view/ itself.
current_dir = Path().resolve()

# Candidate roots, nearest first: the working directory, its parent and its
# grandparent. The first one that contains a "data" entry wins.
root_candidates = (current_dir, current_dir.parent, current_dir.parent.parent)
for candidate in root_candidates:
    if (candidate / "data").exists():
        project_root = candidate
        break
else:
    # Nothing matched: fall back to two levels up, i.e. assume sql/view/.
    project_root = root_candidates[-1]

db_path = project_root.joinpath("data", "airbnb.db")

# Report what was resolved so a wrong working directory is easy to spot.
print(
    f"Current directory: {current_dir}",
    f"Project root: {project_root}",
    f"Database: {db_path}",
    f"Exists: {db_path.exists()}",
    sep="\n",
)


Current directory: /Users/pranavbathula/Library/CloudStorage/Box-Box/DATA MANAGEMENT FOR DATA SCIENCE/Airbnb-Price-Predictor/sql/view
Project root: /Users/pranavbathula/Library/CloudStorage/Box-Box/DATA MANAGEMENT FOR DATA SCIENCE/Airbnb-Price-Predictor
Database: /Users/pranavbathula/Library/CloudStorage/Box-Box/DATA MANAGEMENT FOR DATA SCIENCE/Airbnb-Price-Predictor/data/airbnb.db
Exists: True


## Database Overview


In [2]:
# Open the database. With a missing file, sqlite3.connect() would open a new,
# empty database instead of reporting the problem, and the queries below would
# then fail with confusing "no such table" errors -- so fail early and clearly.
if not db_path.exists():
    raise FileNotFoundError(f"Database not found: {db_path}")
conn = sqlite3.connect(str(db_path))

# Get all tables
tables_query = "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;"
tables = pd.read_sql_query(tables_query, conn)
# Filter out system tables and calendar
tables = tables[~tables['name'].isin(['calendar', 'sqlite_sequence'])]
# \N{...} escapes keep the source ASCII-only so the emoji cannot be mis-decoded.
print("\N{BAR CHART} Tables in database:")
print(tables)

# Get row counts for each table
print("\n\N{CHART WITH UPWARDS TREND} Row counts:")
for table in tables['name']:
    # Table names cannot be bound as SQL parameters, so quote the identifier
    # (doubling any embedded quotes) to cope with spaces or reserved words.
    quoted_table = '"' + table.replace('"', '""') + '"'
    count = pd.read_sql_query(f"SELECT COUNT(*) as count FROM {quoted_table}", conn)
    print(f"  {table}: {count['count'].iloc[0]:,} rows")


📊 Tables in database:
            name
0        listing
1  neighbourhood

📈 Row counts:
  listing: 14,436 rows
  neighbourhood: 230 rows


## View Neighbourhood Table


In [3]:
# Load every neighbourhood row, sorted by borough and then by name.
neighbourhood_sql = "SELECT * FROM neighbourhood ORDER BY borough, neighbourhood_name"
df_neighbourhoods = pd.read_sql_query(neighbourhood_sql, conn)

# Summarise the table: row count and the distinct borough values.
print(f"Total neighbourhoods: {len(df_neighbourhoods)}")
print(f"\nBoroughs: {df_neighbourhoods['borough'].unique()}")

# Preview the first 20 rows.
df_neighbourhoods.head(20)


Total neighbourhoods: 230

Boroughs: ['Bronx' 'Brooklyn' 'Manhattan' 'Queens' 'Staten Island']


Unnamed: 0,neighbourhood_id,borough,neighbourhood_name
0,1,Bronx,Allerton
1,2,Bronx,Baychester
2,3,Bronx,Belmont
3,4,Bronx,Bronxdale
4,5,Bronx,Castle Hill
5,6,Bronx,City Island
6,7,Bronx,Claremont Village
7,8,Bronx,Clason Point
8,11,Bronx,Co-op City
9,9,Bronx,Concourse


## View Listing Table


In [4]:
# Sample listings with neighbourhood info.
# Each column is qualified with its table alias (l = listing, n = neighbourhood)
# so the join stays unambiguous, and rows are ordered by listing_id so that
# LIMIT returns the same 30 listings on every run -- without ORDER BY the
# order of the returned rows is unspecified.
df_listings = pd.read_sql_query("""
    SELECT l.listing_id, l.price, n.borough, n.neighbourhood_name,
           l.accommodates, l.bedrooms, l.beds, l.host_is_superhost,
           l.number_of_reviews, l.review_scores_rating, l.bathrooms, l.availability_365
    FROM listing l
    LEFT JOIN neighbourhood n ON l.neighbourhood_id = n.neighbourhood_id
    ORDER BY l.listing_id
    LIMIT 30
""", conn)
df_listings


Unnamed: 0,listing_id,price,borough,neighbourhood_name,accommodates,bedrooms,beds,host_is_superhost,number_of_reviews,review_scores_rating,bathrooms,availability_365
0,2595,240.0,Manhattan,Midtown,1,0,1,0,47,4.68,1.0,289
1,6848,96.0,Brooklyn,Williamsburg,3,2,1,1,195,4.59,1.0,285
2,6872,59.0,Manhattan,East Harlem,1,1,1,0,1,5.0,1.0,83
3,6990,73.0,Manhattan,East Harlem,1,2,2,0,249,4.88,1.0,186
4,7097,216.0,Brooklyn,Fort Greene,2,1,2,1,423,4.89,1.0,0
5,8490,170.0,Brooklyn,Bedford-Stuyvesant,5,1,4,0,189,4.77,1.0,275
6,10452,90.0,Brooklyn,Bedford-Stuyvesant,2,1,2,1,82,4.66,1.0,244
7,12937,232.0,Queens,Long Island City,3,1,2,0,456,4.92,1.0,203
8,12940,151.0,Brooklyn,Bedford-Stuyvesant,2,1,1,0,80,4.58,1.0,286
9,14314,115.0,Brooklyn,Greenpoint,2,1,2,0,176,4.83,1.0,300


In [5]:
# Statistics
# One-row summary of the listing table: row count, average/min/max price,
# average guest capacity and average review rating (averages rounded to 2 dp).
stats = pd.read_sql_query("""
    SELECT 
        COUNT(*) as total_listings,
        ROUND(AVG(price), 2) as avg_price,
        MIN(price) as min_price,
        MAX(price) as max_price,
        ROUND(AVG(accommodates), 2) as avg_accommodates,
        ROUND(AVG(review_scores_rating), 2) as avg_rating
    FROM listing
""", conn)
# The \N{...} escape keeps the source ASCII-only so the emoji cannot be
# mis-decoded into garbled characters.
print("\N{BAR CHART} Listing Statistics:")
stats


📊 Listing Statistics:


Unnamed: 0,total_listings,avg_price,min_price,max_price,avg_accommodates,avg_rating
0,14436,315.92,10.0,50052.0,2.91,4.74


## Custom Queries

Add your own queries here to explore the data.


In [6]:
# Example: Listings by borough
# Per-borough listing count, mean price and mean review rating (both rounded
# to 2 dp), with the borough holding the most listings first.
query = """
    SELECT 
        n.borough,
        COUNT(*) as listing_count,
        ROUND(AVG(l.price), 2) as avg_price,
        ROUND(AVG(l.review_scores_rating), 2) as avg_rating
    FROM listing l
    LEFT JOIN neighbourhood n ON l.neighbourhood_id = n.neighbourhood_id
    GROUP BY n.borough
    ORDER BY listing_count DESC
"""

# Run the aggregate on the open connection and display the resulting frame.
df_borough_stats = pd.read_sql_query(sql=query, con=conn)
df_borough_stats


Unnamed: 0,borough,listing_count,avg_price,avg_rating
0,Manhattan,5706,533.42,4.69
1,Brooklyn,5345,189.84,4.77
2,Queens,2534,156.16,4.76
3,Bronx,611,127.53,4.74
4,Staten Island,240,119.11,4.79


In [7]:
# Release the SQLite connection now that all queries have run.
conn.close()
# The \N{CHECK MARK} escape keeps the source ASCII-only so the symbol cannot
# be mis-decoded into garbled characters.
print("\N{CHECK MARK} Connection closed")


✓ Connection closed
