# Database Viewer

This notebook provides an organized way to view and explore the SQLite database.


In [78]:
import sqlite3
import pandas as pd
from pathlib import Path

# Locate the SQLite database relative to wherever the kernel was started.
# The notebook lives in sql/view/, but it may also be launched from sql/ or
# from the project root, so each of those is probed for a "data" directory.
current_dir = Path().resolve()

# Candidate project roots, nearest first: the working directory itself
# (running from project root), its parent (running from sql/), and its
# grandparent (running from sql/view/).
candidate_roots = (current_dir, current_dir.parent, current_dir.parent.parent)

# Take the first candidate that contains "data"; if none does, fall back to
# two levels up, i.e. assume the notebook is being run from sql/view/.
project_root = next(
    (root for root in candidate_roots if (root / "data").exists()),
    candidate_roots[-1],
)

db_path = project_root / "data" / "airbnb.db"

# Echo the resolved locations so a wrong guess is visible immediately.
print(f"Current directory: {current_dir}")
print(f"Project root: {project_root}")
print(f"Database: {db_path}")
print(f"Exists: {db_path.exists()}")


Current directory: /Users/anishj29/Desktop/Github Projects/Airbnb-Price-Predictor/sql/view
Project root: /Users/anishj29/Desktop/Github Projects/Airbnb-Price-Predictor
Database: /Users/anishj29/Desktop/Github Projects/Airbnb-Price-Predictor/data/airbnb.db
Exists: True


## Database Overview


In [79]:
# Open the database and print an overview: the user tables and their row counts.
#
# sqlite3.connect() creates a new, empty database file when the path does not
# exist, which would make this viewer report "no tables" instead of the real
# problem. Fail fast with a clear error instead.
if not db_path.exists():
    raise FileNotFoundError(f"Database not found: {db_path}")

conn = sqlite3.connect(str(db_path))

# Get all tables, skipping SQLite's internal ones (their names are reserved
# and always start with "sqlite_", e.g. sqlite_sequence, sqlite_stat1).
tables_query = (
    "SELECT name FROM sqlite_master "
    "WHERE type='table' AND name NOT LIKE 'sqlite_%' ORDER BY name;"
)
tables = pd.read_sql_query(tables_query, conn)
# The calendar table is deliberately left out of this overview.
tables = tables[~tables['name'].isin(['calendar'])]
# NOTE(review): the leading characters in the two headings below look like a
# mis-decoded emoji — confirm the notebook's file encoding.
print("ðŸ“Š Tables in database:")
print(tables)

# Get row counts for each table
print("\nðŸ“ˆ Row counts:")
for table in tables['name']:
    # Identifiers cannot be bound as parameters, so quote the name (doubling
    # any embedded double quotes) to cope with spaces and reserved words.
    quoted_table = '"' + table.replace('"', '""') + '"'
    count = pd.read_sql_query(f"SELECT COUNT(*) as count FROM {quoted_table}", conn)
    print(f"  {table}: {count['count'].iloc[0]:,} rows")


ðŸ“Š Tables in database:
            name
0        listing
1  neighbourhood

ðŸ“ˆ Row counts:
  listing: 27,797 rows
  neighbourhood: 295 rows


## Check City Distribution

Let's verify that listings exist for each city.


In [80]:
# Summarise listings per city, then spot-check one city in detail.

# Listing count and price range for every city, largest city first.
city_distribution = pd.read_sql_query("""
    SELECT
        city,
        COUNT(*) as listing_count,
        AVG(price) as avg_price,
        MIN(price) as min_price,
        MAX(price) as max_price
    FROM listing
    GROUP BY city
    ORDER BY listing_count DESC
""", conn)

print("ðŸ“Š Listings by City:")
print(city_distribution)

# City to spot-check. It is passed to SQLite as a bound parameter, so changing
# this one value updates both queries and every message below consistently.
city_to_check = "Boston"

boston_count = pd.read_sql_query(
    "SELECT COUNT(*) as count FROM listing WHERE city = ?",
    conn,
    params=(city_to_check,),
)

boston_count_value = boston_count['count'].iloc[0]

if boston_count_value > 0:
    print(f"\nâœ“ {city_to_check} listings exist in database: {boston_count_value:,} listings")

    # Show sample listings for the city. Without ORDER BY, SQLite does not
    # define which rows LIMIT keeps, so order by listing_id to get the same
    # sample on every run.
    print(f"\nðŸ“‹ Sample {city_to_check} listings:")
    boston_samples = pd.read_sql_query("""
        SELECT
            listing_id,
            city,
            price,
            borough,
            neighbourhood_name,
            accommodates,
            bedrooms,
            host_is_superhost,
            number_of_reviews
        FROM listing l
        LEFT JOIN neighbourhood n ON l.neighbourhood_id = n.neighbourhood_id
        WHERE l.city = ?
        ORDER BY l.listing_id
        LIMIT 10
    """, conn, params=(city_to_check,))
    print(boston_samples)
else:
    print(f"\nâœ— No {city_to_check} listings found in database")


ðŸ“Š Listings by City:
            city  listing_count   avg_price  min_price  max_price
0            NYC          19779  578.158299       10.0    50052.0
1  Washington DC           4735  397.113622       10.0    50000.0
2         Boston           3283  567.583613       26.0    50000.0

âœ“ Boston listings exist in database: 3,283 listings

ðŸ“‹ Sample Boston listings:
   listing_id    city  price borough neighbourhood_name  accommodates  \
0        3781  Boston  125.0    None        East Boston             2   
1        5506  Boston  129.0    None            Roxbury             2   
2        6695  Boston  168.0    None            Roxbury             4   
3        8789  Boston  140.0    None        Beacon Hill             2   
4       10811  Boston  166.0    None           Back Bay             3   
5       10813  Boston  202.0    None           Back Bay             2   
6       10986  Boston  202.0    None          North End             2   
7       18711  Boston  162.0    None        

## View Neighbourhood Table


In [81]:
# Load the whole neighbourhood lookup table, sorted by borough and then name,
# report its size and the distinct borough values, and preview the first rows.
neighbourhood_sql = "SELECT * FROM neighbourhood ORDER BY borough, neighbourhood_name"
df_neighbourhoods = pd.read_sql_query(neighbourhood_sql, conn)

borough_values = df_neighbourhoods['borough'].unique()
print(f"Total neighbourhoods: {len(df_neighbourhoods)}")
print(f"\nBoroughs: {borough_values}")

df_neighbourhoods.head(20)


Total neighbourhoods: 295

Boroughs: [None 'Bronx' 'Brooklyn' 'Manhattan' 'Queens' 'Staten Island']


Unnamed: 0,neighbourhood_id,borough,neighbourhood_name
0,231,,Allston
1,232,,Back Bay
2,233,,Bay Village
3,234,,Beacon Hill
4,235,,Brighton
5,257,,"Brightwood Park, Crestwood, Petworth"
6,258,,"Brookland, Brentwood, Langdon"
7,259,,"Capitol Hill, Lincoln Park"
8,260,,"Capitol View, Marshall Heights, Benning Heights"
9,261,,"Cathedral Heights, McLean Gardens, Glover Park"


## View Listing Table


In [82]:
# Sample listings with neighbourhood info.
# The LEFT JOIN keeps listings even when no neighbourhood row matches.
# ORDER BY makes the 50-row sample reproducible: without it SQLite leaves the
# row order (and therefore which rows LIMIT keeps) undefined.
df_listings = pd.read_sql_query("""
    SELECT city, listing_id, price, borough, neighbourhood_name,
           accommodates, bedrooms, beds, host_is_superhost,
           number_of_reviews, review_scores_rating, bathrooms, availability_365
    FROM listing l
    LEFT JOIN neighbourhood n ON l.neighbourhood_id = n.neighbourhood_id
    ORDER BY l.listing_id
    LIMIT 50
""", conn)
df_listings


Unnamed: 0,city,listing_id,price,borough,neighbourhood_name,accommodates,bedrooms,beds,host_is_superhost,number_of_reviews,review_scores_rating,bathrooms,availability_365
0,NYC,2595,240.0,Manhattan,Midtown,1,0,1,0,47,4.68,1.0,289
1,Washington DC,3344,150.0,,"Downtown, Chinatown, Penn Quarters, Mount Vern...",2,1,3,0,10,5.0,1.0,362
2,Washington DC,3686,60.0,,Historic Anacostia,1,1,2,0,84,4.64,1.0,298
3,Boston,3781,125.0,,East Boston,2,1,1,1,26,4.96,1.0,326
4,Washington DC,3943,79.0,,"Edgewood, Bloomingdale, Truxton Circle, Eckington",2,1,1,1,546,4.86,1.0,331
5,Washington DC,4197,128.0,,"Capitol Hill, Lincoln Park",1,1,1,1,67,4.88,1.5,346
6,Boston,5506,129.0,,Roxbury,2,1,1,1,138,4.82,1.0,67
7,Washington DC,5589,87.0,,"Kalorama Heights, Adams Morgan, Lanier Heights",3,1,1,0,96,4.5,1.0,133
8,Boston,6695,168.0,,Roxbury,4,0,2,1,141,4.81,1.0,56
9,NYC,6848,96.0,Brooklyn,Williamsburg,3,2,1,1,195,4.59,1.0,285


In [83]:
# Whole-table summary of the listing table: row count, price average and
# range, and the average capacity and review rating (averages rounded to two
# decimals by the query itself). The result is a single-row frame.
stats_sql = """
    SELECT 
        COUNT(*) as total_listings,
        ROUND(AVG(price), 2) as avg_price,
        MIN(price) as min_price,
        MAX(price) as max_price,
        ROUND(AVG(accommodates), 2) as avg_accommodates,
        ROUND(AVG(review_scores_rating), 2) as avg_rating
    FROM listing
"""
stats = pd.read_sql_query(stats_sql, conn)

print("ðŸ“Š Listing Statistics:")
stats


ðŸ“Š Listing Statistics:


Unnamed: 0,total_listings,avg_price,min_price,max_price,avg_accommodates,avg_rating
0,27797,546.07,10.0,50052.0,3.05,4.74


## Custom Queries

Add your own queries here to explore the data. Run them before the final cell, which closes the database connection.


In [84]:
# Example custom query: listing count, average price and average rating per
# borough, busiest borough first. Because of the LEFT JOIN, listings whose
# neighbourhood has no borough value are kept and grouped under a NULL borough.
query = """
    SELECT 
        n.borough,
        COUNT(*) as listing_count,
        ROUND(AVG(l.price), 2) as avg_price,
        ROUND(AVG(l.review_scores_rating), 2) as avg_rating
    FROM listing l
    LEFT JOIN neighbourhood n ON l.neighbourhood_id = n.neighbourhood_id
    GROUP BY n.borough
    ORDER BY listing_count DESC
"""

df_borough_stats = pd.read_sql_query(sql=query, con=conn)
df_borough_stats


Unnamed: 0,borough,listing_count,avg_price,avg_rating
0,Manhattan,8249,1038.32,4.69
1,,8018,466.91,4.76
2,Brooklyn,7217,247.98,4.77
3,Queens,3201,292.69,4.76
4,Bronx,808,130.43,4.74
5,Staten Island,304,126.09,4.79


In [85]:
# Release the SQLite connection now that all queries have run; any cell that
# uses `conn` after this point needs the connection cell to be re-run first.
conn.close()
print("âœ“ Connection closed")


âœ“ Connection closed
