In [1]:
import pandas as pd
import json
import os

# Philly 311: load the raw service-request export into a DataFrame.
# low_memory=False has pandas parse the file in a single pass rather than in
# internal chunks, so each column's dtype is inferred from all of its values.
philly_311 = pd.read_csv(
    "../data/raw/philly_311_raw.csv",
    low_memory=False,
)

# Report (rows, columns) first, then the dtype pandas inferred for each column.
print("Philly 311 shape: ", philly_311.shape)
print(philly_311.dtypes)

Philly 311 shape:  (518841, 18)
objectid                int64
service_request_id      int64
subject                object
status                 object
status_notes           object
service_name           object
service_code           object
agency_responsible     object
service_notice         object
requested_datetime     object
updated_datetime       object
expected_datetime      object
closed_datetime        object
address                object
zipcode               float64
media_url              object
lat                   float64
lon                   float64
dtype: object


In [2]:
# Yelp: load the business file into a DataFrame.
# lines=True tells pandas to treat every line of the file as its own JSON record.
yelp_business = pd.read_json("../data/raw/yelp_academic_dataset_business.json", lines=True)

# Report (rows, columns) first, then the dtype pandas inferred for each column.
print("Yelp Business shape:", yelp_business.shape)
print(yelp_business.dtypes)

Yelp Business shape: (150346, 14)
business_id      object
name             object
address          object
city             object
state            object
postal_code      object
latitude        float64
longitude       float64
stars           float64
review_count      int64
is_open           int64
attributes       object
categories       object
hours            object
dtype: object


## Philly 311 Schema
| Column               | Type    | Description                                      |
|----------------------|---------|--------------------------------------------------|
| objectid             | int64   | Unique record identifier                         |
| service_request_id   | int64   | 311 service request ID                           |
| subject              | string  | Brief subject/title of the request               |
| status               | string  | Current status (e.g., Open, Closed)              |
| status_notes         | string  | Additional notes on status                       |
| service_name         | string  | Type/category of service requested               |
| service_code         | string  | Code corresponding to the service name           |
| agency_responsible   | string  | City agency assigned to handle the request       |
| service_notice       | string  | Notice or expected response info                 |
| requested_datetime   | string  | Date/time the request was submitted              |
| updated_datetime     | string  | Date/time the request was last updated           |
| expected_datetime    | string  | Expected completion date/time                    |
| closed_datetime      | string  | Date/time the request was closed                 |
| address              | string  | Street address of the request                    |
| zipcode              | float64 | ZIP code (has nulls)                             |
| media_url            | string  | URL to associated media/photo                    |
| lat                  | float64 | Latitude (has nulls)                             |
| lon                  | float64 | Longitude (has nulls)                            |

Records: 518,841 | Columns: 18

---

## Yelp Business Schema
| Column         | Type    | Description                                       |
|----------------|---------|---------------------------------------------------|
| business_id    | string  | Unique Yelp business identifier                   |
| name           | string  | Business name                                     |
| address        | string  | Street address                                    |
| city           | string  | City                                              |
| state          | string  | State abbreviation                                |
| postal_code    | string  | ZIP/postal code                                   |
| latitude       | float64 | Latitude                                          |
| longitude      | float64 | Longitude                                         |
| stars          | float64 | Average star rating (1.0–5.0)                     |
| review_count   | int64   | Number of reviews                                 |
| is_open        | int64   | Whether the business is open (1) or closed (0)    |
| attributes     | object  | Nested JSON object of business attributes (dict)  |
| categories     | string  | Comma-separated list of business categories       |
| hours          | object  | Nested JSON object of operating hours (dict)      |

Records: 150,346 | Columns: 14