# Data Pull Preview

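This notebook runs `scripts/fetch_311.py` to fetch the last 30 days of NYC 311 data, loads `data/raw/311.csv`, and gives an initial preview of the dataset: column types, missing values per column, and the first 10 rows.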

In [None]:
import subprocess
import sys
import pandas as pd

# Run the fetch script for the last 30 days, using the same Python
# interpreter as the kernel so it sees the same environment.
print("Fetching NYC 311 data (last 30 days)...")
result = subprocess.run(
    [sys.executable, "scripts/fetch_311.py", "--days", "30"],
    capture_output=True,  # collect stdout/stderr so they can be shown in the cell output
    text=True
)
print(result.stdout)
if result.returncode != 0:
    print(f"Error: {result.stderr}")
    # Stop the notebook here instead of continuing: the next cell reads
    # data/raw/311.csv, which may be stale or missing when the fetch failed
    # (presumably the script is what writes that file -- TODO confirm).
    raise RuntimeError(
        f"scripts/fetch_311.py exited with status {result.returncode}"
    )

In [None]:
# Read the raw 311 CSV into a DataFrame and report its dimensions
df = pd.read_csv("data/raw/311.csv")
n_rows, n_cols = df.shape
print(f"Loaded {n_rows:,} rows and {n_cols} columns")

## Schema Information

In [None]:
# Schema overview: per-column dtypes, a separator rule, then the info() summary
print("Data Types:", df.dtypes, sep="\n")
print()
print("=" * 50)
df.info()

## Missing Values Analysis

In [None]:
# Count nulls once per column, then derive the percentage from those counts
null_counts = df.isnull().sum()
null_pct = (null_counts / len(df) * 100).round(2)

# One row per column, ordered from most to least missing
missing_summary = pd.DataFrame(
    {
        'Column': df.columns,
        'Missing Count': null_counts,
        'Missing %': null_pct,
    }
).sort_values('Missing %', ascending=False)
print(missing_summary.to_string(index=False))

## Sample Data

In [None]:
# Preview the first 10 rows (rendered as the cell's rich output)
df.head(n=10)