# H3 Analysis

## Load Data

In [None]:
import pandas as pd
import numpy as np

# Load your acs_df dataframe here
# For example:
# acs_df = pd.read_csv('path/to/your/acs_df.csv')

# Make sure to generate the 'latitude', 'longitude', and 'h3_index' columns as you did in your main notebook.

## §8: H3 Data Aggregation

In [None]:
# Aggregate data by H3 index: one output row per distinct 'h3_index' value,
# holding the mean of every numeric column over the rows in that group.
#
# The grouping key itself is excluded from the aggregated columns. When
# 'h3_index' is stored as an integer it counts as numeric, and selecting it
# here would leave it in the result as both the index and a regular column,
# so reset_index() would fail because the column already exists.
# For string-typed keys this filter changes nothing.
numeric_cols = [
    col
    for col in acs_df.select_dtypes(include=np.number).columns
    if col != 'h3_index'
]
h3_df = acs_df.groupby('h3_index')[numeric_cols].mean().reset_index()

print('Aggregated H3 dataframe shape:', h3_df.shape)
# Last expression of the cell: the notebook renders the first rows as a preview.
h3_df.head()

## Train New Model on H3 Data

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report

# Features (X): every column of the aggregated frame except the target
# and the 'h3_index' key.
X_h3 = h3_df.drop(['high_rent_burden', 'h3_index'], axis=1)

# Target (y): 1 where the aggregated 'high_rent_burden' value is above 0.5,
# otherwise 0.
y_h3 = h3_df['high_rent_burden'].gt(0.5).astype(int)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train_h3, X_test_h3, y_train_h3, y_test_h3 = train_test_split(
    X_h3,
    y_h3,
    test_size=0.2,
    random_state=42,
)

# Fit a gradient-boosting classifier directly on the aggregated features.
# NOTE(review): no preprocessing pipeline is applied here; if the original
# model relied on one, it may need to be adapted for the H3-aggregated data.
h3_model = GradientBoostingClassifier(random_state=42)
h3_model.fit(X_train_h3, y_train_h3)

# Predict labels for the held-out rows.
y_pred_h3 = h3_model.predict(X_test_h3)

# Report per-class precision, recall and F1 on the held-out rows.
print(classification_report(y_true=y_test_h3, y_pred=y_pred_h3))