In [182]:
import pandas as pd

In [183]:
# Load the saved datasets
final_fire_data = pd.read_csv("final_fire_data.csv")
final_home_price = pd.read_csv("final_home_price.csv")


In [184]:
# Convert ZIP code to string and pad if needed
final_fire_data["ZIP_CODE"] = final_fire_data["ZIP_CODE"].astype(str).str.zfill(5)
final_home_price["ZipCode"] = final_home_price["ZipCode"].astype(str).str.zfill(5)

# Convert year to int (if needed)
final_fire_data["YEAR_"] = final_fire_data["YEAR_"].astype(int)
final_home_price["YEAR"] = final_home_price["YEAR"].astype(int)


In [185]:
final_fire_data.shape

(1917, 7)

In [186]:
merged_data = pd.merge(
    final_home_price,
    final_fire_data,
    left_on=["ZipCode", "YEAR"],
    right_on=["ZIP_CODE", "YEAR_"],
    how="left",
    indicator=True
)

merged_data["_merge"].value_counts()

_merge
left_only     8750
both          1380
right_only       0
Name: count, dtype: int64

In [187]:
# Those are the ZIPs that aren’t included in Zillow’s home price data.

unmatched_zips = final_fire_data.loc[
    ~final_fire_data["ZIP_CODE"].isin(final_home_price["ZipCode"].unique()),
    "ZIP_CODE"
].value_counts()

unmatched_zips.head # 77 zips aren't included in zillow dataset

# there's 196 (1917 - 1721) fire that did not match because zillow does not include 77 zip codes
# those 77 ZIPs’ rows in the fire data are dropped in the final merge result, because we're joining from Zillow’s ZIPs only (left join), and those 77 ZIPs don’t exist there.

<bound method NDFrame.head of ZIP_CODE
95568    8
95043    7
96108    7
93633    7
93563    7
        ..
95372    1
95317    1
93628    1
96033    1
95552    1
Name: count, Length: 100, dtype: int64>

In [188]:
# Create a flag column for fire exposure
# 0 means no
# 1 means yes
merged_data["FIRE_EXPOSED"] = (merged_data["_merge"] == "both").astype(int)

# Drop merge indicator and other useless columns
col_drop = ["_merge", "ZIP_CODE", "YEAR_", "NEXT_YEAR_PRICE"]
merged_data.drop(columns=col_drop, inplace=True)

merged_data

Unnamed: 0,ZipCode,YEAR,RegionID,City,Metro,CountyName,HOME_PRICE,PRICE_CHANGE,PCT_PRICE_CHANGE (%),NUM_FIRES,TOTAL_ACRES_BURNED_IN_ZIP,AVG_FIRE_DURATION_DAYS,MAX_PCT_ZIP_BURNED,ANY_MAJOR_FIRE,FIRE_EXPOSED
0,90001,2012,95982,Florence-Graham,"Los Angeles-Long Beach-Anaheim, CA",Los Angeles County,173120.150950,40741.986132,23.533936,,,,,,0
1,90001,2013,95982,Florence-Graham,"Los Angeles-Long Beach-Anaheim, CA",Los Angeles County,213862.137082,36859.316468,17.235083,,,,,,0
2,90001,2014,95982,Florence-Graham,"Los Angeles-Long Beach-Anaheim, CA",Los Angeles County,250721.453550,4484.383699,1.788592,,,,,,0
3,90001,2015,95982,Florence-Graham,"Los Angeles-Long Beach-Anaheim, CA",Los Angeles County,255205.837249,25174.378994,9.864343,,,,,,0
4,90001,2016,95982,Florence-Graham,"Los Angeles-Long Beach-Anaheim, CA",Los Angeles County,280380.216244,51139.857090,18.239467,,,,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10125,96161,2014,98672,Truckee,"Truckee-Grass Valley, CA",Nevada County,550710.293321,17911.600932,3.252454,,,,,,0
10126,96161,2015,98672,Truckee,"Truckee-Grass Valley, CA",Nevada County,568621.894253,29804.249300,5.241488,,,,,,0
10127,96161,2016,98672,Truckee,"Truckee-Grass Valley, CA",Nevada County,598426.143553,45546.851495,7.611107,,,,,,0
10128,96161,2017,98672,Truckee,"Truckee-Grass Valley, CA",Nevada County,643972.995049,36393.255854,5.651364,,,,,,0


In [189]:
oo = merged_data.loc[merged_data["FIRE_EXPOSED"] == 1]
oo.head(20)

Unnamed: 0,ZipCode,YEAR,RegionID,City,Metro,CountyName,HOME_PRICE,PRICE_CHANGE,PCT_PRICE_CHANGE (%),NUM_FIRES,TOTAL_ACRES_BURNED_IN_ZIP,AVG_FIRE_DURATION_DAYS,MAX_PCT_ZIP_BURNED,ANY_MAJOR_FIRE,FIRE_EXPOSED
143,90027,2015,96008,Los Angeles,"Los Angeles-Long Beach-Anaheim, CA",Los Angeles County,1057405.0,61864.537331,5.850597,1.0,4.7302,0.0,0.089696,0.0,1
287,90049,2012,96030,Los Angeles,"Los Angeles-Long Beach-Anaheim, CA",Los Angeles County,1642617.0,311152.007681,18.942452,1.0,39.644336,1.0,0.412962,0.0,1
290,90049,2015,96030,Los Angeles,"Los Angeles-Long Beach-Anaheim, CA",Los Angeles County,1880899.0,105512.747439,5.609697,1.0,0.251434,,0.002619,0.0,1
292,90049,2017,96030,Los Angeles,"Los Angeles-Long Beach-Anaheim, CA",Los Angeles County,2479988.0,716128.067841,28.876269,1.0,243.229969,9.0,2.533646,0.0,1
382,90077,2012,96058,Los Angeles,"Los Angeles-Long Beach-Anaheim, CA",Los Angeles County,1307452.0,228485.992941,17.475671,1.0,13.878707,1.0,0.291472,0.0,1
387,90077,2017,96058,Los Angeles,"Los Angeles-Long Beach-Anaheim, CA",Los Angeles County,2009354.0,486641.126398,24.218784,1.0,176.690177,9.0,3.710731,0.0,1
543,90265,2012,96116,Malibu,"Los Angeles-Long Beach-Anaheim, CA",Los Angeles County,1468093.0,186187.083919,12.682241,1.0,1.005463,0.0,0.001662,0.0,1
544,90265,2013,96116,Malibu,"Los Angeles-Long Beach-Anaheim, CA",Los Angeles County,1654280.0,185283.917682,11.200275,2.0,1889.624969,4.5,3.122382,0.0,1
545,90265,2014,96116,Malibu,"Los Angeles-Long Beach-Anaheim, CA",Los Angeles County,1839564.0,-196879.647731,-10.702517,9.0,2.579399,0.0,0.001309,0.0,1
546,90265,2015,96116,Malibu,"Los Angeles-Long Beach-Anaheim, CA",Los Angeles County,1642684.0,129122.680928,7.860468,7.0,12.945944,0.0,0.008914,0.0,1


In [190]:
# FINAL XGBOOST DATA

Xgboost_data = merged_data.copy()