In [3]:
## Attaches burn probabilities to the ZTRAX data
## Uses lat and long provided 
import pandas as pd
import numpy as np
import geopandas as gpd
import os
import glob
import matplotlib.pyplot as plt
import rasterio as rio
from rasterio.plot import show
from functools import reduce

In [4]:
pd.options.display.max_columns = None


In [5]:
## Get US Map for context and select Western United States
##Convert Coordinate Reference System to US Natinoal Atlas Equal Area
USMAP = gpd.read_file('/data/yoder/DensityProject/cb_2018_us_state_500k.shp')
USMAP = USMAP.to_crs("EPSG:2163")
West = ['WA', 'OR', 'CA', 'NV','AZ','NM', 'UT','CO','WY','ID','MT']

## Get map(s) of individual states if desired
WestStates = USMAP[USMAP['STUSPS'].isin(West)]

In [6]:
scratchpath = '/data/yoder/AllZillow/HedonicData/'
os.chdir(scratchpath)
## List all tables included in Zillow Variable names excel file
ZTransLayout = pd.read_excel('Layout.xlsx','ZTrans')
ZTransLayout['TableName'].unique()
ZAsmtLayout = pd.read_excel('Layout.xlsx','ZAsmt')

## Create dataframes from each table name in the Zillow table dictionary
    ## Strip 'ut' from column names
T = {}
for name,group in ZTransLayout.groupby('TableName'):
    T[str(name)[2:]] = group
A = {}
for name, group in ZAsmtLayout.groupby('TableName'):
    A[str(name)[2:]] = group
    
    
## Dictionary of Western 11 states using Zillows numbering: 
states = {'CO':'08'}
# states = {'AZ':'04',
#           'CA':'06',
#           'CO':'08',
#           'ID':'16',
#           'MT':'30',
#           'NV':'32',
#           'NM':'35',
#           'OR':'41',
#           'UT':'49',
#           'WA':'53',
#           'WY':'56'}

In [7]:
#Change this value to the state and table you care about
# State to use
# state = states[West]
state_map = USMAP[USMAP['STUSPS']=='CO']
#Transaction tables to use
Ttables = ['Main', 'PropertyInfo']
# Assessment tables to use
Atables = ['Main','Garage', 'Building', 'Value']


## Fields to use from transaction tables
Tfields = {'Main': ['TransId','State','County','DocumentDate',
                    'RecordingDate','FIPS','DataClassStndCode',
                    'DocumentTypeStndCode',
                    'LoanTypeStndCode','PropertyUseStndCode',
                    'SalesPriceAmount', 'LoanAmount'],
            'PropertyInfo':['TransId','ImportParcelID','AssessorParcelNumber',
                            'PropertyFullStreetAddress','PropertyCity',
                            'PropertyState','PropertyAddressLatitude',
                            'PropertyAddressLongitude']}
## Fields not currently used
## 'IntraFamilyTransferFlag', 'SignatureDate',
# Fields to use from Assessment tables
Afields = {'Main': ['RowID','ImportParcelID','LotSizeSquareFeet'],
          'Garage':['RowID','GarageStndCode','GarageAreaSqFt'],
          'Building':['RowID','ArchitecturalStyleStndCode','BuildingClassStndCode','BuildingQualityStndCode',
                       'BuildingConditionStndCode','EffectiveYearBuilt','YearBuilt','YearRemodeled',
                       'NoOfStories','TotalRooms','TotalBedrooms','TotalKitchens',
                       'FullBath','ThreeQuarterBath','HalfBath','QuarterBath','TotalActualBathCount',
                       'TotalBathPlumbingFixtures','RoofCoverStndCode', 'RoofStructureTypeStndCode',
                       'HeatingTypeorSystemStndCode','AirConditioningTypeorSystemStndCode',
                       'FoundationTypeStndCode','ElevatorStndCode','FireplaceFlag',
                       'FireplaceNumber','WaterStndCode','SewerStndCode'],
          'Value': ['RowID','LandAssessedValue','ImprovementAssessedValue','TotalAssessedValue',
                    'AssessmentYear','LandMarketValue','ImprovementMarketValue','TotalMarketValue',
                    'MarketValueYear','LandAppraisalValue','ImprovementAppraisalValue','TotalAppraisalValue',
                    'AppraisalValueYear']} 
## Throws error if these are included 'FireplaceTypeStndCode','StoryTypeCode'
## Not currenlty used ,'TimeshareStndCode'

In [6]:
%%time
sts_trans = {}
sts_vals = {}
SetofStates = {}
for state in states.values():
    keys = Ttables
    dfs = {}
    for i in keys:
        print(f'Started loading {state} table {i}')
        dfs[i] = pd.read_csv(f'{state}/ZTrans\{i}.txt',sep = '|', header=None,names=T[i]['FieldName'].tolist(), encoding='latin1',usecols=Tfields[i],low_memory=False)
        print(f'Finished loading {state} Trans, table {i}')
    sts_trans[state] = reduce(lambda left,right: pd.merge(left,right,on='TransId',how='left'),dfs.values())
    print(f'Finished All Transactions Tables')
    dfA = {}
    keys = Atables
    for j in keys:
        print(f'Started Loading {state} Asmt, table {j} ')
        dfA[j] = pd.read_csv(f'{state}/ZAsmt\{j}.txt',sep='|', header=None,names=A[j]['FieldName'].tolist(),encoding='latin1',usecols=Afields[j],low_memory=False)
        print(f'Finished Loading {state} Asmt, table {j}')
    sts_vals[state] =  reduce(lambda left, right:pd.merge(left,right,on='RowID',how='left'),dfA.values())


Started loading 08 table Main
Finished loading 08 Trans, table Main
Started loading 08 table PropertyInfo
Finished loading 08 Trans, table PropertyInfo
Finished All Transactions Tables
Started Loading 08 Asmt, table Main 
Finished Loading 08 Asmt, table Main
Started Loading 08 Asmt, table Garage 
Finished Loading 08 Asmt, table Garage
Started Loading 08 Asmt, table Building 
Finished Loading 08 Asmt, table Building
Started Loading 08 Asmt, table Value 
Finished Loading 08 Asmt, table Value
CPU times: user 2min 20s, sys: 52.8 s, total: 3min 13s
Wall time: 3min 29s


In [7]:
SetofStates[state] = pd.merge(sts_trans[state],sts_vals[state],on='ImportParcelID', how='left')

In [12]:
sts_vals[state]

Unnamed: 0,RowID,ImportParcelID,LotSizeSquareFeet,GarageStndCode,GarageAreaSqFt,BuildingClassStndCode,BuildingQualityStndCode,BuildingConditionStndCode,ArchitecturalStyleStndCode,YearBuilt,EffectiveYearBuilt,YearRemodeled,NoOfStories,TotalRooms,TotalBedrooms,TotalKitchens,FullBath,ThreeQuarterBath,HalfBath,QuarterBath,TotalActualBathCount,TotalBathPlumbingFixtures,RoofCoverStndCode,RoofStructureTypeStndCode,HeatingTypeorSystemStndCode,AirConditioningTypeorSystemStndCode,FoundationTypeStndCode,ElevatorStndCode,FireplaceFlag,FireplaceNumber,WaterStndCode,SewerStndCode,LandAssessedValue,ImprovementAssessedValue,TotalAssessedValue,AssessmentYear,LandMarketValue,ImprovementMarketValue,TotalMarketValue,MarketValueYear,LandAppraisalValue,ImprovementAppraisalValue,TotalAppraisalValue,AppraisalValueYear
0,CB00D515-820C-E811-80C3-3863BB430E3F,21607550,10019.0,AT,,,C,AV,,2002.0,,,2.0,9.0,4.0,,,,,,2.5,13.0,CS,GBL,FA,,,,,1.0,,,6120.0,21700.0,27820.0,2017,85000.0,301337.0,386337.0,2017.0,,,,
1,CB00D515-820C-E811-80C3-3863BB430E3F,21607550,10019.0,GR,1.0,,C,AV,,2002.0,,,2.0,9.0,4.0,,,,,,2.5,13.0,CS,GBL,FA,,,,,1.0,,,6120.0,21700.0,27820.0,2017,85000.0,301337.0,386337.0,2017.0,,,,
2,CC00D515-820C-E811-80C3-3863BB430E3F,21684561,6534.0,DT,,,C,AV,RA,1949.0,,,1.0,4.0,2.0,,1.0,,0.0,,,6.0,CS,GBL,FA,,,,,,,,4320.0,9400.0,13720.0,2017,60000.0,130622.0,190622.0,2017.0,,,,
3,CC00D515-820C-E811-80C3-3863BB430E3F,21684561,6534.0,GR,1.0,,C,AV,RA,1949.0,,,1.0,4.0,2.0,,1.0,,0.0,,,6.0,CS,GBL,FA,,,,,,,,4320.0,9400.0,13720.0,2017,60000.0,130622.0,190622.0,2017.0,,,,
4,CD00D515-820C-E811-80C3-3863BB430E3F,21577693,7841.0,AT,,,B,AV,,2001.0,,,2.0,8.0,4.0,,3.0,,0.0,,,13.0,CS,GBL,,CE,,,,1.0,,,7490.0,19060.0,26550.0,2017,104000.0,264786.0,368786.0,2017.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3971918,8C032423-6A47-E811-80C3-3863BB430E3F,21774073,6926.0,AT,,,C,AV,,1980.0,,,,0.0,2.0,,2.0,,0.0,,,6.0,CS,GHP,FA,EC,CN,,,1.0,,,,,19030.0,2017,55000.0,209300.0,264300.0,2017.0,,,,
3971919,8C032423-6A47-E811-80C3-3863BB430E3F,21774073,6926.0,GR,440.0,,C,AV,,1980.0,,,,0.0,2.0,,2.0,,0.0,,,6.0,CS,GHP,FA,EC,CN,,,1.0,,,,,19030.0,2017,55000.0,209300.0,264300.0,2017.0,,,,
3971920,8D032423-6A47-E811-80C3-3863BB430E3F,21911571,9453.0,AT,,,C,AV,RA,1950.0,,,1.0,0.0,0.0,,1.0,,0.0,,,3.0,CS,GHP,FA,NO,CN,,,,,,,,22824.0,2017,137500.0,179500.0,317000.0,2017.0,,,,
3971921,8D032423-6A47-E811-80C3-3863BB430E3F,21911571,9453.0,GR,308.0,,C,AV,RA,1950.0,,,1.0,0.0,0.0,,1.0,,0.0,,,3.0,CS,GHP,FA,NO,CN,,,,,,,,22824.0,2017,137500.0,179500.0,317000.0,2017.0,,,,


In [13]:
%%time
df = pd.concat(SetofStates.values(), ignore_index=True)

CPU times: user 2.6 s, sys: 4.14 s, total: 6.74 s
Wall time: 6.74 s


In [14]:
df

Unnamed: 0,TransId,FIPS,State,County,DataClassStndCode,RecordingDate,DocumentTypeStndCode,DocumentDate,SalesPriceAmount,PropertyUseStndCode,LoanAmount,LoanTypeStndCode,AssessorParcelNumber,PropertyFullStreetAddress,PropertyCity,PropertyState,PropertyAddressLatitude,PropertyAddressLongitude,ImportParcelID,RowID,LotSizeSquareFeet,GarageStndCode,GarageAreaSqFt,BuildingClassStndCode,BuildingQualityStndCode,BuildingConditionStndCode,ArchitecturalStyleStndCode,YearBuilt,EffectiveYearBuilt,YearRemodeled,NoOfStories,TotalRooms,TotalBedrooms,TotalKitchens,FullBath,ThreeQuarterBath,HalfBath,QuarterBath,TotalActualBathCount,TotalBathPlumbingFixtures,RoofCoverStndCode,RoofStructureTypeStndCode,HeatingTypeorSystemStndCode,AirConditioningTypeorSystemStndCode,FoundationTypeStndCode,ElevatorStndCode,FireplaceFlag,FireplaceNumber,WaterStndCode,SewerStndCode,LandAssessedValue,ImprovementAssessedValue,TotalAssessedValue,AssessmentYear,LandMarketValue,ImprovementMarketValue,TotalMarketValue,MarketValueYear,LandAppraisalValue,ImprovementAppraisalValue,TotalAppraisalValue,AppraisalValueYear
0,97922611,8001,CO,ADAMS,H,1993-09-01,WRDE,,75000.0,,75900.0,,1719-14-4-02-039,1860 E 98TH PL,THORNTON,CO,39.876425,-104.964230,21608543.0,42751A16-820C-E811-80C3-3863BB430E3F,7841.0,AT,,,C,AV,RA,1975.0,,,1.0,5.0,3.0,,1.0,,0.0,,,896.0,CS,GBL,FA,,,,,,,,5760.0,10060.0,15820.0,2017.0,80000.0,139740.0,219740.0,2017.0,,,,
1,97922611,8001,CO,ADAMS,H,1993-09-01,WRDE,,75000.0,,75900.0,,1719-14-4-02-039,1860 E 98TH PL,THORNTON,CO,39.876425,-104.964230,21608543.0,42751A16-820C-E811-80C3-3863BB430E3F,7841.0,GR,816.0,,C,AV,RA,1975.0,,,1.0,5.0,3.0,,1.0,,0.0,,,896.0,CS,GBL,FA,,,,,,,,5760.0,10060.0,15820.0,2017.0,80000.0,139740.0,219740.0,2017.0,,,,
2,97922612,8001,CO,ADAMS,H,1993-09-01,WRDE,,194100.0,UL,120000.0,,1719-05-1-10-031,11673 DECATUR DR,WESTMINSTER,CO,39.908376,-105.019622,21593845.0,19ED3B16-820C-E811-80C3-3863BB430E3F,7841.0,AT,,,B,GD,,1993.0,2013.0,2013.0,2.0,7.0,5.0,,,,,,2.5,15.0,WS,GBL,FA,,,,,1.0,,,9360.0,19730.0,29090.0,2017.0,130000.0,274070.0,404070.0,2017.0,,,,
3,97922612,8001,CO,ADAMS,H,1993-09-01,WRDE,,194100.0,UL,120000.0,,1719-05-1-10-031,11673 DECATUR DR,WESTMINSTER,CO,39.908376,-105.019622,21593845.0,19ED3B16-820C-E811-80C3-3863BB430E3F,7841.0,GR,1.0,,B,GD,,1993.0,2013.0,2013.0,2.0,7.0,5.0,,,,,,2.5,15.0,WS,GBL,FA,,,,,1.0,,,9360.0,19730.0,29090.0,2017.0,130000.0,274070.0,404070.0,2017.0,,,,
4,97922613,8001,CO,ADAMS,H,1993-09-01,WRDE,,114900.0,,103410.0,,1719-05-3-09-042,3265 W 115TH PL,WESTMINSTER,CO,39.905624,-105.027946,21594200.0,DA2E5D16-820C-E811-80C3-3863BB430E3F,5663.0,AT,,,B,AV,,1990.0,,,,5.0,3.0,,2.0,,0.0,,,9.0,CS,GBL,FA,,,,,1.0,,,9360.0,12700.0,22060.0,2017.0,130000.0,176455.0,306455.0,2017.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19877934,449200102,8125,CO,YUMA,D,2018-10-18,INTR,2000-06-22,0.0,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
19877935,449200103,8125,CO,YUMA,D,2018-10-18,INTR,2000-01-21,0.0,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
19877936,449200104,8125,CO,YUMA,D,2018-10-18,INTR,2000-06-27,0.0,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
19877937,449200105,8125,CO,YUMA,D,2018-10-18,INTR,2000-04-25,0.0,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [15]:
## Select only valid transactions
UnwantedLoanCodes =  ['AC','CT','CM','CS','CC','CL','DP','FO','FE','HE','LC','EB','EX','MD','NA','NP','FA','RE','RM','SM','SE','TR']
WantedLoanCodes =  ['AS','BL','CE','FM','PM','RD','SL']
DocumentTypeStndCodeDrop= ['CRDE','JTDE','QCDE','RRDE','VLDE']
DocumentTypeStndCodeKeep= ['CPDE','IDDE','PRDE','WRDE']

In [16]:
df.columns

Index(['TransId', 'FIPS', 'State', 'County', 'DataClassStndCode',
       'RecordingDate', 'DocumentTypeStndCode', 'DocumentDate',
       'SalesPriceAmount', 'PropertyUseStndCode', 'LoanAmount',
       'LoanTypeStndCode', 'AssessorParcelNumber', 'PropertyFullStreetAddress',
       'PropertyCity', 'PropertyState', 'PropertyAddressLatitude',
       'PropertyAddressLongitude', 'ImportParcelID', 'RowID',
       'LotSizeSquareFeet', 'GarageStndCode', 'GarageAreaSqFt',
       'BuildingClassStndCode', 'BuildingQualityStndCode',
       'BuildingConditionStndCode', 'ArchitecturalStyleStndCode', 'YearBuilt',
       'EffectiveYearBuilt', 'YearRemodeled', 'NoOfStories', 'TotalRooms',
       'TotalBedrooms', 'TotalKitchens', 'FullBath', 'ThreeQuarterBath',
       'HalfBath', 'QuarterBath', 'TotalActualBathCount',
       'TotalBathPlumbingFixtures', 'RoofCoverStndCode',
       'RoofStructureTypeStndCode', 'HeatingTypeorSystemStndCode',
       'AirConditioningTypeorSystemStndCode', 'FoundationTypeStnd

In [17]:
## Select only valid transactions
st = df.loc[(df['SalesPriceAmount'] > 0) &
            (~df['LoanTypeStndCode'].isin(UnwantedLoanCodes)) &
#             (pd.isnull(df['IntraFamilyTransferFlag'])) &
            (df['PropertyUseStndCode']=='SR') &
            (df['DocumentTypeStndCode'].isin(DocumentTypeStndCodeKeep)) & 
            (df['TotalBedrooms'] >=1)].copy()

In [18]:
st = gpd.GeoDataFrame(st, geometry = gpd.points_from_xy(st.PropertyAddressLongitude, st.PropertyAddressLatitude))
st_proj = st.copy()
st_proj.crs="epsg:4326"
st_proj=st_proj.to_crs("EPSG:2163")
st_proj.crs

<Projected CRS: EPSG:2163>
Name: US National Atlas Equal Area
Axis Info [cartesian]:
- X[east]: Easting (metre)
- Y[north]: Northing (metre)
Area of Use:
- name: USA
- bounds: (167.65, 15.56, -65.69, 74.71)
Coordinate Operation:
- name: US National Atlas Equal Area
- method: Lambert Azimuthal Equal Area (Spherical)
Datum: Not specified (based on Clarke 1866 Authalic Sphere)
- Ellipsoid: Clarke 1866 Authalic Sphere
- Prime Meridian: Greenwich

In [19]:
import rasterio
from rasterstats import zonal_stats, point_query
from rasterio.warp import calculate_default_transform, reproject, Resampling

In [20]:
## Reproject raster and load it

dst_crs ="EPSG:2163"

with rasterio.open('/data/yoder/Spread_risk/Spread_risk_raster/BP_2016raster.tif') as src:
    transform, width, height = calculate_default_transform(
        src.crs, dst_crs, src.width, src.height, *src.bounds)
    kwargs = src.meta.copy()
    kwargs.update({
        'crs': dst_crs,
        'transform': transform,
        'width': width,
        'height': height
    })
    with rasterio.open('/data/yoder/Spread_risk/Spread_risk_raster/BP_2016raster_2163.tif', 'w', **kwargs) as dst:
        for i in range(1, src.count + 1):
                   reproject(
                    source=rasterio.band(src, i),
                    destination=rasterio.band(dst, i),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=transform,
                    dst_crs=dst_crs,
                    resampling=Resampling.nearest)
                
src = rasterio.open('/data/yoder/Spread_risk/Spread_risk_raster/BP_2016raster_2163.tif')

In [21]:
## Plot points
fig, ax = plt.subplots()
show(src,ax = ax)
st_proj.plot(ax=ax, color ='red')

  A_scaled /= ((a_max - a_min) / frac)


<AxesSubplot:>

In [22]:
pts = st_proj.copy()

In [23]:
st_proj.shape

(1375077, 63)

In [24]:
## Select only valid geometry (note .isna() does not appear to catch point (nan,nan))
st_pts = st_proj[st_proj['geometry'].is_valid]
st_pts = gpd.clip(st_pts, state_map)

In [25]:
st_pts.shape

(1374190, 63)

In [26]:
st_pts.plot()

<AxesSubplot:>

In [27]:
pts = st_pts.copy()

In [28]:
%%time
## Add burn probability that the location is within (landfire uses 270 meter pixels)
pts['BP'] = point_query(pts, '/data/yoder/Spread_risk/Spread_risk_raster/BP_2016raster_2163.tif')

CPU times: user 49min 54s, sys: 3min 25s, total: 53min 19s
Wall time: 1h 1min 23s


In [29]:
%%time
## Add burn probability out to 1600 meters
pts['point_geometry'] = pts['geometry'].copy()
pts['geometry'] = pts.buffer(1600)
buff = pd.DataFrame(zonal_stats(pts, 
                             '/data/yoder/Spread_risk/Spread_risk_raster/BP_2016raster_2163.tif',
                            stats=['max','mean']))
buff.columns = [f'BP1600_'+str(col) for col in buff.columns]

buff.reset_index(drop=True,inplace=True)
pts.reset_index(drop=True,inplace=True)
pts = pd.concat([pts,buff],axis=1)
pts['geometry'] = pts['point_geometry'].copy()

CPU times: user 1h 16min 22s, sys: 7min 10s, total: 1h 23min 33s
Wall time: 1h 37min 47s


In [30]:
BurnCols = ['BP','BP1600_max','BP1600_mean']

In [32]:
%%time
ST_points = df.copy()
## Add fire perimeter data
import geopandas as gpd
Fireperims = gpd.read_file('/data/yoder/DensityProject/mtbs_perimeter_data/mtbs_perims_DD.shp')
Fireperims = Fireperims.to_crs("EPSG:2163")

state_map = state_map[['STUSPS', 'geometry']]
Buffed_state = state_map.copy()
Buffed_state['geometry'] = Buffed_state.buffer(20000)
ST_fires = gpd.sjoin(Fireperims, Buffed_state, op='within')
ST_fires = ST_fires.drop(['index_right'], axis=1)
ST_fires['Buffed_geometry100km'] = ST_fires.buffer(20000)
ST_fires['MTBS_geometry'] = ST_fires['geometry'].copy()
ST_fires['geometry'] = ST_fires['Buffed_geometry100km'].copy()


CPU times: user 27.2 s, sys: 599 ms, total: 27.8 s
Wall time: 28.1 s


In [31]:
df = pts.copy()

In [33]:
basemap = state_map.plot(edgecolor='black',color='white')
ST_fires.plot(ax=basemap, color='red')
ST_points.plot(ax= basemap, color='blue')

<AxesSubplot:>

In [34]:
%%time
ST_Samp_with_Fires = gpd.sjoin(ST_points, ST_fires, how='left')

CPU times: user 2min 38s, sys: 16.6 s, total: 2min 54s
Wall time: 2min 54s


In [35]:
def get_distance(row):
    distance = row.geometry.distance(row.MTBS_geometry)
    return distance

In [36]:
HomesAroundFires = ST_Samp_with_Fires[ST_Samp_with_Fires.MTBS_geometry.notnull()].copy()

In [37]:
HomesNotAroundFires = ST_Samp_with_Fires[ST_Samp_with_Fires.MTBS_geometry.isnull()].copy()

In [38]:
%%time
HomesAroundFires['Distance'] = HomesAroundFires.apply(lambda row: get_distance(row),axis=1)

CPU times: user 3min 3s, sys: 5.04 s, total: 3min 8s
Wall time: 3min 8s


In [39]:
import pickle
pickle.dump(HomesAroundFires, open( "COHomesAroundFires.p", "wb" ) )

In [1]:
import pickle

In [8]:
%%time
HomesAroundFires = pd.read_pickle('COHomesAroundFires.p')

CPU times: user 17.4 s, sys: 1.9 s, total: 19.3 s
Wall time: 26.2 s


In [40]:
df.shape

(1374190, 67)

In [10]:
HAF = HomesAroundFires

In [42]:
HNAF = HomesNotAroundFires

In [11]:
HAF

Unnamed: 0,TransId,FIPS,State,County,DataClassStndCode,RecordingDate,DocumentTypeStndCode,DocumentDate,SalesPriceAmount,PropertyUseStndCode,LoanAmount,LoanTypeStndCode,AssessorParcelNumber,PropertyFullStreetAddress,PropertyCity,PropertyState,PropertyAddressLatitude,PropertyAddressLongitude,ImportParcelID,RowID,LotSizeSquareFeet,GarageStndCode,GarageAreaSqFt,BuildingClassStndCode,BuildingQualityStndCode,BuildingConditionStndCode,ArchitecturalStyleStndCode,YearBuilt,EffectiveYearBuilt,YearRemodeled,NoOfStories,TotalRooms,TotalBedrooms,TotalKitchens,FullBath,ThreeQuarterBath,HalfBath,QuarterBath,TotalActualBathCount,TotalBathPlumbingFixtures,RoofCoverStndCode,RoofStructureTypeStndCode,HeatingTypeorSystemStndCode,AirConditioningTypeorSystemStndCode,FoundationTypeStndCode,ElevatorStndCode,FireplaceFlag,FireplaceNumber,WaterStndCode,SewerStndCode,LandAssessedValue,ImprovementAssessedValue,TotalAssessedValue,AssessmentYear,LandMarketValue,ImprovementMarketValue,TotalMarketValue,MarketValueYear,LandAppraisalValue,ImprovementAppraisalValue,TotalAppraisalValue,AppraisalValueYear,geometry,BP,point_geometry,BP1600_max,BP1600_mean,index_right,Fire_ID,Fire_Name,Year,StartMonth,StartDay,Fire_Type,Acres,STUSPS,Buffed_geometry100km,MTBS_geometry,Distance
40,97923044,8001,CO,ADAMS,H,1993-09-10,WRDE,,64900.0,SR,65711.0,,1825-18-1-14-001,5290 QUITMAN ST,DENVER,CO,39.792800,-105.039560,21694020.0,E2A75316-820C-E811-80C3-3863BB430E3F,7841.0,,,,C,,RA,1917.0,2001.0,2001.0,1.0,4.0,2.0,,1.0,,0.0,,,304.0,CS,GBL,,CE,,,,,,,6480.0,12170.0,18650.0,2017.0,90000.0,168983.0,258983.0,2017.0,,,,,POINT (-430686.662 -565717.586),0.000098,POINT (-430686.662 -565717.586),0.00035,0.000034,4535.0,CO3976210526320110320,INDIANGULCHFIRE,2011.0,3.0,20.0,Wildfire,1640.0,CO,"POLYGON ((-471930.698 -570157.810, -472068.900...","POLYGON ((-451536.725 -569134.815, -451547.548...",18050.275697
64,97923703,8001,CO,ADAMS,H,1993-10-01,WRDE,,66900.0,SR,67248.0,,1719-31-4-12-041,7320 ORCHARD CT,WESTMINSTER,CO,39.829613,-105.037318,21635035.0,A225D515-820C-E811-80C3-3863BB430E3F,7405.0,,,,C,AV,RA,1949.0,1980.0,1980.0,1.0,5.0,3.0,,1.0,,0.0,,,6.0,CS,GBL,,CE,,,,,,,6120.0,8700.0,14820.0,2017.0,85000.0,120851.0,205851.0,2017.0,,,,,POINT (-430258.478 -561645.060),0.000000,POINT (-430258.478 -561645.060),0.00005,0.000003,4535.0,CO3976210526320110320,INDIANGULCHFIRE,2011.0,3.0,20.0,Wildfire,1640.0,CO,"POLYGON ((-471930.698 -570157.810, -472068.900...","POLYGON ((-451536.725 -569134.815, -451547.548...",19271.480956
66,97923833,8001,CO,ADAMS,H,1993-10-05,WRDE,,65000.0,SR,65559.0,,1825-17-2-11-010,5333 JULIAN ST,DENVER,CO,39.793609,-105.031472,21693738.0,F1034316-820C-E811-80C3-3863BB430E3F,12632.0,DT,,,C,GD,RA,1951.0,,,1.0,4.0,2.0,,2.0,,0.0,,,8.0,CS,GBL,,CE,,,,,,,6480.0,17510.0,23990.0,2017.0,90000.0,243239.0,333239.0,2017.0,,,,,POINT (-429991.308 -565669.833),0.000000,POINT (-429991.308 -565669.833),0.00035,0.000032,4535.0,CO3976210526320110320,INDIANGULCHFIRE,2011.0,3.0,20.0,Wildfire,1640.0,CO,"POLYGON ((-471930.698 -570157.810, -472068.900...","POLYGON ((-451536.725 -569134.815, -451547.548...",18747.093695
67,97923833,8001,CO,ADAMS,H,1993-10-05,WRDE,,65000.0,SR,65559.0,,1825-17-2-11-010,5333 JULIAN ST,DENVER,CO,39.793609,-105.031472,21693738.0,F1034316-820C-E811-80C3-3863BB430E3F,12632.0,GR,1.0,,C,GD,RA,1951.0,,,1.0,4.0,2.0,,2.0,,0.0,,,8.0,CS,GBL,,CE,,,,,,,6480.0,17510.0,23990.0,2017.0,90000.0,243239.0,333239.0,2017.0,,,,,POINT (-429991.308 -565669.833),0.000000,POINT (-429991.308 -565669.833),0.00035,0.000032,4535.0,CO3976210526320110320,INDIANGULCHFIRE,2011.0,3.0,20.0,Wildfire,1640.0,CO,"POLYGON ((-471930.698 -570157.810, -472068.900...","POLYGON ((-451536.725 -569134.815, -451547.548...",18747.093695
74,97924030,8001,CO,ADAMS,H,1993-10-12,WRDE,,82500.0,SR,81885.0,,1719-31-1-23-022,3770 W 77TH AVE,WESTMINSTER,CO,39.836142,-105.036279,21633759.0,10CC3B16-820C-E811-80C3-3863BB430E3F,9017.0,AT,,,C,AV,RA,1957.0,,,1.0,5.0,3.0,,1.0,,0.0,,,6.0,CS,GBL,FA,,,,,,,,6120.0,10820.0,16940.0,2017.0,85000.0,150303.0,235303.0,2017.0,,,,,POINT (-430127.853 -560926.096),0.000000,POINT (-430127.853 -560926.096),0.00015,0.000006,4535.0,CO3976210526320110320,INDIANGULCHFIRE,2011.0,3.0,20.0,Wildfire,1640.0,CO,"POLYGON ((-471930.698 -570157.810, -472068.900...","POLYGON ((-451536.725 -569134.815, -451547.548...",19620.299471
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1373958,440494113,8123,CO,WELD,H,2018-08-22,WRDE,2018-08-22,172800.0,SR,138240.0,,0553-27-1-26-011,402 CAVE AVE,PIERCE,CO,40.634245,-104.757724,23963049.0,7866B8B7-EC36-E811-80C3-3863BB430E3F,8712.0,GR,672.0,,,,,1999.0,,,1.0,5.0,3.0,,2.0,,0.0,,,9.0,CS,,FA,,,,,,,,1370.0,5470.0,6840.0,2017.0,19000.0,75999.0,94999.0,2017.0,,,,,POINT (-401490.775 -473763.091),0.000067,POINT (-401490.775 -473763.091),0.00125,0.000409,4621.0,CO4070010491720020320,UNNAMED,2002.0,3.0,20.0,Wildfire,1209.0,CO,"POLYGON ((-434830.211 -473353.962, -435121.035...","POLYGON ((-413448.943 -465855.862, -413484.941...",14156.330102
1373986,440559996,8123,CO,WELD,H,2018-08-27,WRDE,2018-08-24,600000.0,SR,450000.0,,1051-06-4-00-020,23481 COUNTY ROAD 63,KERSEY,CO,40.340527,-104.472089,24026245.0,9FFACDB7-EC36-E811-80C3-3863BB430E3F,190357.0,AT,,,,,,1998.0,,,1.0,6.0,3.0,,2.0,,0.0,,,11.0,CS,,CE,CE,,,,,,,5010.0,17460.0,22470.0,2017.0,69642.0,242466.0,312108.0,2017.0,,,,,POINT (-379111.368 -507712.531),0.000160,POINT (-379111.368 -507712.531),0.00090,0.000250,4590.0,CO4042510428020000915,UNNAMED,2000.0,9.0,15.0,Unknown,1337.0,CO,"POLYGON ((-366934.094 -520360.075, -368213.981...","POLYGON ((-362844.766 -500794.019, -362895.864...",17318.122366
1373987,440559996,8123,CO,WELD,H,2018-08-27,WRDE,2018-08-24,600000.0,SR,450000.0,,1051-06-4-00-020,23481 COUNTY ROAD 63,KERSEY,CO,40.340527,-104.472089,24026245.0,9FFACDB7-EC36-E811-80C3-3863BB430E3F,190357.0,GR,538.0,,,,,1998.0,,,1.0,6.0,3.0,,2.0,,0.0,,,11.0,CS,,CE,CE,,,,,,,5010.0,17460.0,22470.0,2017.0,69642.0,242466.0,312108.0,2017.0,,,,,POINT (-379111.368 -507712.531),0.000160,POINT (-379111.368 -507712.531),0.00090,0.000250,4590.0,CO4042510428020000915,UNNAMED,2000.0,9.0,15.0,Unknown,1337.0,CO,"POLYGON ((-366934.094 -520360.075, -368213.981...","POLYGON ((-362844.766 -500794.019, -362895.864...",17318.122366
1374046,441210701,8123,CO,WELD,H,2018-09-05,WRDE,2018-09-05,330000.0,SR,231000.0,,0807-01-2-16-001,504 VIVIAN ST,FORT COLLINS,CO,40.520045,-104.849149,23974215.0,DDD1B1B7-EC36-E811-80C3-3863BB430E3F,9583.0,AT,,,,,,2002.0,,,2.0,6.0,3.0,,2.0,,2.0,,,9.0,CS,GHP,CE,CE,,,,,,,2020.0,15690.0,17710.0,2017.0,28000.0,217915.0,245915.0,2017.0,,,,,POINT (-409910.175 -485988.366),0.000000,POINT (-409910.175 -485988.366),0.00045,0.000039,4621.0,CO4070010491720020320,UNNAMED,2002.0,3.0,20.0,Wildfire,1209.0,CO,"POLYGON ((-434830.211 -473353.962, -435121.035...","POLYGON ((-413448.943 -465855.862, -413484.941...",19318.200212


In [12]:
df = HAF

In [13]:
df['RecordingDate'] =pd.to_datetime(df['RecordingDate'])
df['Year'] = df.RecordingDate.dt.year

In [14]:
# HAF.groupby('BuildingQualityStndCode')['TransId'].count()

In [15]:
df.groupby('RoofCoverStndCode')['TransId'].count()

RoofCoverStndCode
AP     212455
AS       1221
BR        109
BU      26141
CN       6584
CS    1860090
FG         51
GR         44
ME      27003
OT       1045
RC       2199
SH       3710
SL        912
ST          4
TG        652
TL      48841
UR       2617
WS     122623
Name: TransId, dtype: int64

In [16]:
## Convert to 2020 dollars
CPI = pd.read_csv(scratchpath+'/InfAdjust.csv')
df = pd.merge(df,CPI,on='Year',how='left')
df['SalesPrice'] = df['SalesPriceAmount']/df['Deflator_base2020']
df['LoanAmt'] = df['LoanAmount']/df['Deflator_base2020']

In [17]:
## Select only valid transactions
UnwantedLoanCodes =  ['AC','CT','CM','CS','CC','CL','DP','FO','FE','HE','LC','EB','EX','MD','NA','NP','FA','RE','RM','SM','SE','TR']
WantedLoanCodes =  ['AS','BL','CE','FM','PM','RD','SL']
DocumentTypeStndCodeDrop= ['CRDE','JTDE','QCDE','RRDE','VLDE']
DocumentTypeStndCodeKeep= ['CPDE','IDDE','PRDE','WRDE']


st = df.loc[(df['SalesPriceAmount'] > 0) &
            (~df['LoanTypeStndCode'].isin(UnwantedLoanCodes)) &
#             (pd.isnull(df['IntraFamilyTransferFlag'])) &
            (df['PropertyUseStndCode']=='SR') &
            (df['DocumentTypeStndCode'].isin(DocumentTypeStndCodeKeep)) & 
            (df['TotalBedrooms'] >=1)].copy()

In [18]:
st = st.drop([
              'AssessorParcelNumber','PropertyAddressLatitude',
                 'PropertyAddressLongitude','ImportParcelID'],axis=1)

#May need to drop'TimeshareStndCode',

In [19]:
st =st.drop(['State'], axis=1)

In [20]:
rooftypes = pd.get_dummies(st['RoofCoverStndCode'],prefix='roof')

In [21]:
df = pd.concat([st, rooftypes], axis=1)

In [22]:
roofs = rooftypes.columns

In [23]:
samp = df.copy()

In [24]:
samp['Bathrooms'] = samp['FullBath'].replace(np.nan, 0) + samp['ThreeQuarterBath'].replace(np.nan, 0) + samp['HalfBath'].replace(np.nan, 0) +samp['QuarterBath'].replace(np.nan, 0)

In [25]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col
from sklearn.linear_model import LinearRegression
from scipy.stats import ttest_ind


In [26]:
roofs

Index(['roof_AP', 'roof_AS', 'roof_BR', 'roof_BU', 'roof_CN', 'roof_CS',
       'roof_FG', 'roof_GR', 'roof_ME', 'roof_OT', 'roof_RC', 'roof_SH',
       'roof_SL', 'roof_ST', 'roof_TG', 'roof_TL', 'roof_UR', 'roof_WS'],
      dtype='object')

In [27]:
roofs.values

array(['roof_AP', 'roof_AS', 'roof_BR', 'roof_BU', 'roof_CN', 'roof_CS',
       'roof_FG', 'roof_GR', 'roof_ME', 'roof_OT', 'roof_RC', 'roof_SH',
       'roof_SL', 'roof_ST', 'roof_TG', 'roof_TL', 'roof_UR', 'roof_WS'],
      dtype=object)

In [28]:
samp.groupby('RoofCoverStndCode')['TransId'].count()

RoofCoverStndCode
AP     212455
AS       1221
BR        109
BU      26141
CN       6584
CS    1860090
FG         51
GR         44
ME      27003
OT       1045
RC       2199
SH       3710
SL        912
ST          4
TG        652
TL      48841
UR       2617
WS     122623
Name: TransId, dtype: int64

In [29]:
BP_geq0 = samp.copy()

In [30]:
BP_geq0.loc[BP_geq0['RoofCoverStndCode'] =='AP', 'AsphaltRoof'] = 1
BP_geq0.loc[BP_geq0['RoofCoverStndCode'] !='AP', 'AsphaltRoof'] = 0
BP_geq0.loc[BP_geq0['RoofCoverStndCode'] =='AS', 'Asbestos'] = 1
BP_geq0.loc[BP_geq0['RoofCoverStndCode'] !='AS', 'Asbestos'] = 0
BP_geq0.loc[BP_geq0['RoofCoverStndCode'] =='BR', 'Bermuda'] = 1
BP_geq0.loc[BP_geq0['RoofCoverStndCode'] !='BR', 'Bermuda'] = 0
BP_geq0.loc[BP_geq0['RoofCoverStndCode'] =='BU', 'BuiltUpRoof'] = 1
BP_geq0.loc[BP_geq0['RoofCoverStndCode'] !='BU', 'BuiltUpRoof'] = 0
BP_geq0.loc[BP_geq0['RoofCoverStndCode'] =='CN', 'ConcreteRoof'] = 1
BP_geq0.loc[BP_geq0['RoofCoverStndCode'] !='CN', 'ConcreteRoof'] = 0
BP_geq0.loc[BP_geq0['RoofCoverStndCode'] =='CS', 'CompositShingle'] = 1
BP_geq0.loc[BP_geq0['RoofCoverStndCode'] !='CS', 'CompositShingle'] = 0
BP_geq0.loc[BP_geq0['RoofCoverStndCode'] =='ME', 'MetalRoof'] = 1
BP_geq0.loc[BP_geq0['RoofCoverStndCode'] !='ME', 'MetalRoof'] = 0
BP_geq0.loc[BP_geq0['RoofCoverStndCode'] =='TL', 'TileRoof'] = 1
BP_geq0.loc[BP_geq0['RoofCoverStndCode'] !='TL', 'TileRoof'] = 0
BP_geq0.loc[BP_geq0['RoofCoverStndCode'] =='WS', 'Wood'] = 1
BP_geq0.loc[BP_geq0['RoofCoverStndCode'] =='WG', 'Wood'] = 1
BP_geq0.loc[BP_geq0['RoofCoverStndCode'] =='WD', 'Wood'] = 1
BP_geq0.loc[BP_geq0['RoofCoverStndCode'] !='WS', 'Wood'] = 0

In [31]:
samp = BP_geq0.copy()

In [32]:
samp = samp[samp['SalesPrice'] > 10000].copy()
samp = samp[samp['SalesPrice'] < 5000000].copy()

In [33]:
samp = samp.drop_duplicates(subset=['TransId'])

In [34]:
samp['BP'].describe()

count    487429.000000
mean          0.000471
std           0.001295
min           0.000000
25%           0.000000
50%           0.000000
75%           0.000130
max           0.011418
Name: BP, dtype: float64

In [35]:
## .001 1 in 1,000
## .0001 1 in 10,000

In [36]:
pd.options.display.float_format = '{:.2f}'.format

In [37]:
samp[samp['BP']>0].describe()

Unnamed: 0,TransId,FIPS,SalesPriceAmount,LoanAmount,LotSizeSquareFeet,GarageAreaSqFt,YearBuilt,EffectiveYearBuilt,YearRemodeled,NoOfStories,TotalRooms,TotalBedrooms,TotalKitchens,FullBath,ThreeQuarterBath,HalfBath,QuarterBath,TotalActualBathCount,TotalBathPlumbingFixtures,FireplaceNumber,LandAssessedValue,ImprovementAssessedValue,TotalAssessedValue,AssessmentYear,LandMarketValue,ImprovementMarketValue,TotalMarketValue,MarketValueYear,LandAppraisalValue,ImprovementAppraisalValue,TotalAppraisalValue,AppraisalValueYear,BP,BP1600_max,BP1600_mean,index_right,Year,StartMonth,StartDay,Acres,Distance,CIP,Deflator_base2020,SalesPrice,LoanAmt,roof_AP,roof_AS,roof_BR,roof_BU,roof_CN,roof_CS,roof_FG,roof_GR,roof_ME,roof_OT,roof_RC,roof_SH,roof_SL,roof_ST,roof_TG,roof_TL,roof_UR,roof_WS,Bathrooms,AsphaltRoof,Asbestos,Bermuda,BuiltUpRoof,ConcreteRoof,CompositShingle,MetalRoof,TileRoof,Wood
count,229815.0,229815.0,229815.0,229807.0,222733.0,956.0,229793.0,55909.0,53998.0,175967.0,229814.0,229815.0,1064.0,174150.0,1066.0,175216.0,1066.0,55664.0,48402.0,126070.0,210036.0,210200.0,229774.0,229815.0,227547.0,227714.0,229786.0,229786.0,2768.0,2768.0,2768.0,1066.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229807.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0,229815.0
mean,143814349.62,8047.0,281142.43,211779.88,118738.99,623.46,1980.25,1994.55,1994.69,1.43,4.62,3.36,1.06,2.39,0.0,0.22,0.0,2.58,6.04,1.36,7838.43,20862.9,29425.27,2017.13,110113.77,290939.28,399058.49,2017.08,61804.02,150614.14,212418.16,2017.0,0.0,0.0,0.0,4653.44,2004.21,5.24,17.39,7900.45,14001.67,191.55,0.74,374533.52,278096.6,0.06,0.0,0.0,0.0,0.01,0.52,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.07,1.98,0.06,0.0,0.0,0.0,0.01,0.52,0.02,0.02,0.07
std,103459721.92,23.57,206688.42,780128.61,2398160.63,243.93,20.63,19.45,19.54,0.52,3.73,0.93,0.25,1.01,0.0,0.54,0.0,0.78,25.81,0.62,9352.57,13342.87,22221.74,0.33,132773.44,188124.82,286745.4,0.27,122121.29,266368.38,354331.96,0.0,0.0,0.0,0.0,1516.38,6.99,2.37,7.84,16592.63,4911.4,30.79,0.12,255749.44,833780.23,0.23,0.01,0.01,0.07,0.08,0.5,0.01,0.01,0.14,0.03,0.04,0.04,0.03,0.0,0.03,0.15,0.02,0.25,1.49,0.23,0.01,0.01,0.07,0.08,0.5,0.14,0.15,0.25
min,97923044.0,8001.0,6500.0,0.0,436.0,160.0,1854.0,1885.0,1885.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.5,0.0,1.0,0.0,0.0,20.0,2016.0,0.0,53.0,122.0,2016.0,0.0,0.0,0.0,2017.0,0.0,0.0,0.0,4360.0,1993.0,1.0,1.0,1001.0,0.0,144.46,0.56,10890.36,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,103210173.5,8035.0,164800.0,127500.0,7398.0,484.0,1971.0,1985.0,1985.0,1.0,0.0,3.0,1.0,2.0,0.0,0.0,0.0,2.0,0.0,1.0,3240.0,13660.0,18260.0,2017.0,45000.0,191638.0,251334.0,2017.0,0.0,0.0,0.0,2017.0,0.0,0.0,0.0,4496.0,1999.0,3.0,12.0,1386.0,11396.78,166.57,0.64,235242.3,186926.9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,104494528.0,8041.0,230000.0,184000.0,10700.0,576.0,1983.0,1999.0,1999.0,1.0,5.0,3.0,1.0,2.0,0.0,0.0,0.0,2.5,3.0,1.0,5620.0,18150.0,24520.0,2017.0,79000.0,252082.5,337911.5,2017.0,0.0,0.0,0.0,2017.0,0.0,0.0,0.0,4517.0,2003.0,5.0,20.0,2692.0,15262.08,183.96,0.71,311813.11,251643.31,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
75%,106077651.5,8059.0,335000.0,262014.0,36155.0,720.0,1996.0,2011.0,2011.0,2.0,7.0,4.0,1.0,3.0,0.0,0.0,0.0,3.25,9.0,2.0,8748.0,24538.0,33385.0,2017.0,122745.0,338893.25,456398.75,2017.0,39500.0,237425.0,378022.5,2017.0,0.0,0.0,0.0,4535.0,2009.0,6.0,24.0,9131.0,17879.23,215.3,0.83,435042.03,337594.76,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
max,449555540.0,8123.0,4750000.0,299571007.0,377142480.0,2036.0,2017.0,2017.0,2017.0,4.5,90.0,16.0,3.0,16.0,0.0,8.0,0.0,8.5,974.0,9.0,254720.0,1063027.0,1250947.0,2018.0,4444700.0,5546800.0,9213600.0,2018.0,800000.0,2423940.0,2934680.0,2017.0,0.01,0.01,0.01,21924.0,2018.0,11.0,30.0,129417.0,19999.38,251.11,0.97,4921923.93,323041694.69,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,24.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [38]:
samp = samp[samp['BP']>.0001]

In [39]:
# extras    +  BP*AsphaltRoo + BP*CompositShingle + BP*MetalRoof + BP*Wood 

In [40]:
samp['BP1600_max'].describe()

count   129307.00
mean         0.00
std          0.00
min          0.00
25%          0.00
50%          0.00
75%          0.00
max          0.01
Name: BP1600_max, dtype: float64

In [41]:
samp

Unnamed: 0,TransId,FIPS,County,DataClassStndCode,RecordingDate,DocumentTypeStndCode,DocumentDate,SalesPriceAmount,PropertyUseStndCode,LoanAmount,LoanTypeStndCode,PropertyFullStreetAddress,PropertyCity,PropertyState,RowID,LotSizeSquareFeet,GarageStndCode,GarageAreaSqFt,BuildingClassStndCode,BuildingQualityStndCode,BuildingConditionStndCode,ArchitecturalStyleStndCode,YearBuilt,EffectiveYearBuilt,YearRemodeled,NoOfStories,TotalRooms,TotalBedrooms,TotalKitchens,FullBath,ThreeQuarterBath,HalfBath,QuarterBath,TotalActualBathCount,TotalBathPlumbingFixtures,RoofCoverStndCode,RoofStructureTypeStndCode,HeatingTypeorSystemStndCode,AirConditioningTypeorSystemStndCode,FoundationTypeStndCode,ElevatorStndCode,FireplaceFlag,FireplaceNumber,WaterStndCode,SewerStndCode,LandAssessedValue,ImprovementAssessedValue,TotalAssessedValue,AssessmentYear,LandMarketValue,ImprovementMarketValue,TotalMarketValue,MarketValueYear,LandAppraisalValue,ImprovementAppraisalValue,TotalAppraisalValue,AppraisalValueYear,geometry,BP,point_geometry,BP1600_max,BP1600_mean,index_right,Fire_ID,Fire_Name,Year,StartMonth,StartDay,Fire_Type,Acres,STUSPS,Buffed_geometry100km,MTBS_geometry,Distance,CIP,Deflator_base2020,SalesPrice,LoanAmt,roof_AP,roof_AS,roof_BR,roof_BU,roof_CN,roof_CS,roof_FG,roof_GR,roof_ME,roof_OT,roof_RC,roof_SH,roof_SL,roof_ST,roof_TG,roof_TL,roof_UR,roof_WS,Bathrooms,AsphaltRoof,Asbestos,Bermuda,BuiltUpRoof,ConcreteRoof,CompositShingle,MetalRoof,TileRoof,Wood
25,97925774,8001,ADAMS,H,1993-11-30,WRDE,1993-11-26,69000.00,SR,62100.00,,5495 BRYANT ST,DENVER,CO,08FF4216-820C-E811-80C3-3863BB430E3F,10019.00,DT,,,C,AV,RA,1948.00,,,1.00,5.00,2.00,,1.00,,0.00,,,6.00,CS,GBL,FA,,,,,,,,4900.00,12770.00,17670.00,2017.00,68000.00,177341.00,245341.00,2017.00,,,,,POINT (-428924.217 -565424.354),0.00,POINT (-428924.217 -565424.354),0.00,0.00,4535.00,CO3976210526320110320,INDIANGULCHFIRE,1993,3.00,20.00,Wildfire,1640.00,CO,"POLYGON ((-471930.698 -570157.810, -472068.900...","POLYGON ((-451536.725 -569134.815, -451547.548...",19832.56,144.46,0.56,123620.22,111258.20,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00
40,97926641,8001,ADAMS,H,1993-12-20,WRDE,1993-12-15,110000.00,SR,104500.00,,5392 QUITMAN ST,DENVER,CO,33FB6116-820C-E811-80C3-3863BB430E3F,11326.00,AT,,,D+,AV,RA,1947.00,,,1.00,7.00,4.00,,2.00,,0.00,,,9.00,CS,GBL,FA,,,,,,,,9720.00,19680.00,29400.00,2017.00,135000.00,273362.00,408362.00,2017.00,,,,,POINT (-430680.606 -565585.132),0.00,POINT (-430680.606 -565585.132),0.00,0.00,4535.00,CO3976210526320110320,INDIANGULCHFIRE,1993,3.00,20.00,Wildfire,1640.00,CO,"POLYGON ((-471930.698 -570157.810, -472068.900...","POLYGON ((-451536.725 -569134.815, -451547.548...",18068.87,144.46,0.56,197075.71,187221.92,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00
257,97937039,8001,ADAMS,H,1994-09-09,WRDE,1994-09-02,100888.00,SR,18000.00,SL,5590 CLAY ST,DENVER,CO,9C661A16-820C-E811-80C3-3863BB430E3F,9801.00,,,,C,AV,RA,1962.00,,,1.00,6.00,4.00,,2.00,,0.00,,,9.00,CS,GBL,FA,CE,,,,1.00,,,4900.00,15540.00,20440.00,2017.00,68000.00,215827.00,283827.00,2017.00,,,,,POINT (-428962.251 -565223.094),0.00,POINT (-428962.251 -565223.094),0.00,0.00,4535.00,CO3976210526320110320,INDIANGULCHFIRE,1994,3.00,20.00,Wildfire,1640.00,CO,"POLYGON ((-471930.698 -570157.810, -472068.900...","POLYGON ((-451536.725 -569134.815, -451547.548...",19815.45,148.22,0.57,176157.47,31429.25,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00
265,97937135,8001,ADAMS,H,1994-09-13,WRDE,1994-09-06,132500.00,SR,106000.00,,5431 NEWTON ST,DENVER,CO,55921516-820C-E811-80C3-3863BB430E3F,24829.00,AT,,,C,AV,,1973.00,,,,7.00,4.00,,4.00,,0.00,,,8.00,CS,GBL,HW,,,,,1.00,,,14580.00,11130.00,25710.00,2017.00,202500.00,154595.00,357095.00,2017.00,,,,,POINT (-430426.527 -565451.971),0.00,POINT (-430426.527 -565451.971),0.00,0.00,4535.00,CO3976210526320110320,INDIANGULCHFIRE,1994,3.00,20.00,Wildfire,1640.00,CO,"POLYGON ((-471930.698 -570157.810, -472068.900...","POLYGON ((-451536.725 -569134.815, -451547.548...",18335.14,148.22,0.57,231354.22,185083.38,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,4.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00
329,97938515,8001,ADAMS,H,1994-10-21,WRDE,1994-10-18,118000.00,SR,40000.00,,2561 W 55TH AVE,DENVER,CO,FE01D515-820C-E811-80C3-3863BB430E3F,15682.00,AT,,,C,GD,,1994.00,,,2.00,6.00,4.00,,3.00,,0.00,,,13.00,CS,GBL,,CE,,,,,,,4900.00,19860.00,24760.00,2017.00,68000.00,275868.00,343868.00,2017.00,,,,,POINT (-428887.175 -565358.242),0.00,POINT (-428887.175 -565358.242),0.00,0.00,4535.00,CO3976210526320110320,INDIANGULCHFIRE,1994,3.00,20.00,Wildfire,1640.00,CO,"POLYGON ((-471930.698 -570157.810, -472068.900...","POLYGON ((-451536.725 -569134.815, -451547.548...",19875.99,148.22,0.57,206036.21,69842.78,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2825280,438045132,8123,WELD,H,2018-07-23,WRDE,2018-07-20,500000.00,SR,400000.00,,7895 COUNTY ROAD 96,WELLINGTON,CO,DA2ACBB7-EC36-E811-80C3-3863BB430E3F,1441836.00,BI,,,,,,2002.00,,,1.00,0.00,3.00,,2.00,,1.00,,,13.00,CS,GHP,HW,,,,,,,,950.00,18960.00,19910.00,2017.00,3279.00,232146.00,235425.00,2017.00,,,,,POINT (-413800.361 -467470.671),0.00,POINT (-413800.361 -467470.671),0.00,0.00,4621.00,CO4070010491720020320,UNNAMED,2018,3.00,20.00,Wildfire,1209.00,CO,"POLYGON ((-434830.211 -473353.962, -435121.035...","POLYGON ((-413448.943 -465855.862, -413484.941...",685.64,251.11,0.97,515340.75,412272.60,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00
2825302,439387681,8123,WELD,H,2018-08-09,WRDE,2018-08-08,84000.00,SR,67200.00,,49925 COUNTY ROAD 75,BRIGGSDALE,CO,88E7BAB7-EC36-E811-80C3-3863BB430E3F,3449516.00,BA,,,,,,2001.00,,,1.00,5.00,3.00,,2.00,,0.00,,,10.00,CS,GHP,CE,CE,,,,,,,400.00,6420.00,6820.00,2017.00,1395.00,89142.00,90537.00,2017.00,,,,,POINT (-367103.482 -465834.177),0.00,POINT (-367103.482 -465834.177),0.00,0.00,4636.00,CO4083210437120011107,SPRING,2018,11.00,7.00,Wildfire,1380.00,CO,"POLYGON ((-387767.079 -447803.508, -387600.895...","POLYGON ((-367273.843 -451362.375, -367262.564...",10177.12,251.11,0.97,86577.25,69261.80,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00
2825304,440148623,8123,WELD,H,2018-08-17,WRDE,2018-08-17,108300.00,SR,105051.00,,241 SHIRLEY AVE,NEW RAYMER,CO,31C0B1B7-EC36-E811-80C3-3863BB430E3F,27878.00,,,,,,,1919.00,1998.00,1998.00,1.00,4.00,2.00,,1.00,,0.00,,,6.00,CS,GHP,FL,,,,,,,,170.00,3650.00,3820.00,2017.00,2400.00,50699.00,53099.00,2017.00,,,,,POINT (-324569.567 -480647.330),0.00,POINT (-324569.567 -480647.330),0.00,0.00,4613.00,CO4063510401020020601,UNNAMED,2018,6.00,1.00,Wildfire,5258.00,CO,"POLYGON ((-360715.342 -483084.911, -360797.751...","POLYGON ((-338463.482 -476623.227, -338454.906...",10977.62,251.11,0.97,111622.81,108274.12,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00
2825308,440494101,8123,WELD,H,2018-08-22,WRDE,2018-08-17,140000.00,SR,112000.00,,67164 COUNTY ROAD 390,WELD,CO,8ECE30B6-EC36-E811-80C3-3863BB430E3F,97574.00,BA,,,,,,1925.00,1948.00,1948.00,1.00,5.00,4.00,,2.00,,0.00,,,9.00,CS,,CE,CE,,,,,,,320.00,6600.00,6920.00,2017.00,1120.00,91629.00,92749.00,2017.00,,,,,POINT (-368820.387 -437369.223),0.00,POINT (-368820.387 -437369.223),0.00,0.00,4636.00,CO4083210437120011107,SPRING,2018,11.00,7.00,Wildfire,1380.00,CO,"POLYGON ((-387767.079 -447803.508, -387600.895...","POLYGON ((-367273.843 -451362.375, -367262.564...",13645.48,251.11,0.97,144295.41,115436.33,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00


In [42]:
samp['BP'] = samp['BP']*10000

In [43]:
pd.options.display.float_format = '{:.2f}'.format

In [44]:
samp.describe()

Unnamed: 0,TransId,FIPS,SalesPriceAmount,LoanAmount,LotSizeSquareFeet,GarageAreaSqFt,YearBuilt,EffectiveYearBuilt,YearRemodeled,NoOfStories,TotalRooms,TotalBedrooms,TotalKitchens,FullBath,ThreeQuarterBath,HalfBath,QuarterBath,TotalActualBathCount,TotalBathPlumbingFixtures,FireplaceNumber,LandAssessedValue,ImprovementAssessedValue,TotalAssessedValue,AssessmentYear,LandMarketValue,ImprovementMarketValue,TotalMarketValue,MarketValueYear,LandAppraisalValue,ImprovementAppraisalValue,TotalAppraisalValue,AppraisalValueYear,BP,BP1600_max,BP1600_mean,index_right,Year,StartMonth,StartDay,Acres,Distance,CIP,Deflator_base2020,SalesPrice,LoanAmt,roof_AP,roof_AS,roof_BR,roof_BU,roof_CN,roof_CS,roof_FG,roof_GR,roof_ME,roof_OT,roof_RC,roof_SH,roof_SL,roof_ST,roof_TG,roof_TL,roof_UR,roof_WS,Bathrooms,AsphaltRoof,Asbestos,Bermuda,BuiltUpRoof,ConcreteRoof,CompositShingle,MetalRoof,TileRoof,Wood
count,129307.0,129307.0,129307.0,129303.0,125866.0,955.0,129290.0,31815.0,30346.0,100351.0,129306.0,129307.0,972.0,92088.0,974.0,93062.0,974.0,37218.0,20446.0,70410.0,121144.0,121197.0,129271.0,129307.0,127617.0,127673.0,129281.0,129281.0,2551.0,2551.0,2551.0,974.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129303.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0,129307.0
mean,146528238.38,8050.09,304578.12,228813.15,196648.17,623.73,1983.76,1994.01,1994.1,1.43,5.17,3.37,1.07,2.41,0.0,0.2,0.0,2.63,3.43,1.41,8090.45,22491.28,30570.9,2017.14,108918.95,307737.89,413880.83,2017.09,61678.28,151802.56,213480.83,2017.0,17.53,0.0,0.0,4733.6,2004.7,5.26,16.41,10420.69,13293.91,193.74,0.75,402947.36,297807.1,0.05,0.0,0.0,0.01,0.0,0.56,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.06,1.86,0.05,0.0,0.0,0.01,0.0,0.56,0.03,0.03,0.06
std,105963058.35,24.13,213276.73,1019978.12,3119872.64,243.9,20.39,19.21,19.42,0.5,3.72,0.96,0.25,0.99,0.0,0.53,0.0,0.8,13.19,0.66,9948.88,14888.2,22620.32,0.35,131651.93,190118.33,286278.6,0.29,122072.64,269480.6,357428.43,0.0,20.21,0.0,0.0,1931.68,7.02,2.06,7.66,21092.05,5422.08,30.92,0.12,265501.75,1088715.93,0.23,0.01,0.02,0.08,0.06,0.5,0.01,0.01,0.17,0.04,0.04,0.04,0.03,0.0,0.03,0.18,0.03,0.24,1.49,0.23,0.01,0.02,0.08,0.06,0.5,0.17,0.18,0.24
min,97925774.0,8001.0,6500.0,0.0,436.0,160.0,1859.0,1890.0,1890.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.5,0.0,1.0,0.0,0.0,20.0,2016.0,0.0,53.0,122.0,2016.0,0.0,0.0,0.0,2017.0,1.0,0.0,0.0,4360.0,1993.0,1.0,1.0,1001.0,0.0,144.46,0.56,10890.36,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,103266645.0,8041.0,176000.0,135757.5,8625.0,484.0,1974.0,1984.0,1984.0,1.0,0.0,3.0,1.0,2.0,0.0,0.0,0.0,2.25,0.0,1.0,3310.0,14230.0,18240.0,2017.0,45500.0,197526.0,251392.0,2017.0,0.0,0.0,0.0,2017.0,3.19,0.0,0.0,4496.0,1999.0,3.0,11.0,1640.0,10164.41,166.57,0.64,247421.28,194531.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,104471347.0,8041.0,253490.0,200000.0,21780.0,576.0,1987.0,1998.0,1998.0,1.0,6.0,3.0,1.0,2.0,0.0,0.0,0.0,2.5,0.0,1.0,5563.0,19600.0,25510.0,2017.0,75307.0,268901.0,350873.0,2017.0,0.0,0.0,0.0,2017.0,8.62,0.0,0.0,4514.0,2003.0,6.0,18.0,3834.0,14792.71,183.96,0.71,340483.57,269092.57,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
75%,106200431.0,8059.0,370000.0,285165.0,92347.0,720.0,1998.0,2010.0,2011.0,2.0,8.0,4.0,1.0,3.0,0.0,0.0,0.0,3.5,7.0,2.0,9070.0,26940.0,35890.0,2017.0,123631.0,367047.0,490800.0,2017.0,39500.0,240130.0,381775.0,2017.0,21.83,0.0,0.0,4535.0,2010.0,6.0,23.0,10082.0,17658.14,218.06,0.84,479324.32,364389.53,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
max,449544729.0,8123.0,4750000.0,299571007.0,377142480.0,2036.0,2017.0,2017.0,2017.0,4.5,90.0,16.0,3.0,16.0,0.0,7.0,0.0,8.5,776.0,9.0,254720.0,1063027.0,1250947.0,2018.0,3697980.0,5515620.0,9213600.0,2018.0,800000.0,2423940.0,2934680.0,2017.0,114.18,0.01,0.01,21924.0,2018.0,11.0,30.0,129417.0,19999.38,251.11,0.97,4896559.16,323041694.69,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,16.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [45]:
samp['BP'].describe()

count   129307.00
mean        17.53
std         20.21
min          1.00
25%          3.19
50%          8.62
75%         21.83
max        114.18
Name: BP, dtype: float64

In [46]:
samp[samp['BP']>40]['BP'].mean()

57.55419796961785

In [47]:
riskdummies = pd.cut(samp.BP, bins=[0, 10, 20, 30, 40, 10000], labels = ['verylow', 
       'low', 'moderate', 'high', 'veryhigh',])

In [48]:
samp

Unnamed: 0,TransId,FIPS,County,DataClassStndCode,RecordingDate,DocumentTypeStndCode,DocumentDate,SalesPriceAmount,PropertyUseStndCode,LoanAmount,LoanTypeStndCode,PropertyFullStreetAddress,PropertyCity,PropertyState,RowID,LotSizeSquareFeet,GarageStndCode,GarageAreaSqFt,BuildingClassStndCode,BuildingQualityStndCode,BuildingConditionStndCode,ArchitecturalStyleStndCode,YearBuilt,EffectiveYearBuilt,YearRemodeled,NoOfStories,TotalRooms,TotalBedrooms,TotalKitchens,FullBath,ThreeQuarterBath,HalfBath,QuarterBath,TotalActualBathCount,TotalBathPlumbingFixtures,RoofCoverStndCode,RoofStructureTypeStndCode,HeatingTypeorSystemStndCode,AirConditioningTypeorSystemStndCode,FoundationTypeStndCode,ElevatorStndCode,FireplaceFlag,FireplaceNumber,WaterStndCode,SewerStndCode,LandAssessedValue,ImprovementAssessedValue,TotalAssessedValue,AssessmentYear,LandMarketValue,ImprovementMarketValue,TotalMarketValue,MarketValueYear,LandAppraisalValue,ImprovementAppraisalValue,TotalAppraisalValue,AppraisalValueYear,geometry,BP,point_geometry,BP1600_max,BP1600_mean,index_right,Fire_ID,Fire_Name,Year,StartMonth,StartDay,Fire_Type,Acres,STUSPS,Buffed_geometry100km,MTBS_geometry,Distance,CIP,Deflator_base2020,SalesPrice,LoanAmt,roof_AP,roof_AS,roof_BR,roof_BU,roof_CN,roof_CS,roof_FG,roof_GR,roof_ME,roof_OT,roof_RC,roof_SH,roof_SL,roof_ST,roof_TG,roof_TL,roof_UR,roof_WS,Bathrooms,AsphaltRoof,Asbestos,Bermuda,BuiltUpRoof,ConcreteRoof,CompositShingle,MetalRoof,TileRoof,Wood
25,97925774,8001,ADAMS,H,1993-11-30,WRDE,1993-11-26,69000.00,SR,62100.00,,5495 BRYANT ST,DENVER,CO,08FF4216-820C-E811-80C3-3863BB430E3F,10019.00,DT,,,C,AV,RA,1948.00,,,1.00,5.00,2.00,,1.00,,0.00,,,6.00,CS,GBL,FA,,,,,,,,4900.00,12770.00,17670.00,2017.00,68000.00,177341.00,245341.00,2017.00,,,,,POINT (-428924.217 -565424.354),1.40,POINT (-428924.217 -565424.354),0.00,0.00,4535.00,CO3976210526320110320,INDIANGULCHFIRE,1993,3.00,20.00,Wildfire,1640.00,CO,"POLYGON ((-471930.698 -570157.810, -472068.900...","POLYGON ((-451536.725 -569134.815, -451547.548...",19832.56,144.46,0.56,123620.22,111258.20,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00
40,97926641,8001,ADAMS,H,1993-12-20,WRDE,1993-12-15,110000.00,SR,104500.00,,5392 QUITMAN ST,DENVER,CO,33FB6116-820C-E811-80C3-3863BB430E3F,11326.00,AT,,,D+,AV,RA,1947.00,,,1.00,7.00,4.00,,2.00,,0.00,,,9.00,CS,GBL,FA,,,,,,,,9720.00,19680.00,29400.00,2017.00,135000.00,273362.00,408362.00,2017.00,,,,,POINT (-430680.606 -565585.132),1.43,POINT (-430680.606 -565585.132),0.00,0.00,4535.00,CO3976210526320110320,INDIANGULCHFIRE,1993,3.00,20.00,Wildfire,1640.00,CO,"POLYGON ((-471930.698 -570157.810, -472068.900...","POLYGON ((-451536.725 -569134.815, -451547.548...",18068.87,144.46,0.56,197075.71,187221.92,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00
257,97937039,8001,ADAMS,H,1994-09-09,WRDE,1994-09-02,100888.00,SR,18000.00,SL,5590 CLAY ST,DENVER,CO,9C661A16-820C-E811-80C3-3863BB430E3F,9801.00,,,,C,AV,RA,1962.00,,,1.00,6.00,4.00,,2.00,,0.00,,,9.00,CS,GBL,FA,CE,,,,1.00,,,4900.00,15540.00,20440.00,2017.00,68000.00,215827.00,283827.00,2017.00,,,,,POINT (-428962.251 -565223.094),1.70,POINT (-428962.251 -565223.094),0.00,0.00,4535.00,CO3976210526320110320,INDIANGULCHFIRE,1994,3.00,20.00,Wildfire,1640.00,CO,"POLYGON ((-471930.698 -570157.810, -472068.900...","POLYGON ((-451536.725 -569134.815, -451547.548...",19815.45,148.22,0.57,176157.47,31429.25,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00
265,97937135,8001,ADAMS,H,1994-09-13,WRDE,1994-09-06,132500.00,SR,106000.00,,5431 NEWTON ST,DENVER,CO,55921516-820C-E811-80C3-3863BB430E3F,24829.00,AT,,,C,AV,,1973.00,,,,7.00,4.00,,4.00,,0.00,,,8.00,CS,GBL,HW,,,,,1.00,,,14580.00,11130.00,25710.00,2017.00,202500.00,154595.00,357095.00,2017.00,,,,,POINT (-430426.527 -565451.971),1.93,POINT (-430426.527 -565451.971),0.00,0.00,4535.00,CO3976210526320110320,INDIANGULCHFIRE,1994,3.00,20.00,Wildfire,1640.00,CO,"POLYGON ((-471930.698 -570157.810, -472068.900...","POLYGON ((-451536.725 -569134.815, -451547.548...",18335.14,148.22,0.57,231354.22,185083.38,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,4.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00
329,97938515,8001,ADAMS,H,1994-10-21,WRDE,1994-10-18,118000.00,SR,40000.00,,2561 W 55TH AVE,DENVER,CO,FE01D515-820C-E811-80C3-3863BB430E3F,15682.00,AT,,,C,GD,,1994.00,,,2.00,6.00,4.00,,3.00,,0.00,,,13.00,CS,GBL,,CE,,,,,,,4900.00,19860.00,24760.00,2017.00,68000.00,275868.00,343868.00,2017.00,,,,,POINT (-428887.175 -565358.242),1.45,POINT (-428887.175 -565358.242),0.00,0.00,4535.00,CO3976210526320110320,INDIANGULCHFIRE,1994,3.00,20.00,Wildfire,1640.00,CO,"POLYGON ((-471930.698 -570157.810, -472068.900...","POLYGON ((-451536.725 -569134.815, -451547.548...",19875.99,148.22,0.57,206036.21,69842.78,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2825280,438045132,8123,WELD,H,2018-07-23,WRDE,2018-07-20,500000.00,SR,400000.00,,7895 COUNTY ROAD 96,WELLINGTON,CO,DA2ACBB7-EC36-E811-80C3-3863BB430E3F,1441836.00,BI,,,,,,2002.00,,,1.00,0.00,3.00,,2.00,,1.00,,,13.00,CS,GHP,HW,,,,,,,,950.00,18960.00,19910.00,2017.00,3279.00,232146.00,235425.00,2017.00,,,,,POINT (-413800.361 -467470.671),10.81,POINT (-413800.361 -467470.671),0.00,0.00,4621.00,CO4070010491720020320,UNNAMED,2018,3.00,20.00,Wildfire,1209.00,CO,"POLYGON ((-434830.211 -473353.962, -435121.035...","POLYGON ((-413448.943 -465855.862, -413484.941...",685.64,251.11,0.97,515340.75,412272.60,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00
2825302,439387681,8123,WELD,H,2018-08-09,WRDE,2018-08-08,84000.00,SR,67200.00,,49925 COUNTY ROAD 75,BRIGGSDALE,CO,88E7BAB7-EC36-E811-80C3-3863BB430E3F,3449516.00,BA,,,,,,2001.00,,,1.00,5.00,3.00,,2.00,,0.00,,,10.00,CS,GHP,CE,CE,,,,,,,400.00,6420.00,6820.00,2017.00,1395.00,89142.00,90537.00,2017.00,,,,,POINT (-367103.482 -465834.177),10.49,POINT (-367103.482 -465834.177),0.00,0.00,4636.00,CO4083210437120011107,SPRING,2018,11.00,7.00,Wildfire,1380.00,CO,"POLYGON ((-387767.079 -447803.508, -387600.895...","POLYGON ((-367273.843 -451362.375, -367262.564...",10177.12,251.11,0.97,86577.25,69261.80,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00
2825304,440148623,8123,WELD,H,2018-08-17,WRDE,2018-08-17,108300.00,SR,105051.00,,241 SHIRLEY AVE,NEW RAYMER,CO,31C0B1B7-EC36-E811-80C3-3863BB430E3F,27878.00,,,,,,,1919.00,1998.00,1998.00,1.00,4.00,2.00,,1.00,,0.00,,,6.00,CS,GHP,FL,,,,,,,,170.00,3650.00,3820.00,2017.00,2400.00,50699.00,53099.00,2017.00,,,,,POINT (-324569.567 -480647.330),3.31,POINT (-324569.567 -480647.330),0.00,0.00,4613.00,CO4063510401020020601,UNNAMED,2018,6.00,1.00,Wildfire,5258.00,CO,"POLYGON ((-360715.342 -483084.911, -360797.751...","POLYGON ((-338463.482 -476623.227, -338454.906...",10977.62,251.11,0.97,111622.81,108274.12,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00
2825308,440494101,8123,WELD,H,2018-08-22,WRDE,2018-08-17,140000.00,SR,112000.00,,67164 COUNTY ROAD 390,WELD,CO,8ECE30B6-EC36-E811-80C3-3863BB430E3F,97574.00,BA,,,,,,1925.00,1948.00,1948.00,1.00,5.00,4.00,,2.00,,0.00,,,9.00,CS,,CE,CE,,,,,,,320.00,6600.00,6920.00,2017.00,1120.00,91629.00,92749.00,2017.00,,,,,POINT (-368820.387 -437369.223),8.40,POINT (-368820.387 -437369.223),0.00,0.00,4636.00,CO4083210437120011107,SPRING,2018,11.00,7.00,Wildfire,1380.00,CO,"POLYGON ((-387767.079 -447803.508, -387600.895...","POLYGON ((-367273.843 -451362.375, -367262.564...",13645.48,251.11,0.97,144295.41,115436.33,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00


In [49]:
riskdummies

25         verylow
40         verylow
257        verylow
265        verylow
329        verylow
            ...   
2825280        low
2825302        low
2825304    verylow
2825308    verylow
2825314    verylow
Name: BP, Length: 129307, dtype: category
Categories (5, object): ['verylow' < 'low' < 'moderate' < 'high' < 'veryhigh']

In [50]:
samp['risklevel'] = riskdummies

In [51]:
samp[['BuildingQualityStndCode']].value_counts()

BuildingQualityStndCode
C                          65884
B                          28748
C-                          8888
B+                          5529
B-                          3368
C+                          2658
A-                          1673
D+                          1618
D                           1391
A                            784
E                            330
A+                           131
D-                            30
dtype: int64

In [52]:
samp

Unnamed: 0,TransId,FIPS,County,DataClassStndCode,RecordingDate,DocumentTypeStndCode,DocumentDate,SalesPriceAmount,PropertyUseStndCode,LoanAmount,LoanTypeStndCode,PropertyFullStreetAddress,PropertyCity,PropertyState,RowID,LotSizeSquareFeet,GarageStndCode,GarageAreaSqFt,BuildingClassStndCode,BuildingQualityStndCode,BuildingConditionStndCode,ArchitecturalStyleStndCode,YearBuilt,EffectiveYearBuilt,YearRemodeled,NoOfStories,TotalRooms,TotalBedrooms,TotalKitchens,FullBath,ThreeQuarterBath,HalfBath,QuarterBath,TotalActualBathCount,TotalBathPlumbingFixtures,RoofCoverStndCode,RoofStructureTypeStndCode,HeatingTypeorSystemStndCode,AirConditioningTypeorSystemStndCode,FoundationTypeStndCode,ElevatorStndCode,FireplaceFlag,FireplaceNumber,WaterStndCode,SewerStndCode,LandAssessedValue,ImprovementAssessedValue,TotalAssessedValue,AssessmentYear,LandMarketValue,ImprovementMarketValue,TotalMarketValue,MarketValueYear,LandAppraisalValue,ImprovementAppraisalValue,TotalAppraisalValue,AppraisalValueYear,geometry,BP,point_geometry,BP1600_max,BP1600_mean,index_right,Fire_ID,Fire_Name,Year,StartMonth,StartDay,Fire_Type,Acres,STUSPS,Buffed_geometry100km,MTBS_geometry,Distance,CIP,Deflator_base2020,SalesPrice,LoanAmt,roof_AP,roof_AS,roof_BR,roof_BU,roof_CN,roof_CS,roof_FG,roof_GR,roof_ME,roof_OT,roof_RC,roof_SH,roof_SL,roof_ST,roof_TG,roof_TL,roof_UR,roof_WS,Bathrooms,AsphaltRoof,Asbestos,Bermuda,BuiltUpRoof,ConcreteRoof,CompositShingle,MetalRoof,TileRoof,Wood,risklevel
25,97925774,8001,ADAMS,H,1993-11-30,WRDE,1993-11-26,69000.00,SR,62100.00,,5495 BRYANT ST,DENVER,CO,08FF4216-820C-E811-80C3-3863BB430E3F,10019.00,DT,,,C,AV,RA,1948.00,,,1.00,5.00,2.00,,1.00,,0.00,,,6.00,CS,GBL,FA,,,,,,,,4900.00,12770.00,17670.00,2017.00,68000.00,177341.00,245341.00,2017.00,,,,,POINT (-428924.217 -565424.354),1.40,POINT (-428924.217 -565424.354),0.00,0.00,4535.00,CO3976210526320110320,INDIANGULCHFIRE,1993,3.00,20.00,Wildfire,1640.00,CO,"POLYGON ((-471930.698 -570157.810, -472068.900...","POLYGON ((-451536.725 -569134.815, -451547.548...",19832.56,144.46,0.56,123620.22,111258.20,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00,verylow
40,97926641,8001,ADAMS,H,1993-12-20,WRDE,1993-12-15,110000.00,SR,104500.00,,5392 QUITMAN ST,DENVER,CO,33FB6116-820C-E811-80C3-3863BB430E3F,11326.00,AT,,,D+,AV,RA,1947.00,,,1.00,7.00,4.00,,2.00,,0.00,,,9.00,CS,GBL,FA,,,,,,,,9720.00,19680.00,29400.00,2017.00,135000.00,273362.00,408362.00,2017.00,,,,,POINT (-430680.606 -565585.132),1.43,POINT (-430680.606 -565585.132),0.00,0.00,4535.00,CO3976210526320110320,INDIANGULCHFIRE,1993,3.00,20.00,Wildfire,1640.00,CO,"POLYGON ((-471930.698 -570157.810, -472068.900...","POLYGON ((-451536.725 -569134.815, -451547.548...",18068.87,144.46,0.56,197075.71,187221.92,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00,verylow
257,97937039,8001,ADAMS,H,1994-09-09,WRDE,1994-09-02,100888.00,SR,18000.00,SL,5590 CLAY ST,DENVER,CO,9C661A16-820C-E811-80C3-3863BB430E3F,9801.00,,,,C,AV,RA,1962.00,,,1.00,6.00,4.00,,2.00,,0.00,,,9.00,CS,GBL,FA,CE,,,,1.00,,,4900.00,15540.00,20440.00,2017.00,68000.00,215827.00,283827.00,2017.00,,,,,POINT (-428962.251 -565223.094),1.70,POINT (-428962.251 -565223.094),0.00,0.00,4535.00,CO3976210526320110320,INDIANGULCHFIRE,1994,3.00,20.00,Wildfire,1640.00,CO,"POLYGON ((-471930.698 -570157.810, -472068.900...","POLYGON ((-451536.725 -569134.815, -451547.548...",19815.45,148.22,0.57,176157.47,31429.25,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00,verylow
265,97937135,8001,ADAMS,H,1994-09-13,WRDE,1994-09-06,132500.00,SR,106000.00,,5431 NEWTON ST,DENVER,CO,55921516-820C-E811-80C3-3863BB430E3F,24829.00,AT,,,C,AV,,1973.00,,,,7.00,4.00,,4.00,,0.00,,,8.00,CS,GBL,HW,,,,,1.00,,,14580.00,11130.00,25710.00,2017.00,202500.00,154595.00,357095.00,2017.00,,,,,POINT (-430426.527 -565451.971),1.93,POINT (-430426.527 -565451.971),0.00,0.00,4535.00,CO3976210526320110320,INDIANGULCHFIRE,1994,3.00,20.00,Wildfire,1640.00,CO,"POLYGON ((-471930.698 -570157.810, -472068.900...","POLYGON ((-451536.725 -569134.815, -451547.548...",18335.14,148.22,0.57,231354.22,185083.38,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,4.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00,verylow
329,97938515,8001,ADAMS,H,1994-10-21,WRDE,1994-10-18,118000.00,SR,40000.00,,2561 W 55TH AVE,DENVER,CO,FE01D515-820C-E811-80C3-3863BB430E3F,15682.00,AT,,,C,GD,,1994.00,,,2.00,6.00,4.00,,3.00,,0.00,,,13.00,CS,GBL,,CE,,,,,,,4900.00,19860.00,24760.00,2017.00,68000.00,275868.00,343868.00,2017.00,,,,,POINT (-428887.175 -565358.242),1.45,POINT (-428887.175 -565358.242),0.00,0.00,4535.00,CO3976210526320110320,INDIANGULCHFIRE,1994,3.00,20.00,Wildfire,1640.00,CO,"POLYGON ((-471930.698 -570157.810, -472068.900...","POLYGON ((-451536.725 -569134.815, -451547.548...",19875.99,148.22,0.57,206036.21,69842.78,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00,verylow
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2825280,438045132,8123,WELD,H,2018-07-23,WRDE,2018-07-20,500000.00,SR,400000.00,,7895 COUNTY ROAD 96,WELLINGTON,CO,DA2ACBB7-EC36-E811-80C3-3863BB430E3F,1441836.00,BI,,,,,,2002.00,,,1.00,0.00,3.00,,2.00,,1.00,,,13.00,CS,GHP,HW,,,,,,,,950.00,18960.00,19910.00,2017.00,3279.00,232146.00,235425.00,2017.00,,,,,POINT (-413800.361 -467470.671),10.81,POINT (-413800.361 -467470.671),0.00,0.00,4621.00,CO4070010491720020320,UNNAMED,2018,3.00,20.00,Wildfire,1209.00,CO,"POLYGON ((-434830.211 -473353.962, -435121.035...","POLYGON ((-413448.943 -465855.862, -413484.941...",685.64,251.11,0.97,515340.75,412272.60,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00,low
2825302,439387681,8123,WELD,H,2018-08-09,WRDE,2018-08-08,84000.00,SR,67200.00,,49925 COUNTY ROAD 75,BRIGGSDALE,CO,88E7BAB7-EC36-E811-80C3-3863BB430E3F,3449516.00,BA,,,,,,2001.00,,,1.00,5.00,3.00,,2.00,,0.00,,,10.00,CS,GHP,CE,CE,,,,,,,400.00,6420.00,6820.00,2017.00,1395.00,89142.00,90537.00,2017.00,,,,,POINT (-367103.482 -465834.177),10.49,POINT (-367103.482 -465834.177),0.00,0.00,4636.00,CO4083210437120011107,SPRING,2018,11.00,7.00,Wildfire,1380.00,CO,"POLYGON ((-387767.079 -447803.508, -387600.895...","POLYGON ((-367273.843 -451362.375, -367262.564...",10177.12,251.11,0.97,86577.25,69261.80,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00,low
2825304,440148623,8123,WELD,H,2018-08-17,WRDE,2018-08-17,108300.00,SR,105051.00,,241 SHIRLEY AVE,NEW RAYMER,CO,31C0B1B7-EC36-E811-80C3-3863BB430E3F,27878.00,,,,,,,1919.00,1998.00,1998.00,1.00,4.00,2.00,,1.00,,0.00,,,6.00,CS,GHP,FL,,,,,,,,170.00,3650.00,3820.00,2017.00,2400.00,50699.00,53099.00,2017.00,,,,,POINT (-324569.567 -480647.330),3.31,POINT (-324569.567 -480647.330),0.00,0.00,4613.00,CO4063510401020020601,UNNAMED,2018,6.00,1.00,Wildfire,5258.00,CO,"POLYGON ((-360715.342 -483084.911, -360797.751...","POLYGON ((-338463.482 -476623.227, -338454.906...",10977.62,251.11,0.97,111622.81,108274.12,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00,verylow
2825308,440494101,8123,WELD,H,2018-08-22,WRDE,2018-08-17,140000.00,SR,112000.00,,67164 COUNTY ROAD 390,WELD,CO,8ECE30B6-EC36-E811-80C3-3863BB430E3F,97574.00,BA,,,,,,1925.00,1948.00,1948.00,1.00,5.00,4.00,,2.00,,0.00,,,9.00,CS,,CE,CE,,,,,,,320.00,6600.00,6920.00,2017.00,1120.00,91629.00,92749.00,2017.00,,,,,POINT (-368820.387 -437369.223),8.40,POINT (-368820.387 -437369.223),0.00,0.00,4636.00,CO4083210437120011107,SPRING,2018,11.00,7.00,Wildfire,1380.00,CO,"POLYGON ((-387767.079 -447803.508, -387600.895...","POLYGON ((-367273.843 -451362.375, -367262.564...",13645.48,251.11,0.97,144295.41,115436.33,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2.00,0.00,0.00,0.00,0.00,0.00,1.00,0.00,0.00,0.00,verylow


In [53]:
samp['LotSizeSquareFeet'] = samp['LotSizeSquareFeet']/1000
samp['BP2'] = samp['BP']**2
samp['WoodBP'] = samp['Wood']*samp['BP']
samp['WoodBP2'] = samp['Wood']*samp['BP2']

In [None]:
x = np.linspace(0,100,100)
y = -16.7*x**2 + 2538*x
a = np.linspace(0, 100, 100)
b = -7.9722*x**2 + 276.8*x 
fig, ax = plt.subplots()
ax.plot(x,y, label='BP')
ax.plot(a,b, label='WoodBP')
plt.legend()
plt.show()

In [54]:
## Summary Stats
samp[['SalesPrice','Year','BuildingQualityStndCode','TotalBedrooms', 'LotSizeSquareFeet',
     'Bathrooms','YearBuilt', 'BP','Wood']].describe()

Unnamed: 0,SalesPrice,Year,TotalBedrooms,LotSizeSquareFeet,Bathrooms,YearBuilt,BP,Wood
count,129307.0,129307.0,129307.0,125866.0,129307.0,129290.0,129307.0,129307.0
mean,402947.36,2004.7,3.37,196.65,1.86,1983.76,17.53,0.06
std,265501.75,7.02,0.96,3119.87,1.49,20.39,20.21,0.24
min,10890.36,1993.0,1.0,0.44,0.0,1859.0,1.0,0.0
25%,247421.28,1999.0,3.0,8.62,0.0,1974.0,3.19,0.0
50%,340483.57,2003.0,3.0,21.78,2.0,1987.0,8.62,0.0
75%,479324.32,2010.0,4.0,92.35,3.0,1998.0,21.83,0.0
max,4896559.16,2018.0,16.0,377142.48,16.0,2017.0,114.18,1.0


In [218]:
## Correlation matrix
samp[['SalesPrice','Year','BuildingQualityStndCode','TotalBedrooms', 'LotSizeSquareFeet',
     'Bathrooms','YearBuilt', 'BP', 'BP2','Wood', 'WoodBP', 'WoodBP2']].corr()

Unnamed: 0,SalesPrice,Year,TotalBedrooms,LotSizeSquareFeet,Bathrooms,YearBuilt,BP,BP2,Wood,WoodBP,WoodBP2
SalesPrice,1.0,0.09,0.25,0.01,0.35,0.08,0.13,0.12,0.08,0.05,0.04
Year,0.09,1.0,-0.02,0.02,-0.11,0.06,0.01,-0.01,-0.08,-0.04,-0.03
TotalBedrooms,0.25,-0.02,1.0,-0.01,0.15,0.22,-0.11,-0.1,0.07,0.03,0.01
LotSizeSquareFeet,0.01,0.02,-0.01,1.0,-0.01,-0.02,0.01,-0.0,-0.01,-0.0,-0.0
Bathrooms,0.35,-0.11,0.15,-0.01,1.0,0.04,0.11,0.14,0.11,0.08,0.07
YearBuilt,0.08,0.06,0.22,-0.02,0.04,1.0,-0.08,-0.1,-0.06,-0.05,-0.04
BP,0.13,0.01,-0.11,0.01,0.11,-0.08,1.0,0.97,-0.01,0.2,0.22
BP2,0.12,-0.01,-0.1,-0.0,0.14,-0.1,0.97,1.0,0.0,0.2,0.23
Wood,0.08,-0.08,0.07,-0.01,0.11,-0.06,-0.01,0.0,1.0,0.61,0.45
WoodBP,0.05,-0.04,0.03,-0.0,0.08,-0.05,0.2,0.2,0.61,1.0,0.96


In [225]:
Linear = smf.ols(formula = f'SalesPrice ~ C(Year) + BuildingQualityStndCode + TotalBedrooms  + LotSizeSquareFeet + Bathrooms +  C(YearBuilt) + Wood + BP + WoodBP', data=samp).fit()

In [255]:
samp.plot().scatter(x=samp['TotalBedrooms'], y=samp['SalesPrice'])

<matplotlib.collections.PathCollection at 0x2bc22bb63f40>

In [226]:
Linear = smf.ols(formula = f'SalesPrice ~  TotalBedrooms ', data=samp).fit()

In [227]:
Linear.summary()

0,1,2,3
Dep. Variable:,SalesPrice,R-squared:,0.064
Model:,OLS,Adj. R-squared:,0.064
Method:,Least Squares,F-statistic:,8912.0
Date:,"Thu, 16 Sep 2021",Prob (F-statistic):,0.0
Time:,18:41:07,Log-Likelihood:,-1794100.0
No. Observations:,129307,AIC:,3588000.0
Df Residuals:,129305,BIC:,3588000.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1.653e+05,2616.735,63.169,0.000,1.6e+05,1.7e+05
TotalBedrooms,7.053e+04,747.097,94.404,0.000,6.91e+04,7.2e+04

0,1,2,3
Omnibus:,111738.863,Durbin-Watson:,1.556
Prob(Omnibus):,0.0,Jarque-Bera (JB):,5771299.023
Skew:,3.936,Prob(JB):,0.0
Kurtosis:,34.768,Cond. No.,13.8


In [221]:
Linear_quad = smf.ols(formula = f'SalesPrice ~ C(Year) + BuildingQualityStndCode+ TotalBedrooms  + LotSizeSquareFeet + Bathrooms +  C(YearBuilt) + Wood + BP + WoodBP + BP2 + WoodBP2', data=samp).fit()

In [222]:
Linear_quad.summary()

0,1,2,3
Dep. Variable:,SalesPrice,R-squared:,0.503
Model:,OLS,Adj. R-squared:,0.503
Method:,Least Squares,F-statistic:,618.0
Date:,"Thu, 16 Sep 2021",Prob (F-statistic):,0.0
Time:,11:48:06,Log-Likelihood:,-1598600.0
No. Observations:,117857,AIC:,3198000.0
Df Residuals:,117663,BIC:,3200000.0
Df Model:,193,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,6.585e+05,1.34e+05,4.929,0.000,3.97e+05,9.2e+05
C(Year)[T.1994],5334.3984,6805.087,0.784,0.433,-8003.465,1.87e+04
C(Year)[T.1995],1.388e+04,6835.255,2.031,0.042,487.416,2.73e+04
C(Year)[T.1996],1.017e+04,6763.323,1.504,0.133,-3084.275,2.34e+04
C(Year)[T.1997],2.393e+04,6599.324,3.626,0.000,1.1e+04,3.69e+04
C(Year)[T.1998],4.376e+04,6544.392,6.687,0.000,3.09e+04,5.66e+04
C(Year)[T.1999],6.785e+04,6543.988,10.368,0.000,5.5e+04,8.07e+04
C(Year)[T.2000],1.038e+05,6565.666,15.806,0.000,9.09e+04,1.17e+05
C(Year)[T.2001],1.295e+05,6538.728,19.812,0.000,1.17e+05,1.42e+05

0,1,2,3
Omnibus:,95273.915,Durbin-Watson:,1.858
Prob(Omnibus):,0.0,Jarque-Bera (JB):,8833110.702
Skew:,3.313,Prob(JB):,0.0
Kurtosis:,44.891,Cond. No.,9560000.0


In [170]:
7.9722/(2*276.8)

0.01440065028901734

In [171]:
16.7/(2538*2)

0.003289992119779354

In [172]:
Linear_quad.f_test( 'WoodBP=WoodBP2=0')

<class 'statsmodels.stats.contrast.ContrastResults'>
<F test: F=array([[3.07640279]]), p=0.0461285895475774, df_denom=1.18e+05, df_num=2>

In [189]:
Log = smf.ols(formula = f'np.log(SalesPrice) ~ C(Year) + BuildingQualityStndCode+ TotalBedrooms  + LotSizeSquareFeet + Bathrooms +  C(YearBuilt) + Wood*BP', data=samp).fit()

In [190]:
LinearLevels = smf.ols(formula = f'SalesPrice ~ C(Year) + BuildingQualityStndCode+ TotalBedrooms  + LotSizeSquareFeet + Bathrooms +  C(YearBuilt) + Wood*C(risklevel)', data=samp).fit()

In [191]:
LogLevels = smf.ols(formula = f'np.log(SalesPrice) ~ C(Year) + BuildingQualityStndCode+ TotalBedrooms  + LotSizeSquareFeet + Bathrooms + C(YearBuilt) + Wood*C(risklevel)', data=samp).fit()

In [192]:
LogSquared  = smf.ols(f'np.log(SalesPrice) ~ C(Year) + BuildingQualityStndCode+ TotalBedrooms  + LotSizeSquareFeet + Bathrooms +  C(YearBuilt) + Wood + BP + WoodBP + BP2 + WoodBP2', data=samp).fit()

In [193]:
LogSquared.f_test('WoodBP = WoodBP2 = 0')

<class 'statsmodels.stats.contrast.ContrastResults'>
<F test: F=array([[10.8814996]]), p=1.8821825993840405e-05, df_denom=1.18e+05, df_num=2>

In [194]:
LogSquared.f_test('WoodBP = WoodBP2 =0')

<class 'statsmodels.stats.contrast.ContrastResults'>
<F test: F=array([[10.8814996]]), p=1.8821825993840405e-05, df_denom=1.18e+05, df_num=2>

In [224]:
summary_col([Linear, Linear_quad, LinearLevels, Log, LogSquared, LogLevels], stars=True,
            regressor_order = ['Intercept','Year','BuildingQualityStndCode','TotalBedrooms', 'LotSizeSquareFeet',
     'Bathrooms','YearBuilt', 'BP', 'BP2','Wood', 'WoodBP', 'WoodBP2', 'C(risklevel)[T.verylow]', 
       'C(risklevel)[T.low]','Wood:C(risklevel)[T.low]',
                              'C(risklevel)[T.moderate]','Wood:C(risklevel)[T.moderate]',
                               'C(risklevel)[T.high]','Wood:C(risklevel)[T.high]',
                               'C(risklevel)[T.veryhigh]','Wood:C(risklevel)[T.veryhigh]'], drop_omitted=True , float_format='%.2f',)

0,1,2,3,4,5,6
,SalesPrice I,SalesPrice II,SalesPrice III,np.log(SalesPrice) I,np.log(SalesPrice) II,np.log(SalesPrice) III
Intercept,675659.41***,658469.66***,651914.14***,11.90***,11.86***,11.86***
,(133644.31),(133591.99),(133758.47),(0.27),(0.27),(0.28)
TotalBedrooms,29733.55***,29811.37***,29482.39***,0.08***,0.08***,0.08***
,(624.24),(623.99),(624.99),(0.00),(0.00),(0.00)
LotSizeSquareFeet,1.04***,0.99***,0.99***,0.00***,0.00***,0.00***
,(0.17),(0.17),(0.17),(0.00),(0.00),(0.00)
Bathrooms,37204.37***,37555.86***,37937.16***,0.07***,0.07***,0.07***
,(402.18),(403.34),(403.82),(0.00),(0.00),(0.00)
BP,1474.38***,2538.34***,,0.00***,0.01***,


In [None]:
samp.groupby('risklevel')['TransId'].count()

In [None]:
Mod1.summary2()

In [None]:
# Mod1.get_robustcov_results().summary()

In [None]:
Mod0 = smf.ols(formula = f'SalesPrice ~ C(Year) + BuildingQualityStndCode+ TotalBedrooms + Bathrooms +  YearBuilt + BP1600_max*AsphaltRoof  + BP1600_max*CompositShingle  + BP1600_max*MetalRoof + BP1600_max*Wood', data=samp).fit()

In [None]:
Mod0.get_robustcov_results().summary()

In [None]:
# from stargazer.stargazer import Stargazer

In [None]:
samp = data[['TransId','RecordingDate','SalesPriceAmount','FIPS','County','PropertyCity',
             'LotSizeSquareFeet','GarageStndCode','GarageAreaSqFt',
             'BuildingQualityStndCode','BuildingConditionStndCode','ArchitecturalStyleStndCode',
            'YearBuilt','TotalBedrooms','FullBath','ThreeQuarterBath','HalfBath','QuarterBath',
            'RoofCoverStndCode','RoofStructureTypeStndCode','HeatingTypeorSystemStndCode',
            'AirConditioningTypeorSystemStndCode','FireplaceNumber',
            'BP','BP1600_max','WoodRoof','SalesPrice','LoanAmt']].copy()