In [2]:
import pickle
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from google.cloud import bigquery


# Load the pre-trained decision tree. The file is opened in a `with` block so
# the handle is closed as soon as unpickling finishes.
# SECURITY: pickle.load can execute arbitrary code; only load model files that
# come from a trusted source.
with open("decision_tree_0_74.sav", 'rb') as model_file:
    model = pickle.load(model_file)
le = LabelEncoder()
project_name = 'weatherlink-404323'
client = bigquery.Client(project=project_name)

sql_query = """
select
*
FROM
`weatherlink_master.census_accident_master_filtered`
"""

# Pull the whole table into memory as a DataFrame.
df = client.query(sql_query).to_dataframe()

# NOTE(review): the encoder is re-fitted here on the inference data, once per
# column. The integer codes only line up with the ones the model was trained
# on if both fits saw the same set of labels - confirm against the training
# notebook, or persist and reuse the training encoders.
df["day_of_week_name_le"] = le.fit_transform(df["day_of_week_name"])
df["atmospheric_cond_name_le"] = le.fit_transform(df["atmospheric_cond_name"])

# Selecting features for prediction
df_temp = df[['week_of_crash', 'day_of_week_name_le', 'atmospheric_cond_name_le', 'no_car_ratio', 'male_pop_ratio', 'median_age', 'median_rent', 'income_per_capita']]

# Make prediction and store it alongside the source rows
predictions = model.predict(df_temp)
df['predictions'] = predictions

In [20]:
# Independent copy holding only the join key, crash time, year and model output.
x = df.loc[:, ['geoid', 'timestamp_of_crash', 'year', 'predictions']].copy()

In [21]:
import geopandas as gpd
import pandas as pd

# Read the shapefile and expose its GEOID column under the lowercase name
# `geoid`, matching the key used by the prediction frame.
geo_data = gpd.read_file('tl_2023_us_county.shp').rename(columns={"GEOID": "geoid"})


In [22]:
# Shapefile attributes that are not needed downstream; every column not listed
# here stays in geo_data.
columns_to_drop = [
    'STATEFP',
    'COUNTYFP',
    'COUNTYNS',
    'GEOIDFQ',
    'NAME',
    'NAMELSAD',
    'LSAD',
    'CLASSFP',
    'MTFCC',
    'CSAFP',
    'CBSAFP',
    'METDIVFP',
    'FUNCSTAT',
    'ALAND',
    'AWATER',
    'geometry',
]

# Removes the columns from the existing object (no new frame is created).
geo_data.drop(labels=columns_to_drop, axis="columns", inplace=True)


In [23]:
def _zero_pad_geoid(series):
    """Return `series` as whitespace-stripped strings re-rendered as 5-digit, zero-padded integers."""
    as_text = series.astype(str).str.strip()
    # Round-trip through int so the width is normalised, then pad back to five digits.
    return as_text.apply(lambda raw: f"{int(raw):05d}")


# Bring the join key into the same textual form on both sides of the merge.
geo_data['geoid'] = _zero_pad_geoid(geo_data['geoid'])
x['geoid'] = _zero_pad_geoid(x['geoid'])



In [24]:
# Inner join: only geoids present in both frames survive.
merged_data = pd.merge(left=geo_data, right=x, how='inner', on='geoid')

In [25]:
# Shorten the two INTPT* column names to LAT / LON in one pass.
merged_data = merged_data.rename(columns={"INTPTLAT": "LAT", "INTPTLON": "LON"})

In [26]:
def get_color(prediction):
    """Map a predicted crash count to an RGBA colour list (alpha fixed at 140).

    1 -> green, 2 -> yellow, 3 -> orange, 4 or 5 -> red. Any other value
    falls through to purple, which the notebook uses for 6+ crashes.
    """
    # Built on every call so each caller receives its own list object.
    palette = (
        ((1,), [0, 255, 0, 140]),      # green: 1 crash
        ((2,), [255, 255, 0, 140]),    # yellow: 2 crashes
        ((3,), [255, 165, 0, 140]),    # orange: 3 crashes
        ((4, 5), [255, 0, 0, 140]),    # red: 4-5 crashes
    )
    for crash_counts, rgba in palette:
        if prediction in crash_counts:
            return rgba
    return [128, 0, 128, 140]          # purple: everything else

In [35]:
# Keep only the rows whose timestamp_of_crash compares equal to '2019-06-10',
# as an independent copy so later column assignments do not touch merged_data.
# NOTE(review): this is an equality test against a string - confirm the
# column's dtype makes it match the intended rows.
data = merged_data.loc[merged_data['timestamp_of_crash'] == '2019-06-10'].copy()

In [36]:
import pandas as pd
import pydeck as pdk

import os

# The Mapbox token is read from the environment instead of being committed in
# the notebook. Set MAPBOX_API_KEY before starting the kernel; if it is unset
# the value is None.
# NOTE(review): the token that used to be hard-coded here has been exposed and
# should be rotated.
pdk.settings.mapbox_key = os.environ.get("MAPBOX_API_KEY")

def convert_coord(coord):
    """Turn a coordinate value into a float.

    Floats are returned unchanged. Strings have every '+' character removed
    and are then parsed with float(). Any other type yields None.
    """
    if isinstance(coord, float):
        return coord
    if not isinstance(coord, str):
        return None
    unsigned_text = coord.replace('+', '')
    return float(unsigned_text)

# Run both coordinate columns through convert_coord.
for coord_col in ('LAT', 'LON'):
    data[coord_col] = data[coord_col].apply(convert_coord)

# One RGBA list per row, chosen from the predicted crash count.
data['color'] = data['predictions'].apply(get_color)

# Scatter layer: one circle per row, positioned by LON/LAT and coloured by `color`.
layer = pdk.Layer(
    "ScatterplotLayer",
    data=data,
    get_position=["LON", "LAT"],
    get_color="color",
    get_radius=100000,
    pickable=True,
    opacity=0.8,
)

# Initial camera at latitude 42.1657, longitude -74.9481, zoom 6, looking
# straight down. These coordinates are well north-west of New York City, so
# the view is not centred on the city itself.
view_state = pdk.ViewState(
    longitude=-74.9481,
    latitude=42.1657,
    zoom=6,
    pitch=0,
)

deck = pdk.Deck(initial_view_state=view_state, layers=[layer])

deck.show()



DeckGLWidget(carto_key=None, custom_libraries=[], google_maps_key=None, json_input='{\n  "initialViewState": {…

In [132]:
# Write the rendered map to predictionmap.html.
deck.to_html(filename='predictionmap.html')