# Setup and Imports

In [17]:
import pandas as pd
import numpy as np
import london_data_functions as ldf
import warnings
import pickle
from datetime import datetime
from sklearn.preprocessing import OneHotEncoder

# Get the Data

In [18]:
%%time
query = """
            SELECT incident_number, timestamp_of_call, incident_group, property_category, 
                    address_qualifier, borough_name, first_pump_arriving_attendance_time as first_time,
                    num_stations_with_pumps_attending as station_pumps, num_pumps_attending as pumps_attending,
                    FROM `gdac-327115.LondonFire.fire_brigade`
        """

london = ldf.fetch_london_data(query_string=query, project_id = "gdac-327115", location = "eu")

Wall time: 6.68 s


# Clean the Data

In [7]:
# Silence warnings only for the duration of the cleaning call; the previous
# global filter also hid every warning raised by cells executed afterwards.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    london_clean = ldf.clean_london(london)

Cleaning London Data Started...

Sucessfully Added Emergency Column!
Sucessfully Added Month Column!
Sucessfully Added Hour Column!
Sucessfully Cleanded Property Category!
Sucessfully Cleaned and Ranked Property Type!
Sucessfully Cleaned Address!
Sucessfully Cleaned Borough Names!
Sucessfully Cleaned and Ranked Ward Names!
Sucessfully Imputed Station Names!
Sucessfully Ranked Stations!
Sucessfully Cleaned Station Arriving Time!
Sucessfully Cleaned Number of Stations with Pumps!
Sucessfully Cleaned Number of Pumps Attending!
Sucessfully Dropped Unecessary Columns!

Sucessfully Cleaned London Data!


# Load in Model

In [4]:
# NOTE: unpickling can execute arbitrary code -- only load model files that
# come from a trusted source.
# The context manager closes the file handle as soon as the model is loaded.
with open("best_model.sav", 'rb') as model_file:
    model = pickle.load(model_file)
model

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=0.7192270091575351,
              enable_categorical=False, gamma=2.5108916227121214, gpu_id=-1,
              importance_type=None, interaction_constraints='',
              learning_rate=0.4434343526554207, max_delta_step=0, max_depth=7,
              min_child_weight=26, missing=nan, monotone_constraints='()',
              n_estimators=64, n_jobs=8, num_parallel_tree=1, predictor='auto',
              random_state=0, reg_alpha=0, reg_lambda=1.2882744251408234,
              scale_pos_weight=1, subsample=0.6112882087855153,
              tree_method='exact', use_label_encoder=False,
              validate_parameters=1, verbosity=None)

# Create Helper Functions

In [20]:
def str_to_date(date_str, time_str):
    """
    Join a date string and a time string with a single space and parse the result into a datetime object.
    
    date_str: string containing the date in form mm/dd/yyyy
    time_str: string containing the time in form hh:mm:ss
    
    returns: datetime object 
    """
    stamp_format = "%m/%d/%Y %H:%M:%S"
    combined = " ".join((date_str, time_str))
    return datetime.strptime(combined, stamp_format)

In [21]:
def _parse_number(raw_value, field_name):
    """
    Convert a numeric answer typed by the user into an int, or a float when it is not a whole number.
    
    raw_value: string exactly as returned by input()
    field_name: label used in the error message
    
    returns: int or float
    raises: ValueError if the text cannot be read as a number
    """
    text = raw_value.strip()
    try:
        return int(text)
    except ValueError:
        pass
    try:
        return float(text)
    except ValueError:
        raise ValueError(f"{field_name} must be a number, got {raw_value!r}") from None


def user_input():
    """
    This function asks the user for the details of a new incident and collects them in a one-row DataFrame.
    
    Text answers are stripped of surrounding whitespace and upper-cased. The three numeric answers (pump arriving
    time, number of stations with pumps, number of pumps attending) are converted to numbers instead of being kept
    as strings. The date and time answers are combined into a single datetime through str_to_date.
    
    returns: DataFrame with one row (index 1) and the columns date, property_category, property_type,
             address_qualifier, borough_name, ward_name, first_time, first_station, station_pumps, pumps_attending
    raises: ValueError if a numeric answer is not a number or the date/time does not match the expected format
    """
    property_category = input("Enter the property category: ").strip().upper()
    
    property_type = input("Enter the property type: ").strip().upper()
    
    address = input("Enter the address qualifier: ").strip().upper()
    
    borough = input("Enter the borough name: ").strip().upper()
    
    ward = input("Enter the ward name: ").strip().upper()
    
    first_time = _parse_number(input("Enter the pump arriving time: "), "pump arriving time")
    
    first_station = input("Enter the station where the first pump was deployed from: ").strip().upper()
    
    station_pumps = _parse_number(input("Enter the number of stations with pumps attending: "),
                                  "number of stations with pumps attending")
    
    pumps_attending = _parse_number(input("Enter the number of pumps attending: "), "number of pumps attending")
    
    # The format hint previously read "mm/dd/yyy"; str_to_date expects a four-digit year.
    date = input("Date of Crime (mm/dd/yyyy): ").strip()
    
    hour = input("Time of Crime (hh:mm:ss): ").strip()
    
    date_time = str_to_date(date, hour)
    
    # NOTE(review): this frame has no 'incident_group' column; the recorded run of ldf.clean_london on the
    # output of this function stopped with KeyError: 'incident_group'.
    new_crime_df = pd.DataFrame({"date": date_time, "property_category": property_category, "property_type": property_type, 
                                 "address_qualifier": address, "borough_name": borough, "ward_name": ward,  
                                 "first_time": first_time, "first_station": first_station, "station_pumps": station_pumps,
                                 "pumps_attending": pumps_attending}, index = [1])
    
    return new_crime_df

In [25]:
# Collect the details of one incident interactively; user_input() prompts for each field in turn.
test = user_input()

Enter the property category: Dwelling
Enter the property type: house - single occupancy
Enter the address qualifier: within same building
Enter the borough name: kensington and chelsea stanley
Enter the ward name: 246
Enter the pump arriving time: 246
Enter the station where the first pump was deployed from: Chelsea
Enter the number of stations with pumps attending: 2
Enter the number of pumps attending: 2
Date of Crime (mm/dd/yyy): 03/03/2022
Time of Crime (hh:mm:ss): 14:30:15


In [26]:
# Preview the one-row frame returned by user_input().
test.head()

Unnamed: 0,date,property_category,property_type,address_qualifier,borough_name,ward_name,first_time,first_station,station_pumps,pumps_attending
1,2022-03-03 14:30:15,DWELLING,HOUSE - SINGLE OCCUPANCY,WITHIN SAME BUILDING,KENSINGTON AND CHELSEA STANLEY,246,246,CHELSEA,2,2


In [27]:
# FIXME: the recorded run of this cell stopped with KeyError: 'incident_group'.
# The frame built by user_input() has no 'incident_group' column.
test_clean = ldf.clean_london(test)

Cleaning London Data Started...



KeyError: 'incident_group'

In [24]:
# Show the last rows of the raw data that have a first-arriving station recorded.
has_first_station = london["first_station"].notna()
london.loc[has_first_station].tail()

Unnamed: 0,incident_number,timestamp_of_call,incident_group,property_category,property_type,address_qualifier,borough_name,ward_name,first_time,first_station,station_pumps,pumps_attending
32234,028524-08032017,2017-03-08 23:15:56+00:00,False Alarm,Dwelling,Converted Flat/Maisonette - Up to 2 storeys,Correct incident location,KENSINGTON AND CHELSEA,STANLEY,246,Chelsea,2,2
32235,030643-13032017,2017-03-13 20:12:13+00:00,False Alarm,Dwelling,Purpose Built Flats/Maisonettes - 4 to 9 storeys,Within same building,KENSINGTON AND CHELSEA,STANLEY,202,Chelsea,2,2
32236,035655-25032017,2017-03-25 11:28:27+00:00,False Alarm,Dwelling,Converted Flat/Maisonette - Up to 2 storeys,Correct incident location,KENSINGTON AND CHELSEA,STANLEY,229,Chelsea,1,1
32237,046792-15042017,2017-04-15 22:41:02+00:00,False Alarm,Dwelling,House - single occupancy,Correct incident location,KENSINGTON AND CHELSEA,STANLEY,106,Chelsea,2,2
32238,051430-25042017,2017-04-25 09:36:51+00:00,False Alarm,Dwelling,Converted Flat/Maisonettes - 3 or more storeys,Correct incident location,KENSINGTON AND CHELSEA,STANLEY,284,Chelsea,1,1


In [8]:
# Preview the first rows of the frame returned by ldf.clean_london.
london_clean.head()

Unnamed: 0,property_category,address_qualifier,borough_name,first_time,station_pumps,pumps_attending,Emergency,Month,Hour,pt_rank,wn_rank,stat_rank
0,Residential,Correct incident location,BARKING AND DAGENHAM,305.0,1.0,2.0,0,2,18,5,4,5
1,Residential,Correct incident location,BARKING AND DAGENHAM,261.0,1.0,2.0,1,4,23,5,4,5
2,Residential,Within same building,BARKING AND DAGENHAM,218.0,1.0,2.0,1,1,14,5,4,5
3,Residential,Correct incident location,BARKING AND DAGENHAM,337.0,1.0,2.0,1,2,22,5,4,5
4,Residential,Correct incident location,BARKING AND DAGENHAM,325.154574,1.0,1.0,1,4,15,5,4,5
