In [1]:
# Notebook setup: warning filter, library imports, and pandas display options.
# Silence every warning for the rest of the session. The filter is installed
# before the third-party imports below, so warnings issued while importing them
# are filtered as well.
# NOTE(review): a blanket "ignore" also hides deprecation and dtype warnings;
# consider narrowing it by category or module.
import warnings
warnings.filterwarnings("ignore")
# Wrangling: data frames, arrays, train/test splitting and feature scaling
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
# Statistical tests
import scipy.stats as stats
from scipy.stats import norm
# Visualizing
import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.dates as dates
import seaborn as sns
# learning_curve comes from the same module as train_test_split above
from sklearn.model_selection import learning_curve
# Standard-library date/time types
import datetime
# Display floats in a 20-character-wide field with thousands separators and
# two decimals (e.g. 2125683.0 is shown as 2,125,683.00). Display only; the
# stored values are unchanged.
pd.options.display.float_format = '{:20,.2f}'.format

# Acquire

In [2]:
# Load the raw service-call export. The path is relative, so the CSV must sit in
# the kernel's working directory.
df = pd.read_csv('allservicecalls.csv')

In [3]:
# Preview the first rows of the raw frame to see the source column names and
# value formats before any cleaning.
df.head()

Unnamed: 0,Category,CASEID,OPENEDDATETIME,SLA_Date,CLOSEDDATETIME,Late (Yes/No),Dept,REASONNAME,TYPENAME,CaseStatus,SourceID,OBJECTDESC,Council District,XCOORD,YCOORD,Report Starting Date,Report Ending Date
0,Graffiti,1010444245,2012-08-15T00:00:00,2012-08-30T00:00:00,,YES,Code Enforcement Services,Graffiti,Graffiti Public Property,Open,Web Portal,"600 NOGALITOS ST, San Antonio, 78204",5,2125683.0,13695548.0,2020-05-15T00:00:00,2021-05-15T00:00:00
1,Property Maintenance,1010888252,2013-06-06T00:00:00,2013-08-09T00:00:00,,YES,Code Enforcement Services,Code Enforcement (IntExp),Alley-Way Maintenance,Open,Web Portal,"6043 CASTLE QUEEN, San Antonio, 78218",2,2169702.0,13725769.0,2020-05-15T00:00:00,2021-05-15T00:00:00
2,Property Maintenance,1010966128,2013-07-19T00:00:00,2013-09-23T00:00:00,,YES,Code Enforcement Services,Code Enforcement (IntExp),Junk Vehicle On Private Property,Open,Web Portal,"842 KIRK PL, San Antonio, 78226",5,2116192.0,13692260.0,2020-05-15T00:00:00,2021-05-15T00:00:00
3,Property Maintenance,1011052825,2013-09-16T00:00:00,2013-09-30T00:00:00,,YES,Code Enforcement Services,Code Enforcement,Right Of Way/Sidewalk Obstruction,Open,Internal Services Requests,"54 KENROCK RIDGE, San Antonio, 78254",7,2082242.0,13737817.0,2020-05-15T00:00:00,2021-05-15T00:00:00
4,Property Maintenance,1011052826,2013-09-16T00:00:00,2013-09-30T00:00:00,,YES,Code Enforcement Services,Code Enforcement,Right Of Way/Sidewalk Obstruction,Open,Internal Services Requests,"74 KENROCK RIDGE, San Antonio, 78254",7,2082389.0,13737877.0,2020-05-15T00:00:00,2021-05-15T00:00:00


In [4]:
# (rows, columns) of the raw frame: the baseline to compare against after cleaning.
df.shape

(495440, 17)

# Prepare

In [5]:
# Project-local helpers from the wrangle module.
# NOTE(review): these imports sit mid-notebook; moving them to the imports cell
# at the top would make the notebook's dependencies visible in one place.
# `import wrangle` binds the module itself; no visible cell uses the `wrangle.`
# prefix, so it may be redundant. Confirm against the rest of the notebook.
import wrangle

# clean_311 is applied to the raw frame in the next cell; split_seperate_scale
# produces the train/validate/test objects further down.
from wrangle import clean_311, split_seperate_scale

In [6]:
# Clean the raw frame with the project helper and rebind `df` to the result.
# NOTE(review): because `df` is overwritten, re-running this cell on its own
# passes the already-cleaned frame back into clean_311. Re-run the read_csv
# cell first, or keep the raw frame under a separate name.
df = clean_311(df)

In [7]:
# Preview the cleaned frame to check the column names and derived columns
# produced by clean_311.
df.head()

Unnamed: 0,case_id,open_date,due_date,closed_date,is_late,dept,call_reason,case_type,case_status,source_id,address,council_district,longitude,latitude,days_open,resolution_days_due,days_before_or_after_due,level_of_delay,zipcode
0,1010444245,2012-08-15,2012-08-30,NaT,YES,Code Enforcement Services,cleanup,Graffiti Public Property,Open,Web Portal,"600 NOGALITOS ST, San Antonio, 78204",5,2125683.0,13695548.0,,15,,Still Open,78204
1,1010888252,2013-06-06,2013-08-09,NaT,YES,Code Enforcement Services,code,Alley-Way Maintenance,Open,Web Portal,"6043 CASTLE QUEEN, San Antonio, 78218",2,2169702.0,13725769.0,,64,,Still Open,78218
2,1010966128,2013-07-19,2013-09-23,NaT,YES,Code Enforcement Services,code,Junk Vehicle On Private Property,Open,Web Portal,"842 KIRK PL, San Antonio, 78226",5,2116192.0,13692260.0,,66,,Still Open,78226
3,1011052825,2013-09-16,2013-09-30,NaT,YES,Code Enforcement Services,code,Right Of Way/Sidewalk Obstruction,Open,Internal Services Requests,"54 KENROCK RIDGE, San Antonio, 78254",7,2082242.0,13737817.0,,14,,Still Open,78254
4,1011052826,2013-09-16,2013-09-30,NaT,YES,Code Enforcement Services,code,Right Of Way/Sidewalk Obstruction,Open,Internal Services Requests,"74 KENROCK RIDGE, San Antonio, 78254",7,2082389.0,13737877.0,,14,,Still Open,78254


In [8]:
# (rows, columns) after cleaning; compare with the raw shape shown earlier to
# see how clean_311 changed the row and column counts.
df.shape

(481205, 19)

In [9]:
# Column dtypes and non-null counts of the cleaned frame; use it to spot columns
# that still contain nulls (closed_date shows NaT for open cases in the preview).
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 481205 entries, 0 to 495439
Data columns (total 19 columns):
 #   Column                    Non-Null Count   Dtype         
---  ------                    --------------   -----         
 0   case_id                   481205 non-null  int64         
 1   open_date                 481205 non-null  datetime64[ns]
 2   due_date                  481205 non-null  datetime64[ns]
 3   closed_date               431696 non-null  datetime64[ns]
 4   is_late                   481205 non-null  object        
 5   dept                      481205 non-null  object        
 6   call_reason               481205 non-null  object        
 7   case_type                 481205 non-null  object        
 8   case_status               481205 non-null  object        
 9   source_id                 481205 non-null  object        
 10  address                   481205 non-null  object        
 11  council_district          481205 non-null  int64         
 12  lo

In [10]:
# Split the cleaned frame and unpack the twelve objects returned by
# split_seperate_scale, in the order the helper returns them:
#   train / validate / test - the three partitions of df
#   X_* / y_*               - features and target for each partition (the
#                             displayed outputs show level_of_delay as the target)
#   *_scaled                - scaled numeric columns for each partition
# NOTE(review): split ratios, random seed and scaler are defined in wrangle.py,
# which is not visible here; confirm them there.
(
    train, validate, test,
    X_train, y_train,
    X_validate, y_validate,
    X_test, y_test,
    train_scaled, validate_scaled, test_scaled,
) = split_seperate_scale(df)

In [11]:
# Preview the training partition; rows keep their index labels from the cleaned frame.
# NOTE(review): in the recorded output, row 249630 has is_late == 'YES' and
# days_before_or_after_due == -95.0 but level_of_delay == 'On Time Response'.
# Verify the level_of_delay binning in wrangle.
train.head()

Unnamed: 0,case_id,open_date,due_date,closed_date,is_late,dept,call_reason,case_type,case_status,source_id,address,council_district,longitude,latitude,days_open,resolution_days_due,days_before_or_after_due,level_of_delay,zipcode
383464,1016709124,2021-02-26,2021-03-08,2021-02-26,NO,Animal Care Services,field,Animal Bite(Critical),Closed,Web Portal,"4142 TREEGARDEN DR, SAN ANTONIO, 78222",3,2154121.0,13685007.0,0.0,10,10.0,Early Response,78222
425327,1016775164,2021-03-30,2021-04-06,2021-04-05,NO,Solid Waste Management,waste,Carts(Damaged Cart),Closed,Web Portal,"502 CLAUDE W BLACK, SAN ANTONIO, 78203",2,2135492.0,13699083.0,6.0,7,1.0,Early Response,78203
67550,1016227560,2020-06-03,2020-06-10,2020-06-09,NO,Solid Waste Management,waste,Carts(Cart Exchange),Closed,Web Portal,"2411 TOWNCLIFF, SAN ANTONIO, 78238",6,2093526.0,13715141.0,6.0,7,1.0,Early Response,78238
354396,1016662232,2021-01-29,2021-02-03,2021-02-01,NO,Solid Waste Management,waste,No Pickup,Closed,Web Portal,"112 MEBANE, SAN ANTONIO, 78223",3,2135549.0,13685698.0,3.0,5,2.0,Early Response,78223
249630,1016465110,2020-10-22,2021-01-07,2021-04-12,YES,Development Services,code,Illegal Parking (Front-Side Yard Parking),Closed,Web Portal,"166 WAUGH, SAN ANTONIO, 78223",3,2142568.0,13684960.0,172.0,77,-95.0,On Time Response,78223


In [12]:
# Size of the training partition; the train, validate and test row counts
# should add up to the cleaned frame's row count.
train.shape

(269474, 19)

In [13]:
# Size of the validation partition (same columns as train).
validate.shape

(115490, 19)

In [14]:
# Size of the held-out test partition (same columns as train).
test.shape

(96241, 19)

In [16]:
# Preview the training features (the train columns without level_of_delay).
# NOTE(review): the features still include is_late, days_open and
# days_before_or_after_due. If level_of_delay is derived from them, they leak
# the target; confirm and drop them before modeling.
# NOTE(review): this cell's execution count (16) is higher than the next cell's
# (15), so the cells were run out of order. Restart & Run All before sharing.
X_train.head()

Unnamed: 0,case_id,open_date,due_date,closed_date,is_late,dept,call_reason,case_type,case_status,source_id,address,council_district,longitude,latitude,days_open,resolution_days_due,days_before_or_after_due,zipcode
383464,1016709124,2021-02-26,2021-03-08,2021-02-26,NO,Animal Care Services,field,Animal Bite(Critical),Closed,Web Portal,"4142 TREEGARDEN DR, SAN ANTONIO, 78222",3,2154121.0,13685007.0,0.0,10,10.0,78222
425327,1016775164,2021-03-30,2021-04-06,2021-04-05,NO,Solid Waste Management,waste,Carts(Damaged Cart),Closed,Web Portal,"502 CLAUDE W BLACK, SAN ANTONIO, 78203",2,2135492.0,13699083.0,6.0,7,1.0,78203
67550,1016227560,2020-06-03,2020-06-10,2020-06-09,NO,Solid Waste Management,waste,Carts(Cart Exchange),Closed,Web Portal,"2411 TOWNCLIFF, SAN ANTONIO, 78238",6,2093526.0,13715141.0,6.0,7,1.0,78238
354396,1016662232,2021-01-29,2021-02-03,2021-02-01,NO,Solid Waste Management,waste,No Pickup,Closed,Web Portal,"112 MEBANE, SAN ANTONIO, 78223",3,2135549.0,13685698.0,3.0,5,2.0,78223
249630,1016465110,2020-10-22,2021-01-07,2021-04-12,YES,Development Services,code,Illegal Parking (Front-Side Yard Parking),Closed,Web Portal,"166 WAUGH, SAN ANTONIO, 78223",3,2142568.0,13684960.0,172.0,77,-95.0,78223


In [15]:
# Expect the same row count as train and one column fewer (the target removed).
X_train.shape

(269474, 18)

In [17]:
# Preview the training target, level_of_delay, an ordered categorical.
# NOTE(review): the recorded category order places 'Still Open' above
# 'Extremely Early Response'. Confirm that open cases belong on this ordinal
# scale before using the ordering.
y_train.head()

383464      Early Response
425327      Early Response
67550       Early Response
354396      Early Response
249630    On Time Response
Name: level_of_delay, dtype: category
Categories (8, object): ['Extremely Late Response' < 'Very Late Response' < 'Late Response' < 'On Time Response' < 'Early Response' < 'Very Early Response' < 'Extremely Early Response' < 'Still Open']

In [18]:
# The target length should equal the number of rows in X_train.
y_train.shape

(269474,)

In [19]:
# Preview the scaled numeric training columns; values in the recorded output lie
# between 0 and 1 (presumably min-max scaling; confirm in wrangle).
# NOTE(review): the recorded output shows a reset 0..n index, while train,
# X_train and y_train keep the original labels. Align by position, not by
# index, when combining them.
# NOTE(review): case_id is scaled alongside the real features although it looks
# like an identifier; confirm it should be a model input.
train_scaled.head()

Unnamed: 0,case_id,council_district,longitude,latitude,days_open,resolution_days_due,days_before_or_after_due
0,0.97,0.3,0.63,0.38,0.0,0.01,0.79
1,0.99,0.2,0.54,0.45,0.0,0.0,0.79
2,0.9,0.6,0.32,0.52,0.0,0.0,0.79
3,0.97,0.3,0.54,0.38,0.0,0.0,0.79
4,0.94,0.3,0.57,0.38,0.07,0.05,0.76


In [20]:
# Expect the same row count as train, with only the scaled numeric columns.
train_scaled.shape

(269474, 7)