![USD](usd.png)

### University of San Diego 

### Master of Science, Applied Data Science 

#### Contributors

- Ebad Akhter
- Kevin Baum
- Salvador Sanchez

***

In [1]:
#Libraries
import pandas as pd
from datetime import datetime
import numpy as np

In [2]:
#Download one year of calls-for-service data
def Get_Data(year):
    """Read the calls-for-service CSV published for ``year``.

    Parameters
    ----------
    year : int or str
        Interpolated into the file name on seshat.datasd.org.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV, exactly as ``pd.read_csv`` returns it.
    """
    return pd.read_csv(
        f"https://seshat.datasd.org/pd/pd_calls_for_service_{year}_datasd.csv"
    )

In [3]:
#Calendar year at the moment the notebook is run (local time)
current_year = datetime.today().year

In [4]:
#Download this year's file first, then the previous year's
current_data = Get_Data(year=current_year)
last_year_data = Get_Data(year=current_year - 1)

#Show the last rows of the current-year data
current_data.tail()

Unnamed: 0,incident_num,date_time,day_of_week,address_number_primary,address_dir_primary,address_road_primary,address_sfx_primary,address_dir_intersecting,address_road_intersecting,address_sfx_intersecting,call_type,disposition,beat,priority
30345,E23010032562,2023-01-23 23:35:53,2,0,,HOITT,ST,,ISLAND,,T,K,515,2
30346,E23010032563,2023-01-23 23:38:24,2,400,,HOTEL CIRCLE SOUTH,,,,,5150,CAN,623,2
30347,E23010032565,2023-01-23 23:39:02,2,600,,32ND,ST,,,,T,K,516,2
30348,E23010032567,2023-01-23 23:44:30,2,13100,,SEA KNOLL,CT,,,,586,O,934,4
30349,E23010032570,2023-01-23 23:49:32,2,0,,E,ST,,16TH,,1131,K,521,1


In [5]:
#Summary statistics for the numeric columns of the current-year data
current_data.describe()

Unnamed: 0,day_of_week,address_number_primary,address_dir_intersecting,address_sfx_intersecting,beat,priority
count,30350.0,30350.0,0.0,0.0,30350.0,30350.0
mean,3.892389,3381.046787,,,492.391993,2.103295
std,1.984939,3384.459204,,,238.326237,1.213443
min,1.0,0.0,,,-1.0,0.0
25%,2.0,600.0,,,313.0,1.0
50%,4.0,2800.0,,,521.0,2.0
75%,6.0,4800.0,,,627.0,3.0
max,7.0,53500.0,,,937.0,9.0


In [6]:
#Column names, non-null counts and dtypes of last year's data
last_year_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 499256 entries, 0 to 499255
Data columns (total 14 columns):
 #   Column                     Non-Null Count   Dtype  
---  ------                     --------------   -----  
 0   incident_num               499256 non-null  object 
 1   date_time                  499256 non-null  object 
 2   day_of_week                499256 non-null  int64  
 3   address_number_primary     499256 non-null  int64  
 4   address_dir_primary        23135 non-null   object 
 5   address_road_primary       499187 non-null  object 
 6   address_sfx_primary        455019 non-null  object 
 7   address_dir_intersecting   0 non-null       float64
 8   address_road_intersecting  85382 non-null   object 
 9   address_sfx_intersecting   0 non-null       float64
 10  call_type                  498640 non-null  object 
 11  disposition                497161 non-null  object 
 12  beat                       499256 non-null  int64  
 13  priority                   49

In [7]:
#Explore Data
def _int_if_lossless(value):
    """Return ``value`` as an ``int`` when that loses no information, else unchanged."""
    try:
        as_int = int(value)
    except (TypeError, ValueError, OverflowError):
        # Not number-like (e.g. a street name or a timestamp string): keep it.
        return value
    # Numeric strings such as '415' are reported as ints; numeric values are
    # only converted when integral, so a mode of 2.5 is not truncated to 2.
    if isinstance(value, (str, bytes)) or as_int == value:
        return as_int
    return value


def Explore_Date(df):
    """Build a per-column data-quality summary of ``df``.

    Columns that are entirely null are dropped first. For every remaining
    column the result has one row (indexed by column name) with:

    * ``Count``      - number of non-null values
    * ``% Miss.``    - fraction of null values, rounded to 4 decimals
    * ``Card.``      - number of distinct non-null values
    * ``Mode`` / ``Mode Freq.`` / ``Mode %`` - most frequent value, how often
      it occurs, and that count divided by the total number of rows
    * ``2nd Mode`` / ``2nd Mode Freq.`` / ``2nd Mode %`` - the same for the
      second most frequent value. When a column has a single distinct value
      these are ``None``, ``0`` and ``0.0`` instead of raising ``IndexError``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to summarise. It is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per non-empty column of ``df``, in ``df``'s column order.
    """
    # Columns without any data carry no information for the summary.
    df = df.dropna(how='all', axis=1)
    n_rows = len(df)

    # Collect one record per column and build the frame once, rather than
    # growing a DataFrame row by row.
    records = []
    for col in df.columns:
        top_two = df[col].value_counts().head(2)
        # Pad so columns with fewer than two distinct values still unpack.
        labels = [_int_if_lossless(label) for label in top_two.index] + [None, None]
        counts = [int(count) for count in top_two.to_numpy()] + [0, 0]
        records.append({'Feature': col,
                        'Mode': labels[0],
                        'Mode Freq.': counts[0],
                        'Mode %': counts[0] / n_rows if n_rows else 0.0,
                        '2nd Mode': labels[1],
                        '2nd Mode Freq.': counts[1],
                        '2nd Mode %': counts[1] / n_rows if n_rows else 0.0})

    freq_table = pd.DataFrame(records,
                              columns=['Feature',
                                       'Mode',
                                       'Mode Freq.',
                                       'Mode %',
                                       '2nd Mode',
                                       '2nd Mode Freq.',
                                       '2nd Mode %']).set_index('Feature')

    #Nulls, Counts, Cardinality
    summary = pd.concat([df.count(),
                         (df.isnull().sum() / n_rows).round(4),
                         df.nunique()], axis=1)
    summary.columns = ["Count", "% Miss.", "Card."]

    return pd.concat([summary, freq_table], axis=1)

In [8]:
#Render the summary: counts with thousands separators, shares as percentages
Explore_Date(last_year_data).style.format({
    **dict.fromkeys(['Count', 'Card.', 'Mode Freq.', '2nd Mode Freq.'], "{:,}"),
    **dict.fromkeys(['Mode %', '2nd Mode %'], "{:.0%}"),
    '% Miss.': "{:.1%}",
})

Unnamed: 0,Count,% Miss.,Card.,Mode,Mode Freq.,Mode %,2nd Mode,2nd Mode Freq.,2nd Mode %
incident_num,499256,0.0%,499256,E22010000001,1,0%,E22080042092,1,0%
date_time,499256,0.0%,495214,2022-10-25 10:09:39,3,0%,2022-09-21 13:23:37,3,0%
day_of_week,499256,0.0%,7,5,76734,15%,6,74910,15%
address_number_primary,499256,0.0%,233,0,92027,18%,1400,11347,2%
address_dir_primary,23135,95.4%,8,W,8001,2%,S,7227,1%
address_road_primary,499187,0.0%,10076,IMPERIAL,8701,2%,05TH,8389,2%
address_sfx_primary,455019,8.9%,40,ST,146772,29%,AVE,116831,23%
address_road_intersecting,85382,82.9%,4556,MARKET,2263,0%,UNIVERSITY,2213,0%
call_type,498640,0.1%,238,415,42668,9%,CW,24684,5%
disposition,497161,0.4%,19,K,259055,52%,O,71580,14%


In [9]:
def AddressNumberStr(df):
    """Add ``address_number_primary_str``: the street number as text, '0' as NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``address_number_primary``. The new column is added to
        (or overwritten on) this same object.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, with the extra column.
    """
    # Assign the replaced Series back to the frame. Calling
    # replace(..., inplace=True) on df.<column> acts on an intermediate
    # object and does not reach df under pandas Copy-on-Write.
    df['address_number_primary_str'] = (
        df['address_number_primary'].astype(str).replace('0', np.nan)
    )
    return df

In [22]:
def AddressField(df,City,State):
    """Add an ``Address`` column built from the primary address parts.

    The non-null values of ``address_number_primary_str``,
    ``address_dir_primary``, ``address_road_primary`` and
    ``address_sfx_primary`` are joined with single spaces, followed by
    ``"<City>, <State>"``. A row with no address parts gets just
    ``"<City>, <State>"`` (no leading space).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the four columns above. The ``Address`` column is added
        to (or overwritten on) this same object.
    City, State : str
        Appended to every address.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, with the ``Address`` column.
    """
    part_columns = ['address_number_primary_str',
                    'address_dir_primary',
                    'address_road_primary',
                    'address_sfx_primary']
    locality = f"{City}, {State}"

    # Walk the four columns in parallel instead of apply(axis=1), which
    # builds a Series object for every row; this also handles an empty frame.
    addresses = []
    for row in zip(*(df[name] for name in part_columns)):
        street = ' '.join(piece for piece in row if pd.notna(piece))
        addresses.append(f"{street} {locality}" if street else locality)

    df['Address'] = addresses
    return df

In [23]:
#Feed the first step's result into the second, so the address is built from
#the frame that is known to carry address_number_primary_str
test = AddressNumberStr(current_data)
test = AddressField(test, 'San Diego', 'California')

In [24]:
#Display the frame returned by AddressField, including the new Address column
test

Unnamed: 0,incident_num,date_time,day_of_week,address_number_primary,address_dir_primary,address_road_primary,address_sfx_primary,address_dir_intersecting,address_road_intersecting,address_sfx_intersecting,call_type,disposition,beat,priority,address_number_primary_str,Address
0,E23010000001,2023-01-01 00:00:05,1,0,,05TH,AVE,,G,,FD,CAN,523,2,,"05TH AVE San Diego, California"
1,E23010000002,2023-01-01 00:00:30,1,0,,SHOPS INFO LOG,,,,,SHOPS,W,-1,4,,"SHOPS INFO LOG San Diego, California"
2,E23010000003,2023-01-01 00:00:57,1,0,,05TH,AVE,,G,,FD,DUP,523,2,,"05TH AVE San Diego, California"
3,E23010000004,2023-01-01 00:01:15,1,600,,FERGUS,ST,,,,AU1,K,433,1,600,"600 FERGUS ST San Diego, California"
4,E23010000005,2023-01-01 00:02:17,1,0,,CHP INFO LOG,,,,,INFOCHP,W,-1,4,,"CHP INFO LOG San Diego, California"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30345,E23010032562,2023-01-23 23:35:53,2,0,,HOITT,ST,,ISLAND,,T,K,515,2,,"HOITT ST San Diego, California"
30346,E23010032563,2023-01-23 23:38:24,2,400,,HOTEL CIRCLE SOUTH,,,,,5150,CAN,623,2,400,"400 HOTEL CIRCLE SOUTH San Diego, California"
30347,E23010032565,2023-01-23 23:39:02,2,600,,32ND,ST,,,,T,K,516,2,600,"600 32ND ST San Diego, California"
30348,E23010032567,2023-01-23 23:44:30,2,13100,,SEA KNOLL,CT,,,,586,O,934,4,13100,"13100 SEA KNOLL CT San Diego, California"
