In [1]:
# This will eventually be moved out of an IPython notebook and into a .py file, so don't worry about making it pretty.
# Any data exploration done here will be transferred to the data exploration file.

In [2]:
import pandas as pd
import numpy as np
from xgboost.sklearn import XGBClassifier
import matplotlib.pyplot as plt

# enables inline plots
%matplotlib inline

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 20)
pd.set_option('display.precision', 3)

# Directory that holds the CSV files. Kept in one place so the notebook can be
# pointed at a different copy of the data by editing a single line.
DATA_DIR = '/Users/eloiserosen/Downloads'

# Flip to True to iterate on the 10k-row sample files instead of the full data.
USE_SMALL_SAMPLE = False

if USE_SMALL_SAMPLE:
    df = pd.read_csv(DATA_DIR + '/train_small_10k.csv')
    df_test = pd.read_csv(DATA_DIR + '/test_small_10k.csv')
    # The sample files carry an extra 'Unnamed: 0' column that is not a feature.
    del df['Unnamed: 0']
    del df_test['Unnamed: 0']
else:
    df = pd.read_csv(DATA_DIR + '/train.csv')
    df_test = pd.read_csv(DATA_DIR + '/test.csv')

In [3]:
# Preview the first rows of the raw training data (before any cleaning).
df.head()

Unnamed: 0,Dates,Category,Descript,DayOfWeek,PdDistrict,Resolution,Address,X,Y
0,2015-05-13 23:53:00,WARRANTS,WARRANT ARREST,Wednesday,NORTHERN,"ARREST, BOOKED",OAK ST / LAGUNA ST,-122.426,37.775
1,2015-05-13 23:53:00,OTHER OFFENSES,TRAFFIC VIOLATION ARREST,Wednesday,NORTHERN,"ARREST, BOOKED",OAK ST / LAGUNA ST,-122.426,37.775
2,2015-05-13 23:33:00,OTHER OFFENSES,TRAFFIC VIOLATION ARREST,Wednesday,NORTHERN,"ARREST, BOOKED",VANNESS AV / GREENWICH ST,-122.424,37.8
3,2015-05-13 23:30:00,LARCENY/THEFT,GRAND THEFT FROM LOCKED AUTO,Wednesday,NORTHERN,NONE,1500 Block of LOMBARD ST,-122.427,37.801
4,2015-05-13 23:30:00,LARCENY/THEFT,GRAND THEFT FROM LOCKED AUTO,Wednesday,PARK,NONE,100 Block of BRODERICK ST,-122.439,37.772


In [4]:
# Preview the first rows of the raw test data (before any cleaning).
df_test.head()

Unnamed: 0,Id,Dates,DayOfWeek,PdDistrict,Address,X,Y
0,0,2015-05-10 23:59:00,Sunday,BAYVIEW,2000 Block of THOMAS AV,-122.4,37.735
1,1,2015-05-10 23:51:00,Sunday,BAYVIEW,3RD ST / REVERE AV,-122.392,37.732
2,2,2015-05-10 23:50:00,Sunday,NORTHERN,2000 Block of GOUGH ST,-122.426,37.792
3,3,2015-05-10 23:45:00,Sunday,INGLESIDE,4700 Block of MISSION ST,-122.437,37.721
4,4,2015-05-10 23:45:00,Sunday,INGLESIDE,4700 Block of MISSION ST,-122.437,37.721


In [5]:
# Code used to generate the small (last 10,000 rows) versions of the files.
# It is wrapped in a string literal so it does not execute; running this cell
# only echoes the string.

'''
df_small = df.tail(10000)
df_small.to_csv('train_small_10k.csv')

df_test_small = df_test.tail(10000)
df_test_small.to_csv('test_small_10k.csv')
'''


"\ndf_small = df.tail(10000)\ndf_small.to_csv('train_small_10k.csv')\n\ndf_test_small = df_test.tail(10000)\ndf_test_small.to_csv('test_small_10k.csv')\n"

In [6]:
def _iso_week(date_time):
    """Return the ISO week number for every timestamp in a datetime Series.

    ``Series.dt.week`` was removed in pandas 2.0, so prefer
    ``Series.dt.isocalendar()`` when it exists and fall back to the legacy
    accessor on older pandas versions.
    """
    accessor = date_time.dt
    if hasattr(accessor, 'isocalendar'):
        week = accessor.isocalendar().week
        # isocalendar() yields a nullable UInt32 column; convert it back to the
        # plain numpy dtypes the legacy accessor produced (float when dates
        # are missing, integer otherwise).
        return week.astype('float64' if week.isna().any() else 'int64')
    return accessor.week


def clean_data(df):
    """Derive date and address features on a raw crime DataFrame.

    The frame is modified IN PLACE and the same object is returned, so a
    second call on an already-cleaned frame fails because 'Dates' is gone.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Dates' (parseable by ``pd.to_datetime``) and 'Address'.
        'Descript' and 'Resolution' are removed when present.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, without 'Dates' (and 'Descript'/'Resolution'), plus the
        columns 'Year', 'Month', 'Week', 'Day', 'Hour', 'Minute' and
        'StreetCorner'.
    """
    # drop columns we don't need
    for unused in ('Descript', 'Resolution'):
        if unused in df.columns:
            del df[unused]

    # create columns based on timestamp
    date_time = pd.to_datetime(df['Dates'])
    df['Year'] = date_time.dt.year
    df['Month'] = date_time.dt.month
    df['Week'] = _iso_week(date_time)
    df['Day'] = date_time.dt.day
    df['Hour'] = date_time.dt.hour
    # some crimes are logged at a precise time. Others, like some thefts, have
    # rounded time. The minute is shifted by 30 so half past the hour maps to 0.
    df['Minute'] = date_time.dt.minute - 30
    del df['Dates']

    # 1 when the address contains '/', 0 otherwise. The match is literal (not
    # a regex) and a missing address counts as 0.
    has_slash = df['Address'].str.contains('/', regex=False, na=False)
    df['StreetCorner'] = has_slash.astype('int64')

    return df
    

    

In [7]:
# Apply the cleaning step to the training data.
# NOTE: clean_data deletes 'Dates' from the frame it is given, so re-running
# this cell on the already-cleaned df raises a KeyError; reload the CSV first.
df = clean_data(df)
df.head()


Unnamed: 0,Category,DayOfWeek,PdDistrict,Address,X,Y,Year,Month,Week,Day,Hour,Minute,StreetCorner
0,WARRANTS,Wednesday,NORTHERN,OAK ST / LAGUNA ST,-122.426,37.775,2015,5,20,13,23,23,1
1,OTHER OFFENSES,Wednesday,NORTHERN,OAK ST / LAGUNA ST,-122.426,37.775,2015,5,20,13,23,23,1
2,OTHER OFFENSES,Wednesday,NORTHERN,VANNESS AV / GREENWICH ST,-122.424,37.8,2015,5,20,13,23,3,1
3,LARCENY/THEFT,Wednesday,NORTHERN,1500 Block of LOMBARD ST,-122.427,37.801,2015,5,20,13,23,0,0
4,LARCENY/THEFT,Wednesday,PARK,100 Block of BRODERICK ST,-122.439,37.772,2015,5,20,13,23,0,0


In [8]:
# Count how many rows fall on each address.
# NOTE(review): 'StreetCorner' is aggregated with size as well, so it simply
# duplicates 'AddressCount' -- confirm whether another aggregate was intended.
top_addresses = (
    df.assign(AddressCount=1)
      .groupby('Address')
      .agg({'AddressCount': np.size, 'StreetCorner': np.size})
)
top_addresses.head()

Unnamed: 0_level_0,AddressCount,StreetCorner
Address,Unnamed: 1_level_1,Unnamed: 2_level_1
0 Block of HARRISON ST,1,1
0 Block of 10TH AV,5,5
0 Block of 10TH ST,49,49
0 Block of 11TH ST,48,48
0 Block of 12TH AV,7,7


In [9]:
# Rank the addresses from most to least frequent.
top_addresses = top_addresses.sort_values(by='AddressCount', ascending=False)
top_addresses.head(50)

Unnamed: 0_level_0,AddressCount,StreetCorner
Address,Unnamed: 1_level_1,Unnamed: 2_level_1
800 Block of BRYANT ST,26533,26533
800 Block of MARKET ST,6581,6581
2000 Block of MISSION ST,5097,5097
1000 Block of POTRERO AV,4063,4063
900 Block of MARKET ST,3251,3251
0 Block of TURK ST,3228,3228
0 Block of 6TH ST,2884,2884
300 Block of ELLIS ST,2703,2703
400 Block of ELLIS ST,2590,2590
16TH ST / MISSION ST,2504,2504


In [13]:
# Keep only the first N_TOP_ADDRESSES rows of the ranked table.
# `.ix` was removed in pandas 1.0. On this string-labelled index its integer
# slice was positional, so `.iloc` is the exact replacement.
# NOTE(review): the recorded output of this cell lists 500 entries, so it seems
# to come from a run with a larger cutoff -- confirm the intended value.
N_TOP_ADDRESSES = 200
top_addresses = top_addresses.iloc[:N_TOP_ADDRESSES]
top_addresses.info()

<class 'pandas.core.frame.DataFrame'>
Index: 500 entries, 800 Block of BRYANT ST to 1200 Block of VANNESS AV
Data columns (total 2 columns):
AddressCount    500 non-null int64
StreetCorner    500 non-null int64
dtypes: int64(2)
memory usage: 11.7+ KB


In [14]:
# Check that the truncated table still starts with the most frequent addresses.
top_addresses.head()

Unnamed: 0_level_0,AddressCount,StreetCorner
Address,Unnamed: 1_level_1,Unnamed: 2_level_1
800 Block of BRYANT ST,26533,26533
800 Block of MARKET ST,6581,6581
2000 Block of MISSION ST,5097,5097
1000 Block of POTRERO AV,4063,4063
900 Block of MARKET ST,3251,3251


In [15]:
# Address labels that keep their own indicator column; every other address is
# bucketed as 'other' further down.
addresses_to_keep = top_addresses.index.values
# Parenthesised so the cell runs on both Python 2 and Python 3.
print(addresses_to_keep)

['800 Block of BRYANT ST' '800 Block of MARKET ST'
 '2000 Block of MISSION ST' '1000 Block of POTRERO AV'
 '900 Block of MARKET ST' '0 Block of TURK ST' '0 Block of 6TH ST'
 '300 Block of ELLIS ST' '400 Block of ELLIS ST' '16TH ST / MISSION ST'
 '1000 Block of MARKET ST' '1100 Block of MARKET ST'
 '2000 Block of MARKET ST' '100 Block of OFARRELL ST'
 '700 Block of MARKET ST' '3200 Block of 20TH AV' '100 Block of 6TH ST'
 '500 Block of JOHNFKENNEDY DR' 'TURK ST / TAYLOR ST'
 '200 Block of TURK ST' '0 Block of PHELAN AV'
 '0 Block of UNITEDNATIONS PZ' '0 Block of POWELL ST'
 '100 Block of EDDY ST' '1400 Block of PHELPS ST' '300 Block of EDDY ST'
 '100 Block of GOLDEN GATE AV' '100 Block of POWELL ST'
 '200 Block of INTERSTATE80 HY' 'MISSION ST / 16TH ST'
 '200 Block of EDDY ST' '900 Block of POTRERO AV' '100 Block of TURK ST'
 '2300 Block of 16TH ST' '400 Block of EDDY ST' '600 Block of VALENCIA ST'
 '700 Block of STANYAN ST' '3300 Block of MISSION ST'
 '400 Block of OFARRELL ST' 'ELLIS 

In [16]:
# Collapse every training address that is not in addresses_to_keep into a
# single 'other' level.
is_rare_address = ~df['Address'].isin(addresses_to_keep)
df.loc[is_rare_address, 'Address'] = 'other'
df.head()

Unnamed: 0,Category,DayOfWeek,PdDistrict,Address,X,Y,Year,Month,Week,Day,Hour,Minute,StreetCorner
0,WARRANTS,Wednesday,NORTHERN,other,-122.426,37.775,2015,5,20,13,23,23,1
1,OTHER OFFENSES,Wednesday,NORTHERN,other,-122.426,37.775,2015,5,20,13,23,23,1
2,OTHER OFFENSES,Wednesday,NORTHERN,other,-122.424,37.8,2015,5,20,13,23,3,1
3,LARCENY/THEFT,Wednesday,NORTHERN,other,-122.427,37.801,2015,5,20,13,23,0,0
4,LARCENY/THEFT,Wednesday,PARK,other,-122.439,37.772,2015,5,20,13,23,0,0


In [17]:
# Number of distinct address levels left after bucketing (kept addresses plus 'other').
df.Address.nunique()

501

In [19]:
# set up dummies
# One indicator column per retained address; the 'other' level is removed so
# that a row of all zeros stands for it.
dummy_Address = pd.get_dummies(df['Address'], prefix='Address')
dummy_Address.pop('Address_other')
df = df.drop('Address', axis=1).join(dummy_Address)
df.head()

Unnamed: 0,Category,DayOfWeek,PdDistrict,X,Y,Year,Month,Week,Day,Hour,Minute,StreetCorner,Address_0 Block of 12TH ST,Address_0 Block of 3RD ST,Address_0 Block of 4TH ST,Address_0 Block of 5TH ST,Address_0 Block of 5THSTNORTH ST,Address_0 Block of 6TH ST,Address_0 Block of 7TH ST,Address_0 Block of 8TH ST,Address_0 Block of 9TH ST,Address_0 Block of BLYTHDALE AV,Address_0 Block of BROOKDALE AV,Address_0 Block of CAMERON WY,Address_0 Block of CASHMERE ST,Address_0 Block of CASTRO ST,Address_0 Block of DAKOTA ST,Address_0 Block of DORE ST,Address_0 Block of DRUMM ST,Address_0 Block of FARALLONES ST,Address_0 Block of FELL ST,Address_0 Block of GOLDEN GATE AV,Address_0 Block of GOUGH ST,Address_0 Block of GRANT AV,Address_0 Block of GROVE ST,Address_0 Block of HARBOR RD,Address_0 Block of HAROLD AV,Address_0 Block of HOFF ST,Address_0 Block of JONES ST,Address_0 Block of JULES AV,Address_0 Block of KEZAR DR,Address_0 Block of LEDYARD ST,Address_0 Block of MARINA BL,Address_0 Block of MARKET ST,Address_0 Block of MASON ST,Address_0 Block of MCALLISTER ST,Address_0 Block of MOSS ST,Address_0 Block of NEWMONTGOMERY ST,Address_0 Block of NICHOLS WY,Address_0 Block of NORTHRIDGE RD,Address_0 Block of OFARRELL ST,Address_0 Block of OSCEOLA LN,Address_0 Block of PHELAN AV,Address_0 Block of POWELL ST,Address_0 Block of SGTJOHNVYOUNG LN,Address_0 Block of SHRADER ST,Address_0 Block of SOUTH VAN NESS AV,Address_0 Block of STOCKTON ST,Address_0 Block of TAYLOR ST,Address_0 Block of THE EMBARCADEROSOUTH ST,Address_0 Block of TURK ST,Address_0 Block of UNITED NATIONS PZ,Address_0 Block of UNITEDNATIONS PZ,Address_100 Block of 2ND ST,Address_100 Block of 3RD ST,Address_100 Block of 4TH ST,Address_100 Block of 6TH ST,Address_100 Block of 7TH ST,Address_100 Block of 9TH ST,Address_100 Block of APTOS AV,Address_100 Block of BELVEDERE ST,Address_100 Block of BLYTHDALE AV,Address_100 Block of BROOKDALE AV,Address_100 Block of CAMERON WY,Address_100 Block of CAPP ST,Address_100 Block of 
DAKOTA ST,Address_100 Block of EDDY ST,Address_100 Block of ELLIS ST,Address_100 Block of FONT BL,Address_100 Block of GOLDEN GATE AV,Address_100 Block of GROVE ST,Address_100 Block of HIGHLAND AV,Address_100 Block of HYDE ST,Address_100 Block of JEFFERSON ST,Address_100 Block of JOHNFKENNEDY DR,Address_100 Block of JONES ST,Address_100 Block of JULIAN AV,Address_100 Block of KISKA RD,Address_100 Block of LARKIN ST,Address_100 Block of LEAVENWORTH ST,Address_100 Block of MASON ST,Address_100 Block of MCALLISTER ST,Address_100 Block of OFARRELL ST,Address_100 Block of POST ST,Address_100 Block of POWELL ST,Address_100 Block of STOCKTON ST,Address_100 Block of TAYLOR ST,Address_100 Block of TURK ST,Address_100 Block of VALENCIA ST,Address_100 Block of WESTPOINT RD,Address_1000 Block of BUSH ST,Address_1000 Block of CAYUGA AV,Address_1000 Block of CONNECTICUT ST,Address_1000 Block of FITZGERALD AV,Address_1000 Block of FOLSOM ST,Address_1000 Block of GEARY ST,Address_1000 Block of HARRISON ST,Address_1000 Block of HOWARD ST,Address_1000 Block of HYDE ST,Address_1000 Block of MARKET ST,Address_1000 Block of MISSION ST,Address_1000 Block of OAKDALE AV,Address_1000 Block of POINTLOBOS AV,Address_1000 Block of POLK ST,Address_1000 Block of POST ST,Address_1000 Block of POTRERO AV,Address_1000 Block of SUTTER ST,Address_1000 Block of VANNESS AV,Address_10TH ST / HARRISON ST,Address_1100 Block of FILLMORE ST,Address_1100 Block of FOLSOM ST,Address_1100 Block of FRANCISCO ST,Address_1100 Block of HOWARD ST,Address_1100 Block of MARKET ST,Address_1100 Block of MISSION ST,Address_1100 Block of OAKDALE AV,Address_1100 Block of PALOU AV,Address_1100 Block of POTRERO AV,Address_1100 Block of SCOTT ST,Address_1100 Block of SUTTER ST,Address_1100 Block of TURK ST,Address_1200 Block of EDDY ST,Address_1200 Block of HOWARD ST,Address_1200 Block of IRVING ST,Address_1200 Block of MARKET ST,Address_1200 Block of MISSION ST,Address_1200 Block of PAGE ST,Address_1200 Block of POLK 
ST,Address_1200 Block of SOUTH VAN NESS AV,Address_1200 Block of VANNESS AV,Address_1300 Block of BUSH ST,Address_1300 Block of MARKET ST,Address_1300 Block of MISSION ST,Address_1300 Block of REVERE AV,Address_1300 Block of WEBSTER ST,Address_1400 Block of HAIGHT ST,Address_1400 Block of MARKET ST,Address_1400 Block of PHELPS ST,Address_1400 Block of PINE ST,Address_1400 Block of THOMAS AV,Address_1500 Block of BAY SHORE BL,Address_1500 Block of EDDY ST,Address_1500 Block of FILLMORE ST,Address_1500 Block of HAIGHT ST,Address_1500 Block of MARKET ST,Address_1500 Block of MISSION ST,Address_1500 Block of POLK ST,Address_1500 Block of SLOAT BL,Address_1500 Block of SUNNYDALE AV,Address_1500 Block of VANNESS AV,Address_1600 Block of HAIGHT ST,Address_1600 Block of MARKET ST,Address_1600 Block of MCKINNON AV,Address_1600 Block of MISSION ST,Address_1600 Block of NEWCOMB AV,Address_1600 Block of PALOU AV,Address_1600 Block of SUNNYDALE AV,Address_1600 Block of THE EMBARCADERONORTH ST,Address_16TH ST / CAPP ST,Address_16TH ST / MISSION ST,Address_16TH ST / POTRERO AV,Address_16TH ST / SHOTWELL ST,Address_16TH ST / VALENCIA ST,Address_1700 Block of CALIFORNIA ST,Address_1700 Block of CESAR CHAVEZ ST,Address_1700 Block of FULTON ST,Address_1700 Block of HAIGHT ST,Address_1700 Block of HARRISON ST,Address_1700 Block of LASALLE AV,Address_1700 Block of MARKET ST,Address_1700 Block of MISSION ST,Address_1700 Block of NEWCOMB AV,Address_1700 Block of SUNNYDALE AV,Address_17TH ST / CAPP ST,Address_17TH ST / MISSION ST,Address_17TH ST / SHOTWELL ST,Address_1800 Block of 25TH ST,Address_1800 Block of FOLSOM ST,Address_1800 Block of HAIGHT ST,Address_1800 Block of MISSION ST,Address_1800 Block of OCEAN AV,Address_1800 Block of SUNNYDALE AV,Address_1800 Block of VICENTE ST,Address_1800 Block of WALLER ST,Address_18TH ST / CASTRO ST,Address_1900 Block of MISSION ST,Address_1900 Block of SUNNYDALE AV,Address_1900 Block of UNION ST,Address_19TH ST / SHOTWELL ST,Address_200 Block of 
3RD ST,Address_200 Block of 6TH ST,Address_200 Block of 7TH ST,Address_200 Block of 8TH ST,Address_200 Block of 9TH ST,Address_200 Block of BLYTHDALE AV,Address_200 Block of EDDY ST,Address_200 Block of GEARY ST,Address_200 Block of GOLDEN GATE AV,Address_200 Block of HYDE ST,Address_200 Block of I-80,Address_200 Block of INTERSTATE80 HY,Address_200 Block of JEFFERSON ST,Address_200 Block of JONES ST,Address_200 Block of KING ST,Address_200 Block of LEAVENWORTH ST,Address_200 Block of MASON ST,Address_200 Block of MCALLISTER ST,Address_200 Block of OFARRELL ST,Address_200 Block of POST ST,Address_200 Block of POWELL ST,Address_200 Block of SUTTER ST,Address_200 Block of TURK ST,Address_200 Block of WILLIAMS AV,Address_200 Block of WINSTON DR,Address_2000 Block of CHESTNUT ST,Address_2000 Block of MARKET ST,Address_2000 Block of MISSION ST,Address_20TH ST / CAPP ST,Address_2100 Block of 24TH AV,Address_2100 Block of CHESTNUT ST,Address_2100 Block of MARKET ST,Address_2100 Block of MISSION ST,Address_2200 Block of 14TH AV,Address_2200 Block of MARKET ST,Address_2200 Block of MISSION ST,Address_2300 Block of 16TH ST,Address_2300 Block of 24TH AV,Address_2300 Block of FOLSOM ST,Address_2300 Block of MARKET ST,Address_2300 Block of MISSION ST,Address_2400 Block of GEARY BL,Address_2400 Block of MISSION ST,Address_2400 Block of SAN BRUNO AV,Address_24TH ST / MISSION ST,Address_2500 Block of MISSION ST,Address_2500 Block of VANNESS AV,Address_2600 Block of GEARY BL,Address_2600 Block of MISSION ST,Address_2700 Block of MISSION ST,Address_2800 Block of MISSION ST,Address_2800 Block of TAYLOR ST,Address_2900 Block of 16TH ST,Address_300 Block of 11TH ST,Address_300 Block of 9TH ST,Address_300 Block of BATTERY ST,Address_300 Block of BAY SHORE BL,Address_300 Block of BAY ST,Address_300 Block of BEACH ST,Address_300 Block of CLEMENTINA ST,Address_300 Block of COLUMBUS AV,Address_300 Block of EDDY ST,Address_300 Block of ELLIS ST,Address_300 Block of GEARY ST,Address_300 Block 
of GOLDEN GATE AV,Address_300 Block of HYDE ST,Address_300 Block of JEFFERSON ST,Address_300 Block of LAGUNAHONDA BL,Address_300 Block of LEAVENWORTH ST,Address_300 Block of MASON ST,Address_300 Block of OFARRELL ST,Address_300 Block of POST ST,Address_300 Block of POWELL ST,Address_300 Block of STOCKTON ST,Address_300 Block of SUTTER ST,Address_300 Block of TURK ST,Address_300 Block of VALENCIA ST,Address_300 Block of WILLIAMS AV,Address_3000 Block of 16TH ST,Address_3100 Block of 16TH ST,Address_3100 Block of 24TH ST,Address_3100 Block of FILLMORE ST,Address_3100 Block of MISSION ST,Address_3200 Block of 20TH AV,Address_3200 Block of MISSION ST,Address_3200 Block of PIERCE ST,Address_3300 Block of MISSION ST,Address_3500 Block of CESAR CHAVEZ ST,Address_3600 Block of LYON ST,Address_3700 Block of 18TH ST,Address_3800 Block of 3RD ST,Address_3RD ST / NEWCOMB AV,Address_3RD ST / PALOU AV,Address_400 Block of 10TH ST,Address_400 Block of 6TH AV,Address_400 Block of 7TH ST,Address_400 Block of BROADWAY ST,Address_400 Block of CASTRO ST,Address_400 Block of CHURCH ST,Address_400 Block of EDDY ST,Address_400 Block of ELLIS ST,Address_400 Block of GEARY ST,Address_400 Block of HAIGHT ST,Address_400 Block of HYDE ST,Address_400 Block of JESSIE ST,Address_400 Block of JONES ST,Address_400 Block of LEAVENWORTH ST,Address_400 Block of MASON ST,Address_400 Block of MINNA ST,Address_400 Block of OFARRELL ST,Address_400 Block of POST ST,Address_400 Block of POWELL ST,Address_400 Block of RAYMOND AV,Address_400 Block of STANYAN ST,Address_400 Block of STEVENSON ST,Address_400 Block of STOCKTON ST,Address_400 Block of SUTTER ST,Address_400 Block of TURK ST,Address_400 Block of VALENCIA ST,Address_4000 Block of 18TH ST,Address_4100 Block of 18TH ST,Address_4500 Block of MISSION ST,Address_4600 Block of MISSION ST,Address_4700 Block of MISSION ST,Address_4900 Block of MISSION ST,Address_4TH ST / MARKET ST,Address_4TH ST / MISSION ST,Address_500 Block of 4TH ST,Address_500 Block of 
5TH ST,Address_500 Block of 9TH ST,Address_500 Block of BRANNAN ST,Address_500 Block of BROADWAY ST,Address_500 Block of CAPP ST,Address_500 Block of CASTRO ST,Address_500 Block of DOLORES ST,Address_500 Block of EDDY ST,Address_500 Block of ELLIS ST,Address_500 Block of GEARY ST,Address_500 Block of HAIGHT ST,Address_500 Block of HOWARD ST,Address_500 Block of HYDE ST,Address_500 Block of JOHN F KENNEDY DR,Address_500 Block of JOHNFKENNEDY DR,Address_500 Block of JONES ST,Address_500 Block of LEAVENWORTH ST,Address_500 Block of MARKET ST,Address_500 Block of MINNA ST,Address_500 Block of OFARRELL ST,Address_500 Block of POST ST,Address_500 Block of SOUTH VAN NESS AV,Address_500 Block of STEVENSON ST,Address_500 Block of SUTTER ST,Address_500 Block of VALENCIA ST,Address_5100 Block of MISSION ST,Address_5200 Block of DIAMONDHEIGHTS BL,Address_5TH ST / HARRISON ST,Address_5TH ST / MARKET ST,Address_5TH ST / MISSION ST,Address_600 Block of 32ND AV,Address_600 Block of EDDY ST,Address_600 Block of ELLIS ST,Address_600 Block of GEARY ST,Address_600 Block of JAMESTOWN AV,Address_600 Block of LARCH ST,Address_600 Block of MARKET ST,Address_600 Block of MISSION ST,Address_600 Block of NATOMA ST,Address_600 Block of OFARRELL ST,Address_600 Block of POST ST,Address_600 Block of SUTTER ST,Address_600 Block of VALENCIA ST,Address_6TH ST / HOWARD ST,Address_6TH ST / JESSIE ST,Address_6TH ST / MARKET ST,Address_6TH ST / MINNA ST,Address_6TH ST / MISSION ST,Address_6TH ST / STEVENSON ST,Address_700 Block of ELLIS ST,Address_700 Block of GEARY ST,Address_700 Block of HARRISON ST,Address_700 Block of HOWARD ST,Address_700 Block of KEARNY ST,Address_700 Block of KIRKWOOD AV,Address_700 Block of MARKET ST,Address_700 Block of MISSION ST,Address_700 Block of MISSOURI ST,Address_700 Block of OFARRELL ST,Address_700 Block of PACIFIC AV,Address_700 Block of POLK ST,Address_700 Block of POST ST,Address_700 Block of STANYAN ST,Address_700 Block of VALLEJO ST,Address_7TH ST / MARKET 
ST,Address_7TH ST / MISSION ST,Address_800 Block of 3RD ST,Address_800 Block of BRANNAN ST,Address_800 Block of BRYANT ST,Address_800 Block of CAPP ST,Address_800 Block of CENTRAL AV,Address_800 Block of FOLSOM ST,Address_800 Block of GEARY ST,Address_800 Block of HARRISON ST,Address_800 Block of HOWARD ST,Address_800 Block of LA PLAYA ST,Address_800 Block of MARKET ST,Address_800 Block of MISSION ST,Address_800 Block of OFARRELL ST,Address_800 Block of PACIFIC AV,Address_800 Block of POST ST,Address_800 Block of POTRERO AV,Address_800 Block of SUTTER ST,Address_800 Block of VALENCIA ST,Address_8TH ST / MARKET ST,Address_8TH ST / MISSION ST,Address_900 Block of CONNECTICUT ST,Address_900 Block of ELLSWORTH ST,Address_900 Block of FOLSOM ST,Address_900 Block of GEARY ST,Address_900 Block of HOWARD ST,Address_900 Block of HYDE ST,Address_900 Block of MARKET ST,Address_900 Block of MISSION ST,Address_900 Block of POST ST,Address_900 Block of POTRERO AV,Address_900 Block of SUTTER ST,Address_9TH ST / MISSION ST,Address_BROADWAY ST / COLUMBUS AV,Address_BROADWAY ST / KEARNY ST,Address_CAPP ST / 17TH ST,Address_CASTRO ST / MARKET ST,Address_EDDY ST / HYDE ST,Address_EDDY ST / JONES ST,Address_EDDY ST / LARKIN ST,Address_EDDY ST / LEAVENWORTH ST,Address_EDDY ST / MASON ST,Address_EDDY ST / TAYLOR ST,Address_ELLIS ST / HYDE ST,Address_ELLIS ST / JONES ST,Address_ELLIS ST / LEAVENWORTH ST,Address_FILLMORE ST / GEARY BL,Address_GEARY BL / FILLMORE ST,Address_GEARY BL / LAGUNA ST,Address_GEARY BL / STEINER ST,Address_GEARY BL / WEBSTER ST,Address_GEARY ST / LARKIN ST,Address_GEARY ST / POWELL ST,Address_GENEVA AV / MISSION ST,Address_GOLDEN GATE AV / HYDE ST,Address_GOLDEN GATE AV / JONES ST,Address_GOLDEN GATE AV / LEAVENWORTH ST,Address_HAIGHT ST / ASHBURY ST,Address_HAIGHT ST / COLE ST,Address_HAIGHT ST / MASONIC AV,Address_HAIGHT ST / SHRADER ST,Address_HAIGHT ST / STANYAN ST,Address_HEMLOCK ST / LARKIN ST,Address_HYDE ST / EDDY ST,Address_HYDE ST / ELLIS ST,Address_HYDE 
ST / GOLDEN GATE AV,Address_HYDE ST / OFARRELL ST,Address_HYDE ST / TURK ST,Address_JONES ST / ELLIS ST,Address_JONES ST / GOLDEN GATE AV,Address_JONES ST / MCALLISTER ST,Address_JONES ST / OFARRELL ST,Address_JONES ST / TURK ST,Address_LARKIN ST / OFARRELL ST,Address_LARKIN ST / SUTTER ST,Address_LEAVENWORTH ST / ELLIS ST,Address_LEAVENWORTH ST / GOLDEN GATE AV,Address_LEAVENWORTH ST / TURK ST,Address_MARKET ST / 4TH ST,Address_MARKET ST / 5TH ST,Address_MARKET ST / 6TH ST,Address_MARKET ST / 7TH ST,Address_MARKET ST / 8TH ST,Address_MARKET ST / JONES ST,Address_MARKET ST / MASON ST,Address_MARKET ST / POWELL ST,Address_MARKET ST / TAYLOR ST,Address_MARKET ST / VANNESS AV,Address_MASON ST / TURK ST,Address_MISSION ST / 16TH ST,Address_MISSION ST / 17TH ST,Address_MISSION ST / 18TH ST,Address_MISSION ST / 24TH ST,Address_MISSION ST / 4TH ST,Address_MISSION ST / 5TH ST,Address_MISSION ST / 6TH ST,Address_MISSION ST / 7TH ST,Address_MISSION ST / 8TH ST,Address_MISSION ST / GENEVA AV,Address_MISSION ST / SILVER AV,Address_OFARRELL ST / HYDE ST,Address_OFARRELL ST / JONES ST,Address_OFARRELL ST / LARKIN ST,Address_POLK ST / CALIFORNIA ST,Address_POLK ST / POST ST,Address_POLK ST / SUTTER ST,Address_POST ST / LARKIN ST,Address_POWELL ST / GEARY ST,Address_POWELL ST / MARKET ST,Address_SHOTWELL ST / 17TH ST,Address_STOCKTON ST / SUTTER ST,Address_SUTTER ST / LARKIN ST,Address_SUTTER ST / STOCKTON ST,Address_TAYLOR ST / EDDY ST,Address_TAYLOR ST / TURK ST,Address_TURK ST / HYDE ST,Address_TURK ST / JONES ST,Address_TURK ST / LEAVENWORTH ST,Address_TURK ST / MASON ST,Address_TURK ST / TAYLOR ST
0,WARRANTS,Wednesday,NORTHERN,-122.426,37.775,2015,5,20,13,23,23,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,OTHER OFFENSES,Wednesday,NORTHERN,-122.426,37.775,2015,5,20,13,23,23,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,OTHER OFFENSES,Wednesday,NORTHERN,-122.424,37.8,2015,5,20,13,23,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,LARCENY/THEFT,Wednesday,NORTHERN,-122.427,37.801,2015,5,20,13,23,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,LARCENY/THEFT,Wednesday,PARK,-122.439,37.772,2015,5,20,13,23,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [504]:
# Number of distinct values in the 'Category' column of the training data.
number_categories = df.Category.nunique()
# Parenthesised so the cell runs on both Python 2 and Python 3.
print(number_categories)

39


In [505]:
# Apply the same cleaning step to the test data.
# NOTE: clean_data deletes 'Dates' from the frame it is given, so re-running
# this cell on the already-cleaned df_test raises a KeyError; reload the CSV first.
df_test = clean_data(df_test)
df_test.head()

Unnamed: 0,Id,DayOfWeek,PdDistrict,X,Y,Year,Month,Week,Day,Hour,Minute,StreetCorner
0,0,Sunday,BAYVIEW,-122.4,37.735,2015,5,19,10,23,29,0
1,1,Sunday,BAYVIEW,-122.392,37.732,2015,5,19,10,23,21,1
2,2,Sunday,NORTHERN,-122.426,37.792,2015,5,19,10,23,20,0
3,3,Sunday,INGLESIDE,-122.437,37.721,2015,5,19,10,23,15,0
4,4,Sunday,INGLESIDE,-122.437,37.721,2015,5,19,10,23,15,0


In [18]:
# Repeat the Address dummy process for the test dataframe: every address that
# is not in addresses_to_keep is collapsed into a single 'other' bucket.
# NOTE(review): the clean_data output displayed earlier has no Address column,
# so on a top-to-bottom run this step would raise; it is guarded so it only
# runs while the raw Address column is still present.
if 'Address' in df_test.columns:
    rare_address = ~df_test['Address'].isin(addresses_to_keep)
    df_test.loc[rare_address, 'Address'] = 'other'


In [20]:
# One-hot encode the (bucketed) Address column of the test frame, using the
# 'other' bucket as the implicit baseline level.
# Guarded because Address may already have been removed from df_test.
if 'Address' in df_test.columns:
    dummy_Address = pd.get_dummies(df_test['Address'], prefix='Address')
    # errors='ignore': the test set may contain no 'other' rows at all, in
    # which case there is no Address_other column to remove.
    dummy_Address = dummy_Address.drop('Address_other', axis=1, errors='ignore')
    df_test = df_test.drop('Address', axis=1).join(dummy_Address)
df_test.head()

Unnamed: 0,Id,Dates,DayOfWeek,PdDistrict,X,Y,Address_0 Block of 12TH ST,Address_0 Block of 3RD ST,Address_0 Block of 4TH ST,Address_0 Block of 5TH ST,Address_0 Block of 5THSTNORTH ST,Address_0 Block of 6TH ST,Address_0 Block of 7TH ST,Address_0 Block of 8TH ST,Address_0 Block of 9TH ST,Address_0 Block of BLYTHDALE AV,Address_0 Block of BROOKDALE AV,Address_0 Block of CAMERON WY,Address_0 Block of CASHMERE ST,Address_0 Block of CASTRO ST,Address_0 Block of DAKOTA ST,Address_0 Block of DORE ST,Address_0 Block of DRUMM ST,Address_0 Block of FARALLONES ST,Address_0 Block of FELL ST,Address_0 Block of GOLDEN GATE AV,Address_0 Block of GOUGH ST,Address_0 Block of GRANT AV,Address_0 Block of GROVE ST,Address_0 Block of HARBOR RD,Address_0 Block of HAROLD AV,Address_0 Block of HOFF ST,Address_0 Block of JONES ST,Address_0 Block of JULES AV,Address_0 Block of KEZAR DR,Address_0 Block of LEDYARD ST,Address_0 Block of MARINA BL,Address_0 Block of MARKET ST,Address_0 Block of MASON ST,Address_0 Block of MCALLISTER ST,Address_0 Block of MOSS ST,Address_0 Block of NEWMONTGOMERY ST,Address_0 Block of NICHOLS WY,Address_0 Block of NORTHRIDGE RD,Address_0 Block of OFARRELL ST,Address_0 Block of OSCEOLA LN,Address_0 Block of PHELAN AV,Address_0 Block of POWELL ST,Address_0 Block of SGTJOHNVYOUNG LN,Address_0 Block of SHRADER ST,Address_0 Block of SOUTH VAN NESS AV,Address_0 Block of STOCKTON ST,Address_0 Block of TAYLOR ST,Address_0 Block of THE EMBARCADEROSOUTH ST,Address_0 Block of TURK ST,Address_0 Block of UNITED NATIONS PZ,Address_0 Block of UNITEDNATIONS PZ,Address_100 Block of 2ND ST,Address_100 Block of 3RD ST,Address_100 Block of 4TH ST,Address_100 Block of 6TH ST,Address_100 Block of 7TH ST,Address_100 Block of 9TH ST,Address_100 Block of APTOS AV,Address_100 Block of BELVEDERE ST,Address_100 Block of BLYTHDALE AV,Address_100 Block of BROOKDALE AV,Address_100 Block of CAMERON WY,Address_100 Block of CAPP ST,Address_100 Block of DAKOTA ST,Address_100 Block of EDDY 
ST,Address_100 Block of ELLIS ST,Address_100 Block of FONT BL,Address_100 Block of GOLDEN GATE AV,Address_100 Block of GROVE ST,Address_100 Block of HIGHLAND AV,Address_100 Block of HYDE ST,Address_100 Block of JEFFERSON ST,Address_100 Block of JOHNFKENNEDY DR,Address_100 Block of JONES ST,Address_100 Block of JULIAN AV,Address_100 Block of KISKA RD,Address_100 Block of LARKIN ST,Address_100 Block of LEAVENWORTH ST,Address_100 Block of MASON ST,Address_100 Block of MCALLISTER ST,Address_100 Block of OFARRELL ST,Address_100 Block of POST ST,Address_100 Block of POWELL ST,Address_100 Block of STOCKTON ST,Address_100 Block of TAYLOR ST,Address_100 Block of TURK ST,Address_100 Block of VALENCIA ST,Address_100 Block of WESTPOINT RD,Address_1000 Block of BUSH ST,Address_1000 Block of CAYUGA AV,Address_1000 Block of CONNECTICUT ST,Address_1000 Block of FITZGERALD AV,Address_1000 Block of FOLSOM ST,Address_1000 Block of GEARY ST,Address_1000 Block of HARRISON ST,Address_1000 Block of HOWARD ST,Address_1000 Block of HYDE ST,Address_1000 Block of MARKET ST,Address_1000 Block of MISSION ST,Address_1000 Block of OAKDALE AV,Address_1000 Block of POINTLOBOS AV,Address_1000 Block of POLK ST,Address_1000 Block of POST ST,Address_1000 Block of POTRERO AV,Address_1000 Block of SUTTER ST,Address_1000 Block of VANNESS AV,Address_10TH ST / HARRISON ST,Address_1100 Block of FILLMORE ST,Address_1100 Block of FOLSOM ST,Address_1100 Block of FRANCISCO ST,Address_1100 Block of HOWARD ST,Address_1100 Block of MARKET ST,Address_1100 Block of MISSION ST,Address_1100 Block of OAKDALE AV,Address_1100 Block of PALOU AV,Address_1100 Block of POTRERO AV,Address_1100 Block of SCOTT ST,Address_1100 Block of SUTTER ST,Address_1100 Block of TURK ST,Address_1200 Block of EDDY ST,Address_1200 Block of HOWARD ST,Address_1200 Block of IRVING ST,Address_1200 Block of MARKET ST,Address_1200 Block of MISSION ST,Address_1200 Block of PAGE ST,Address_1200 Block of POLK ST,Address_1200 Block of SOUTH VAN NESS 
AV,Address_1200 Block of VANNESS AV,Address_1300 Block of BUSH ST,Address_1300 Block of MARKET ST,Address_1300 Block of MISSION ST,Address_1300 Block of REVERE AV,Address_1300 Block of WEBSTER ST,Address_1400 Block of HAIGHT ST,Address_1400 Block of MARKET ST,Address_1400 Block of PHELPS ST,Address_1400 Block of PINE ST,Address_1400 Block of THOMAS AV,Address_1500 Block of BAY SHORE BL,Address_1500 Block of EDDY ST,Address_1500 Block of FILLMORE ST,Address_1500 Block of HAIGHT ST,Address_1500 Block of MARKET ST,Address_1500 Block of MISSION ST,Address_1500 Block of POLK ST,Address_1500 Block of SLOAT BL,Address_1500 Block of SUNNYDALE AV,Address_1500 Block of VANNESS AV,Address_1600 Block of HAIGHT ST,Address_1600 Block of MARKET ST,Address_1600 Block of MCKINNON AV,Address_1600 Block of MISSION ST,Address_1600 Block of NEWCOMB AV,Address_1600 Block of PALOU AV,Address_1600 Block of SUNNYDALE AV,Address_1600 Block of THE EMBARCADERONORTH ST,Address_16TH ST / CAPP ST,Address_16TH ST / MISSION ST,Address_16TH ST / POTRERO AV,Address_16TH ST / SHOTWELL ST,Address_16TH ST / VALENCIA ST,Address_1700 Block of CALIFORNIA ST,Address_1700 Block of CESAR CHAVEZ ST,Address_1700 Block of FULTON ST,Address_1700 Block of HAIGHT ST,Address_1700 Block of HARRISON ST,Address_1700 Block of LASALLE AV,Address_1700 Block of MARKET ST,Address_1700 Block of MISSION ST,Address_1700 Block of NEWCOMB AV,Address_1700 Block of SUNNYDALE AV,Address_17TH ST / CAPP ST,Address_17TH ST / MISSION ST,Address_17TH ST / SHOTWELL ST,Address_1800 Block of 25TH ST,Address_1800 Block of FOLSOM ST,Address_1800 Block of HAIGHT ST,Address_1800 Block of MISSION ST,Address_1800 Block of OCEAN AV,Address_1800 Block of SUNNYDALE AV,Address_1800 Block of VICENTE ST,Address_1800 Block of WALLER ST,Address_18TH ST / CASTRO ST,Address_1900 Block of MISSION ST,Address_1900 Block of SUNNYDALE AV,Address_1900 Block of UNION ST,Address_19TH ST / SHOTWELL ST,Address_200 Block of 3RD ST,Address_200 Block of 6TH 
ST,Address_200 Block of 7TH ST,Address_200 Block of 8TH ST,Address_200 Block of 9TH ST,Address_200 Block of BLYTHDALE AV,Address_200 Block of EDDY ST,Address_200 Block of GEARY ST,Address_200 Block of GOLDEN GATE AV,Address_200 Block of HYDE ST,Address_200 Block of I-80,Address_200 Block of INTERSTATE80 HY,Address_200 Block of JEFFERSON ST,Address_200 Block of JONES ST,Address_200 Block of KING ST,Address_200 Block of LEAVENWORTH ST,Address_200 Block of MASON ST,Address_200 Block of MCALLISTER ST,Address_200 Block of OFARRELL ST,Address_200 Block of POST ST,Address_200 Block of POWELL ST,Address_200 Block of SUTTER ST,Address_200 Block of TURK ST,Address_200 Block of WILLIAMS AV,Address_200 Block of WINSTON DR,Address_2000 Block of CHESTNUT ST,Address_2000 Block of MARKET ST,Address_2000 Block of MISSION ST,Address_20TH ST / CAPP ST,Address_2100 Block of 24TH AV,Address_2100 Block of CHESTNUT ST,Address_2100 Block of MARKET ST,Address_2100 Block of MISSION ST,Address_2200 Block of 14TH AV,Address_2200 Block of MARKET ST,Address_2200 Block of MISSION ST,Address_2300 Block of 16TH ST,Address_2300 Block of 24TH AV,Address_2300 Block of FOLSOM ST,Address_2300 Block of MARKET ST,Address_2300 Block of MISSION ST,Address_2400 Block of GEARY BL,Address_2400 Block of MISSION ST,Address_2400 Block of SAN BRUNO AV,Address_24TH ST / MISSION ST,Address_2500 Block of MISSION ST,Address_2500 Block of VANNESS AV,Address_2600 Block of GEARY BL,Address_2600 Block of MISSION ST,Address_2700 Block of MISSION ST,Address_2800 Block of MISSION ST,Address_2800 Block of TAYLOR ST,Address_2900 Block of 16TH ST,Address_300 Block of 11TH ST,Address_300 Block of 9TH ST,Address_300 Block of BATTERY ST,Address_300 Block of BAY SHORE BL,Address_300 Block of BAY ST,Address_300 Block of BEACH ST,Address_300 Block of CLEMENTINA ST,Address_300 Block of COLUMBUS AV,Address_300 Block of EDDY ST,Address_300 Block of ELLIS ST,Address_300 Block of GEARY ST,Address_300 Block of GOLDEN GATE AV,Address_300 
Block of HYDE ST,Address_300 Block of JEFFERSON ST,Address_300 Block of LAGUNAHONDA BL,Address_300 Block of LEAVENWORTH ST,Address_300 Block of MASON ST,Address_300 Block of OFARRELL ST,Address_300 Block of POST ST,Address_300 Block of POWELL ST,Address_300 Block of STOCKTON ST,Address_300 Block of SUTTER ST,Address_300 Block of TURK ST,Address_300 Block of VALENCIA ST,Address_300 Block of WILLIAMS AV,Address_3000 Block of 16TH ST,Address_3100 Block of 16TH ST,Address_3100 Block of 24TH ST,Address_3100 Block of FILLMORE ST,Address_3100 Block of MISSION ST,Address_3200 Block of 20TH AV,Address_3200 Block of MISSION ST,Address_3200 Block of PIERCE ST,Address_3300 Block of MISSION ST,Address_3500 Block of CESAR CHAVEZ ST,Address_3600 Block of LYON ST,Address_3700 Block of 18TH ST,Address_3800 Block of 3RD ST,Address_3RD ST / NEWCOMB AV,Address_3RD ST / PALOU AV,Address_400 Block of 10TH ST,Address_400 Block of 6TH AV,Address_400 Block of 7TH ST,Address_400 Block of BROADWAY ST,Address_400 Block of CASTRO ST,Address_400 Block of CHURCH ST,Address_400 Block of EDDY ST,Address_400 Block of ELLIS ST,Address_400 Block of GEARY ST,Address_400 Block of HAIGHT ST,Address_400 Block of HYDE ST,Address_400 Block of JESSIE ST,Address_400 Block of JONES ST,Address_400 Block of LEAVENWORTH ST,Address_400 Block of MASON ST,Address_400 Block of MINNA ST,Address_400 Block of OFARRELL ST,Address_400 Block of POST ST,Address_400 Block of POWELL ST,Address_400 Block of RAYMOND AV,Address_400 Block of STANYAN ST,Address_400 Block of STEVENSON ST,Address_400 Block of STOCKTON ST,Address_400 Block of SUTTER ST,Address_400 Block of TURK ST,Address_400 Block of VALENCIA ST,Address_4000 Block of 18TH ST,Address_4100 Block of 18TH ST,Address_4500 Block of MISSION ST,Address_4600 Block of MISSION ST,Address_4700 Block of MISSION ST,Address_4900 Block of MISSION ST,Address_4TH ST / MARKET ST,Address_4TH ST / MISSION ST,Address_500 Block of 4TH ST,Address_500 Block of 5TH ST,Address_500 Block of 
9TH ST,Address_500 Block of BRANNAN ST,Address_500 Block of BROADWAY ST,Address_500 Block of CAPP ST,Address_500 Block of CASTRO ST,Address_500 Block of DOLORES ST,Address_500 Block of EDDY ST,Address_500 Block of ELLIS ST,Address_500 Block of GEARY ST,Address_500 Block of HAIGHT ST,Address_500 Block of HOWARD ST,Address_500 Block of HYDE ST,Address_500 Block of JOHN F KENNEDY DR,Address_500 Block of JOHNFKENNEDY DR,Address_500 Block of JONES ST,Address_500 Block of LEAVENWORTH ST,Address_500 Block of MARKET ST,Address_500 Block of MINNA ST,Address_500 Block of OFARRELL ST,Address_500 Block of POST ST,Address_500 Block of SOUTH VAN NESS AV,Address_500 Block of STEVENSON ST,Address_500 Block of SUTTER ST,Address_500 Block of VALENCIA ST,Address_5100 Block of MISSION ST,Address_5200 Block of DIAMONDHEIGHTS BL,Address_5TH ST / HARRISON ST,Address_5TH ST / MARKET ST,Address_5TH ST / MISSION ST,Address_600 Block of 32ND AV,Address_600 Block of EDDY ST,Address_600 Block of ELLIS ST,Address_600 Block of GEARY ST,Address_600 Block of JAMESTOWN AV,Address_600 Block of LARCH ST,Address_600 Block of MARKET ST,Address_600 Block of MISSION ST,Address_600 Block of NATOMA ST,Address_600 Block of OFARRELL ST,Address_600 Block of POST ST,Address_600 Block of SUTTER ST,Address_600 Block of VALENCIA ST,Address_6TH ST / HOWARD ST,Address_6TH ST / JESSIE ST,Address_6TH ST / MARKET ST,Address_6TH ST / MINNA ST,Address_6TH ST / MISSION ST,Address_6TH ST / STEVENSON ST,Address_700 Block of ELLIS ST,Address_700 Block of GEARY ST,Address_700 Block of HARRISON ST,Address_700 Block of HOWARD ST,Address_700 Block of KEARNY ST,Address_700 Block of KIRKWOOD AV,Address_700 Block of MARKET ST,Address_700 Block of MISSION ST,Address_700 Block of MISSOURI ST,Address_700 Block of OFARRELL ST,Address_700 Block of PACIFIC AV,Address_700 Block of POLK ST,Address_700 Block of POST ST,Address_700 Block of STANYAN ST,Address_700 Block of VALLEJO ST,Address_7TH ST / MARKET ST,Address_7TH ST / MISSION 
ST,Address_800 Block of 3RD ST,Address_800 Block of BRANNAN ST,Address_800 Block of BRYANT ST,Address_800 Block of CAPP ST,Address_800 Block of CENTRAL AV,Address_800 Block of FOLSOM ST,Address_800 Block of GEARY ST,Address_800 Block of HARRISON ST,Address_800 Block of HOWARD ST,Address_800 Block of LA PLAYA ST,Address_800 Block of MARKET ST,Address_800 Block of MISSION ST,Address_800 Block of OFARRELL ST,Address_800 Block of PACIFIC AV,Address_800 Block of POST ST,Address_800 Block of POTRERO AV,Address_800 Block of SUTTER ST,Address_800 Block of VALENCIA ST,Address_8TH ST / MARKET ST,Address_8TH ST / MISSION ST,Address_900 Block of CONNECTICUT ST,Address_900 Block of ELLSWORTH ST,Address_900 Block of FOLSOM ST,Address_900 Block of GEARY ST,Address_900 Block of HOWARD ST,Address_900 Block of HYDE ST,Address_900 Block of MARKET ST,Address_900 Block of MISSION ST,Address_900 Block of POST ST,Address_900 Block of POTRERO AV,Address_900 Block of SUTTER ST,Address_9TH ST / MISSION ST,Address_BROADWAY ST / COLUMBUS AV,Address_BROADWAY ST / KEARNY ST,Address_CAPP ST / 17TH ST,Address_CASTRO ST / MARKET ST,Address_EDDY ST / HYDE ST,Address_EDDY ST / JONES ST,Address_EDDY ST / LARKIN ST,Address_EDDY ST / LEAVENWORTH ST,Address_EDDY ST / MASON ST,Address_EDDY ST / TAYLOR ST,Address_ELLIS ST / HYDE ST,Address_ELLIS ST / JONES ST,Address_ELLIS ST / LEAVENWORTH ST,Address_FILLMORE ST / GEARY BL,Address_GEARY BL / FILLMORE ST,Address_GEARY BL / LAGUNA ST,Address_GEARY BL / STEINER ST,Address_GEARY BL / WEBSTER ST,Address_GEARY ST / LARKIN ST,Address_GEARY ST / POWELL ST,Address_GENEVA AV / MISSION ST,Address_GOLDEN GATE AV / HYDE ST,Address_GOLDEN GATE AV / JONES ST,Address_GOLDEN GATE AV / LEAVENWORTH ST,Address_HAIGHT ST / ASHBURY ST,Address_HAIGHT ST / COLE ST,Address_HAIGHT ST / MASONIC AV,Address_HAIGHT ST / SHRADER ST,Address_HAIGHT ST / STANYAN ST,Address_HEMLOCK ST / LARKIN ST,Address_HYDE ST / EDDY ST,Address_HYDE ST / ELLIS ST,Address_HYDE ST / GOLDEN GATE 
AV,Address_HYDE ST / OFARRELL ST,Address_HYDE ST / TURK ST,Address_JONES ST / ELLIS ST,Address_JONES ST / GOLDEN GATE AV,Address_JONES ST / MCALLISTER ST,Address_JONES ST / OFARRELL ST,Address_JONES ST / TURK ST,Address_LARKIN ST / OFARRELL ST,Address_LARKIN ST / SUTTER ST,Address_LEAVENWORTH ST / ELLIS ST,Address_LEAVENWORTH ST / GOLDEN GATE AV,Address_LEAVENWORTH ST / TURK ST,Address_MARKET ST / 4TH ST,Address_MARKET ST / 5TH ST,Address_MARKET ST / 6TH ST,Address_MARKET ST / 7TH ST,Address_MARKET ST / 8TH ST,Address_MARKET ST / JONES ST,Address_MARKET ST / MASON ST,Address_MARKET ST / POWELL ST,Address_MARKET ST / TAYLOR ST,Address_MARKET ST / VANNESS AV,Address_MASON ST / TURK ST,Address_MISSION ST / 16TH ST,Address_MISSION ST / 17TH ST,Address_MISSION ST / 18TH ST,Address_MISSION ST / 24TH ST,Address_MISSION ST / 4TH ST,Address_MISSION ST / 5TH ST,Address_MISSION ST / 6TH ST,Address_MISSION ST / 7TH ST,Address_MISSION ST / 8TH ST,Address_MISSION ST / GENEVA AV,Address_MISSION ST / SILVER AV,Address_OFARRELL ST / HYDE ST,Address_OFARRELL ST / JONES ST,Address_OFARRELL ST / LARKIN ST,Address_POLK ST / CALIFORNIA ST,Address_POLK ST / POST ST,Address_POLK ST / SUTTER ST,Address_POST ST / LARKIN ST,Address_POWELL ST / GEARY ST,Address_POWELL ST / MARKET ST,Address_SHOTWELL ST / 17TH ST,Address_STOCKTON ST / SUTTER ST,Address_SUTTER ST / LARKIN ST,Address_SUTTER ST / STOCKTON ST,Address_TAYLOR ST / EDDY ST,Address_TAYLOR ST / TURK ST,Address_TURK ST / HYDE ST,Address_TURK ST / JONES ST,Address_TURK ST / LEAVENWORTH ST,Address_TURK ST / MASON ST,Address_TURK ST / TAYLOR ST
0,0,2015-05-10 23:59:00,Sunday,BAYVIEW,-122.4,37.735,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,2015-05-10 23:51:00,Sunday,BAYVIEW,-122.392,37.732,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2,2015-05-10 23:50:00,Sunday,NORTHERN,-122.426,37.792,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,3,2015-05-10 23:45:00,Sunday,INGLESIDE,-122.437,37.721,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,4,2015-05-10 23:45:00,Sunday,INGLESIDE,-122.437,37.721,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [506]:
# TODO: fold this into clean_data; it did not behave as expected when it was
# placed inside that function.

# One-hot encode DayOfWeek and PdDistrict on both the training and test frames.
# Each entry is (source column, dummy prefix, baseline level to leave out).
DUMMY_SPECS = [
    ('DayOfWeek', 'Day', 'Friday'),
    ('PdDistrict', 'District', 'SOUTHERN'),
]

for column, prefix, baseline in DUMMY_SPECS:
    # Training frame defines the set of dummy columns (baseline level removed).
    train_dummies = pd.get_dummies(df[column], prefix=prefix)
    train_dummies = train_dummies.drop(prefix + '_' + baseline, axis=1)

    # Reindex the test dummies to the training columns so both feature
    # matrices always have identical columns in identical order, even if a
    # level is absent from (or only present in) the test set.
    test_dummies = pd.get_dummies(df_test[column], prefix=prefix)
    test_dummies = test_dummies.reindex(columns=train_dummies.columns, fill_value=0)

    df = df.drop(column, axis=1).join(train_dummies)
    df_test = df_test.drop(column, axis=1).join(test_dummies)

In [507]:
# Preview the first rows of the training frame.
df.head()

Unnamed: 0,Category,X,Y,Year,Month,Week,Day,Hour,Minute,StreetCorner,Day_Monday,Day_Saturday,Day_Sunday,Day_Thursday,Day_Tuesday,Day_Wednesday,District_BAYVIEW,District_CENTRAL,District_INGLESIDE,District_MISSION,District_NORTHERN,District_PARK,District_RICHMOND,District_TARAVAL,District_TENDERLOIN
0,WARRANTS,-122.426,37.775,2015,5,20,13,23,23,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0
1,OTHER OFFENSES,-122.426,37.775,2015,5,20,13,23,23,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0
2,OTHER OFFENSES,-122.424,37.8,2015,5,20,13,23,3,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0
3,LARCENY/THEFT,-122.427,37.801,2015,5,20,13,23,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0
4,LARCENY/THEFT,-122.439,37.772,2015,5,20,13,23,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0


In [508]:
# Preview the first rows of the test frame.
df_test.head()

Unnamed: 0,Id,X,Y,Year,Month,Week,Day,Hour,Minute,StreetCorner,Day_Monday,Day_Saturday,Day_Sunday,Day_Thursday,Day_Tuesday,Day_Wednesday,District_BAYVIEW,District_CENTRAL,District_INGLESIDE,District_MISSION,District_NORTHERN,District_PARK,District_RICHMOND,District_TARAVAL,District_TENDERLOIN
0,0,-122.4,37.735,2015,5,19,10,23,29,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0
1,1,-122.392,37.732,2015,5,19,10,23,21,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0
2,2,-122.426,37.792,2015,5,19,10,23,20,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0
3,3,-122.437,37.721,2015,5,19,10,23,15,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0
4,4,-122.437,37.721,2015,5,19,10,23,15,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0


In [509]:
# As noted in the data exploration file, some longitude (X) and latitude (Y)
# values are obviously incorrect. Impute these with the median.

# Mark the incorrect sentinel values as NaN.
# Assign the result back instead of calling replace(..., inplace=True) on
# df['X']: in-place mutation of a selected column is chained assignment and
# does not update df when pandas Copy-on-Write is active.
df['X'] = df['X'].replace(-120.5, np.nan)
df['Y'] = df['Y'].replace(90, np.nan)

# Medians for imputation (NaN is skipped). Saved so they can be reused for
# the test file.
medianX = df['X'].median()
medianY = df['Y'].median()

# Median imputation.
df['X'] = df['X'].fillna(medianX)
df['Y'] = df['Y'].fillna(medianY)

In [510]:
# Median imputation in the test file.
# Mark the incorrect sentinel values as NaN. The result is assigned back
# rather than using replace(..., inplace=True) on df_test['X'], which is
# chained assignment and does not update df_test under pandas Copy-on-Write.
df_test['X'] = df_test['X'].replace(-120.5, np.nan)
df_test['Y'] = df_test['Y'].replace(90, np.nan)

# Fill with medianX / medianY, the medians saved from the training frame.
df_test['X'] = df_test['X'].fillna(medianX)
df_test['Y'] = df_test['Y'].fillna(medianY)

## Target Vector and Feature Matrix

In [511]:
# Target vector y: the Category column of the training frame.
y = df['Category']
# Preview the first few labels.
y.head()

0          WARRANTS
1    OTHER OFFENSES
2    OTHER OFFENSES
3     LARCENY/THEFT
4     LARCENY/THEFT
Name: Category, dtype: object

In [512]:
# Feature matrix X: every training column except the target.
# drop() returns a new frame instead of deleting the column through an alias
# of df, so df keeps 'Category' and both this cell and the target-vector cell
# can be re-run without a KeyError.
X = df.drop('Category', axis=1)
X.head()

Unnamed: 0,X,Y,Year,Month,Week,Day,Hour,Minute,StreetCorner,Day_Monday,Day_Saturday,Day_Sunday,Day_Thursday,Day_Tuesday,Day_Wednesday,District_BAYVIEW,District_CENTRAL,District_INGLESIDE,District_MISSION,District_NORTHERN,District_PARK,District_RICHMOND,District_TARAVAL,District_TENDERLOIN
0,-122.426,37.775,2015,5,20,13,23,23,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0
1,-122.426,37.775,2015,5,20,13,23,23,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0
2,-122.424,37.8,2015,5,20,13,23,3,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0
3,-122.427,37.801,2015,5,20,13,23,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0
4,-122.439,37.772,2015,5,20,13,23,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0


In [513]:
# Scale the continuous training features to zero mean and unit variance.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler(copy=True)

# Columns treated as continuous features.
# NOTE(review): Month is not listed here, so it is left unscaled — confirm
# that this is intended.
ContinuousCols = ['X', 'Y', 'Year', 'Week', 'Day', 'Hour', 'Minute']

# Fit the scaler on the training data and transform it in one step. The
# result is a bare array, so rebuild a DataFrame that keeps X's index:
# pd.concat(axis=1) aligns rows by index label, and a fresh 0..n-1 index
# would misalign rows if X's index were anything else.
X_continuous = pd.DataFrame(
    scaler.fit_transform(X[ContinuousCols]),
    columns=ContinuousCols,
    index=X.index,
)

# Replace the unscaled columns with the scaled ones (scaled columns first).
X = pd.concat([X_continuous, X.drop(ContinuousCols, axis=1)], axis=1)
X.head()

Unnamed: 0,X,Y,Year,Week,Day,Hour,Minute,Month,StreetCorner,Day_Monday,Day_Saturday,Day_Sunday,Day_Thursday,Day_Tuesday,Day_Wednesday,District_BAYVIEW,District_CENTRAL,District_INGLESIDE,District_MISSION,District_NORTHERN,District_PARK,District_RICHMOND,District_TARAVAL,District_TENDERLOIN
0,-0.124,0.313,1.732,-0.426,-0.293,1.464,1.766,5,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0
1,-0.124,0.313,1.732,-0.426,-0.293,1.464,1.766,5,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0
2,-0.063,1.381,1.732,-0.426,-0.293,1.464,0.691,5,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0
3,-0.167,1.4,1.732,-0.426,-0.293,1.464,0.529,5,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0
4,-0.632,0.186,1.732,-0.426,-0.293,1.464,0.529,5,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0


In [514]:
# Scale the test data as well, reusing the scaler object that was fitted on
# the training data (transform only, no re-fitting).

kaggle_X = df_test

# Scale the continuous columns. Keep kaggle_X's index on the rebuilt frame so
# that the index-aligned pd.concat(axis=1) below pairs each scaled row with
# the correct remaining columns.
kaggle_X_continuous = pd.DataFrame(
    scaler.transform(kaggle_X[ContinuousCols]),
    columns=ContinuousCols,
    index=kaggle_X.index,
)

# Replace the unscaled columns with the scaled ones (scaled columns first,
# the same layout used for the training matrix).
kaggle_X = pd.concat([kaggle_X_continuous, kaggle_X.drop(ContinuousCols, axis=1)], axis=1)
kaggle_X.head()

Unnamed: 0,X,Y,Year,Week,Day,Hour,Minute,Id,Month,StreetCorner,Day_Monday,Day_Saturday,Day_Sunday,Day_Thursday,Day_Tuesday,Day_Wednesday,District_BAYVIEW,District_CENTRAL,District_INGLESIDE,District_MISSION,District_NORTHERN,District_PARK,District_RICHMOND,District_TARAVAL,District_TENDERLOIN
0,0.917,-1.324,1.732,-0.493,-0.634,1.464,2.089,0,5,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0
1,1.236,-1.432,1.732,-0.493,-0.634,1.464,1.659,1,5,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0
2,-0.128,1.042,1.732,-0.493,-0.634,1.464,1.605,2,5,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0
3,-0.579,-1.888,1.732,-0.493,-0.634,1.464,1.336,3,5,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0
4,-0.579,-1.888,1.732,-0.493,-0.634,1.464,1.336,4,5,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0


In [515]:
# Set the Id column aside (and remove it from the feature matrix) so the
# classifier only sees feature columns.
ids = kaggle_X.pop('Id')

# Fit a multi-class XGBoost model on the training data, then build a frame of
# per-class probabilities for the test rows, one column per class label.
xgb_params = dict(
    objective='multi:softprob',
    max_depth=6,
    learning_rate=1.0,
    max_delta_step=1,
    seed=0,
)
xgb = XGBClassifier(**xgb_params)
xgb.fit(X, y)
class_probabilities = xgb.predict_proba(kaggle_X)
predictions = pd.DataFrame(class_probabilities, columns=xgb.classes_)

# Grid search over max_depth, kept below as an inert string literal.
'''
xgb = XGBClassifier()

from sklearn.grid_search import GridSearchCV
param_grid = {'max_depth': np.arange(3, 12)}
grid = GridSearchCV(xgb, param_grid, n_jobs=4)
grid.fit(X, y)
print grid.grid_scores_
print grid.best_score_
print grid.best_estimator_
print grid.best_params_
'''

"\nxgb = XGBClassifier()\n\nfrom sklearn.grid_search import GridSearchCV\nparam_grid = {'max_depth': np.arange(3, 12)}\ngrid = GridSearchCV(xgb, param_grid, n_jobs=4)\ngrid.fit(X, y)\nprint grid.grid_scores_\nprint grid.best_score_\nprint grid.best_estimator_\nprint grid.best_params_\n"

In [516]:
# preview the first five rows of the per-class probability frame
predictions.head(n=5)

Unnamed: 0,ARSON,ASSAULT,BAD CHECKS,BRIBERY,BURGLARY,DISORDERLY CONDUCT,DRIVING UNDER THE INFLUENCE,DRUG/NARCOTIC,DRUNKENNESS,EMBEZZLEMENT,EXTORTION,FAMILY OFFENSES,FORGERY/COUNTERFEITING,FRAUD,GAMBLING,KIDNAPPING,LARCENY/THEFT,LIQUOR LAWS,LOITERING,MISSING PERSON,NON-CRIMINAL,OTHER OFFENSES,PORNOGRAPHY/OBSCENE MAT,PROSTITUTION,RECOVERED VEHICLE,ROBBERY,RUNAWAY,SECONDARY CODES,SEX OFFENSES FORCIBLE,SEX OFFENSES NON FORCIBLE,STOLEN PROPERTY,SUICIDE,SUSPICIOUS OCC,TREA,TRESPASS,VANDALISM,VEHICLE THEFT,WARRANTS,WEAPON LAWS
0,0.0003173,0.036,2.963e-08,1.702e-07,0.09975,2.809e-05,0.0005056,0.016,0.005111,1.208e-05,9.3e-07,5.896e-05,0.0001871,0.001,6.216e-07,4.823e-05,0.031,5.908e-05,2.465e-07,0.165,0.076,0.111,1.091e-09,1.938e-06,1.298e-06,0.004,5.724e-05,0.01,3.019e-05,5.616e-08,0.0009122,7.445e-06,0.068,1.494e-07,0.0005601,0.097,0.264,0.012,0.001
1,9.87e-05,0.007,1.097e-08,2.178e-06,0.0004919,1.684e-05,0.002233,0.05,0.000368,9.431e-06,6.44e-08,1.218e-06,0.001587,0.002,5.22e-05,2.067e-05,0.005,0.001296,5.297e-07,0.021,0.026,0.566,3.563e-10,7.427e-08,7.572e-07,0.023,2.198e-06,0.004,7.207e-05,2.38e-07,0.002024,1.302e-05,0.066,2.28e-07,0.0001368,0.007,0.084,0.102,0.029
2,0.001593,0.111,1.274e-06,5.61e-08,0.052,0.0005595,0.0002295,0.041,0.0004642,1.273e-05,3.233e-07,4.64e-06,0.001449,0.005,1.674e-08,0.0004919,0.35,7.399e-06,4.745e-09,0.019,0.055,0.058,8.201e-10,6.686e-06,1.933e-06,0.032,4.141e-07,0.004,0.05123,3.668e-08,0.01111,3.056e-07,0.021,2.344e-08,0.00217,0.122,0.044,0.011,0.006
3,0.0009131,0.126,2.386e-08,2.11e-05,0.03084,0.001222,0.0007371,0.007,0.001391,5.206e-06,1.032e-05,0.001118,0.001712,0.006,2.185e-06,0.000275,0.054,0.0006665,1.62e-06,0.057,0.177,0.07,6.416e-10,0.0003013,1.539e-06,0.1,3.507e-05,0.006,0.000335,1.66e-07,0.001039,7.694e-06,0.053,1.571e-07,0.007144,0.088,0.12,0.021,0.068
4,0.0009131,0.126,2.386e-08,2.11e-05,0.03084,0.001222,0.0007371,0.007,0.001391,5.206e-06,1.032e-05,0.001118,0.001712,0.006,2.185e-06,0.000275,0.054,0.0006665,1.62e-06,0.057,0.177,0.07,6.416e-10,0.0003013,1.539e-06,0.1,3.507e-05,0.006,0.000335,1.66e-07,0.001039,7.694e-06,0.053,1.571e-07,0.007144,0.088,0.12,0.021,0.068


In [517]:
# Put the Id column back as the first column of the predictions frame.
# Guarded so that re-running this cell does not prepend a second 'Id' column.
if 'Id' not in predictions.columns:
    predictions = pd.concat([ids, predictions], axis=1)
predictions.head()

Unnamed: 0,Id,ARSON,ASSAULT,BAD CHECKS,BRIBERY,BURGLARY,DISORDERLY CONDUCT,DRIVING UNDER THE INFLUENCE,DRUG/NARCOTIC,DRUNKENNESS,EMBEZZLEMENT,EXTORTION,FAMILY OFFENSES,FORGERY/COUNTERFEITING,FRAUD,GAMBLING,KIDNAPPING,LARCENY/THEFT,LIQUOR LAWS,LOITERING,MISSING PERSON,NON-CRIMINAL,OTHER OFFENSES,PORNOGRAPHY/OBSCENE MAT,PROSTITUTION,RECOVERED VEHICLE,ROBBERY,RUNAWAY,SECONDARY CODES,SEX OFFENSES FORCIBLE,SEX OFFENSES NON FORCIBLE,STOLEN PROPERTY,SUICIDE,SUSPICIOUS OCC,TREA,TRESPASS,VANDALISM,VEHICLE THEFT,WARRANTS,WEAPON LAWS
0,0,0.0003173,0.036,2.963e-08,1.702e-07,0.09975,2.809e-05,0.0005056,0.016,0.005111,1.208e-05,9.3e-07,5.896e-05,0.0001871,0.001,6.216e-07,4.823e-05,0.031,5.908e-05,2.465e-07,0.165,0.076,0.111,1.091e-09,1.938e-06,1.298e-06,0.004,5.724e-05,0.01,3.019e-05,5.616e-08,0.0009122,7.445e-06,0.068,1.494e-07,0.0005601,0.097,0.264,0.012,0.001
1,1,9.87e-05,0.007,1.097e-08,2.178e-06,0.0004919,1.684e-05,0.002233,0.05,0.000368,9.431e-06,6.44e-08,1.218e-06,0.001587,0.002,5.22e-05,2.067e-05,0.005,0.001296,5.297e-07,0.021,0.026,0.566,3.563e-10,7.427e-08,7.572e-07,0.023,2.198e-06,0.004,7.207e-05,2.38e-07,0.002024,1.302e-05,0.066,2.28e-07,0.0001368,0.007,0.084,0.102,0.029
2,2,0.001593,0.111,1.274e-06,5.61e-08,0.052,0.0005595,0.0002295,0.041,0.0004642,1.273e-05,3.233e-07,4.64e-06,0.001449,0.005,1.674e-08,0.0004919,0.35,7.399e-06,4.745e-09,0.019,0.055,0.058,8.201e-10,6.686e-06,1.933e-06,0.032,4.141e-07,0.004,0.05123,3.668e-08,0.01111,3.056e-07,0.021,2.344e-08,0.00217,0.122,0.044,0.011,0.006
3,3,0.0009131,0.126,2.386e-08,2.11e-05,0.03084,0.001222,0.0007371,0.007,0.001391,5.206e-06,1.032e-05,0.001118,0.001712,0.006,2.185e-06,0.000275,0.054,0.0006665,1.62e-06,0.057,0.177,0.07,6.416e-10,0.0003013,1.539e-06,0.1,3.507e-05,0.006,0.000335,1.66e-07,0.001039,7.694e-06,0.053,1.571e-07,0.007144,0.088,0.12,0.021,0.068
4,4,0.0009131,0.126,2.386e-08,2.11e-05,0.03084,0.001222,0.0007371,0.007,0.001391,5.206e-06,1.032e-05,0.001118,0.001712,0.006,2.185e-06,0.000275,0.054,0.0006665,1.62e-06,0.057,0.177,0.07,6.416e-10,0.0003013,1.539e-06,0.1,3.507e-05,0.006,0.000335,1.66e-07,0.001039,7.694e-06,0.053,1.571e-07,0.007144,0.088,0.12,0.021,0.068


In [518]:
# Write the submission file without the row index. The output is
# gzip-compressed, so the file name carries a .gz suffix; a bare .csv name
# would make extension-based readers treat the compressed bytes as plain text.
predictions.to_csv('submission12.csv.gz', index=False, compression='gzip')