Featurization for Dataset

In [54]:
! pip install mpu --quiet

In [55]:
import warnings
warnings.filterwarnings('ignore')

import json
import os
import pickle
import pandas as pd
import numpy as np

from mpu import haversine_distance
from tqdm import tqdml

from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest
from sklearn.feature_extraction.text import (
    CountVectorizer,
    TfidfVectorizer
)

Elementary Data Analysis

In [56]:
df_train = pd.read_csv('datasets/train.csv')
df_train

Unnamed: 0,Dates,Category,Descript,DayOfWeek,PdDistrict,Resolution,Address,X,Y
0,2015-05-13 23:53:00,WARRANTS,WARRANT ARREST,Wednesday,NORTHERN,"ARREST, BOOKED",OAK ST / LAGUNA ST,-122.425892,37.774599
1,2015-05-13 23:53:00,OTHER OFFENSES,TRAFFIC VIOLATION ARREST,Wednesday,NORTHERN,"ARREST, BOOKED",OAK ST / LAGUNA ST,-122.425892,37.774599
2,2015-05-13 23:33:00,OTHER OFFENSES,TRAFFIC VIOLATION ARREST,Wednesday,NORTHERN,"ARREST, BOOKED",VANNESS AV / GREENWICH ST,-122.424363,37.800414
3,2015-05-13 23:30:00,LARCENY/THEFT,GRAND THEFT FROM LOCKED AUTO,Wednesday,NORTHERN,NONE,1500 Block of LOMBARD ST,-122.426995,37.800873
4,2015-05-13 23:30:00,LARCENY/THEFT,GRAND THEFT FROM LOCKED AUTO,Wednesday,PARK,NONE,100 Block of BRODERICK ST,-122.438738,37.771541
...,...,...,...,...,...,...,...,...,...
878044,2003-01-06 00:15:00,ROBBERY,ROBBERY ON THE STREET WITH A GUN,Monday,TARAVAL,NONE,FARALLONES ST / CAPITOL AV,-122.459033,37.714056
878045,2003-01-06 00:01:00,LARCENY/THEFT,GRAND THEFT FROM LOCKED AUTO,Monday,INGLESIDE,NONE,600 Block of EDNA ST,-122.447364,37.731948
878046,2003-01-06 00:01:00,LARCENY/THEFT,GRAND THEFT FROM LOCKED AUTO,Monday,SOUTHERN,NONE,5TH ST / FOLSOM ST,-122.403390,37.780266
878047,2003-01-06 00:01:00,VANDALISM,"MALICIOUS MISCHIEF, VANDALISM OF VEHICLES",Monday,SOUTHERN,NONE,TOWNSEND ST / 2ND ST,-122.390531,37.780607


In [57]:
df_train['Category'].value_counts()

LARCENY/THEFT                  174900
OTHER OFFENSES                 126182
NON-CRIMINAL                    92304
ASSAULT                         76876
DRUG/NARCOTIC                   53971
VEHICLE THEFT                   53781
VANDALISM                       44725
WARRANTS                        42214
BURGLARY                        36755
SUSPICIOUS OCC                  31414
MISSING PERSON                  25989
ROBBERY                         23000
FRAUD                           16679
FORGERY/COUNTERFEITING          10609
SECONDARY CODES                  9985
WEAPON LAWS                      8555
PROSTITUTION                     7484
TRESPASS                         7326
STOLEN PROPERTY                  4540
SEX OFFENSES FORCIBLE            4388
DISORDERLY CONDUCT               4320
DRUNKENNESS                      4280
RECOVERED VEHICLE                3138
KIDNAPPING                       2341
DRIVING UNDER THE INFLUENCE      2268
RUNAWAY                          1946
LIQUOR LAWS 

In [58]:
df_test = pd.read_csv('datasets/test.csv')
df_test

Unnamed: 0,Id,Dates,DayOfWeek,PdDistrict,Address,X,Y
0,0,2015-05-10 23:59:00,Sunday,BAYVIEW,2000 Block of THOMAS AV,-122.399588,37.735051
1,1,2015-05-10 23:51:00,Sunday,BAYVIEW,3RD ST / REVERE AV,-122.391523,37.732432
2,2,2015-05-10 23:50:00,Sunday,NORTHERN,2000 Block of GOUGH ST,-122.426002,37.792212
3,3,2015-05-10 23:45:00,Sunday,INGLESIDE,4700 Block of MISSION ST,-122.437394,37.721412
4,4,2015-05-10 23:45:00,Sunday,INGLESIDE,4700 Block of MISSION ST,-122.437394,37.721412
...,...,...,...,...,...,...,...
884257,884257,2003-01-01 00:01:00,Wednesday,MISSION,2600 Block of BRYANT ST,-122.408983,37.751987
884258,884258,2003-01-01 00:01:00,Wednesday,NORTHERN,1900 Block of WASHINGTON ST,-122.425342,37.792681
884259,884259,2003-01-01 00:01:00,Wednesday,INGLESIDE,5500 Block of MISSION ST,-122.445418,37.712075
884260,884260,2003-01-01 00:01:00,Wednesday,BAYVIEW,1500 Block of HUDSON AV,-122.387394,37.739479


In [59]:
df_train.shape

(878049, 9)

In [60]:
df_test.shape

(884262, 7)

In [61]:
df_test.columns

Index(['Id', 'Dates', 'DayOfWeek', 'PdDistrict', 'Address', 'X', 'Y'], dtype='object')

In [62]:
#Renaming columns into easier names
renamed_cols = ['time', 'category', 'description', 'weekday', 'police_dept', 
                      'resolution', 'address', 'longitude', 'latitude']
df_train.columns = renamed_cols
test_cols_renamed = ['id', 'time', 'weekday', 'police_dept', 'address', 'longitude', 'latitude']
df_test.columns = test_cols_renamed

In [63]:
df_train.drop(columns=['description', 'resolution'], axis=1, inplace=True)

In [64]:
df_train.head()

Unnamed: 0,time,category,weekday,police_dept,address,longitude,latitude
0,2015-05-13 23:53:00,WARRANTS,Wednesday,NORTHERN,OAK ST / LAGUNA ST,-122.425892,37.774599
1,2015-05-13 23:53:00,OTHER OFFENSES,Wednesday,NORTHERN,OAK ST / LAGUNA ST,-122.425892,37.774599
2,2015-05-13 23:33:00,OTHER OFFENSES,Wednesday,NORTHERN,VANNESS AV / GREENWICH ST,-122.424363,37.800414
3,2015-05-13 23:30:00,LARCENY/THEFT,Wednesday,NORTHERN,1500 Block of LOMBARD ST,-122.426995,37.800873
4,2015-05-13 23:30:00,LARCENY/THEFT,Wednesday,PARK,100 Block of BRODERICK ST,-122.438738,37.771541


In [65]:
df_test.head()

Unnamed: 0,id,time,weekday,police_dept,address,longitude,latitude
0,0,2015-05-10 23:59:00,Sunday,BAYVIEW,2000 Block of THOMAS AV,-122.399588,37.735051
1,1,2015-05-10 23:51:00,Sunday,BAYVIEW,3RD ST / REVERE AV,-122.391523,37.732432
2,2,2015-05-10 23:50:00,Sunday,NORTHERN,2000 Block of GOUGH ST,-122.426002,37.792212
3,3,2015-05-10 23:45:00,Sunday,INGLESIDE,4700 Block of MISSION ST,-122.437394,37.721412
4,4,2015-05-10 23:45:00,Sunday,INGLESIDE,4700 Block of MISSION ST,-122.437394,37.721412


Time Manipulation

In [66]:
def extract_date(time):
    #Extract data from time
    return time.split(' ')[0]

def extract_year(date):
    #Extract year from date
    return int(date.split('-')[0])

def extract_month(date):
    #Extract month from date
    return int(date.split('-')[1])

def extract_day(date):
    #Extract day from date
    return int(date.split('-')[2])

def extract_hour(time):
    #Extract hour from time
    date, hms = time.split(' ')
    return int(hms.split(':')[0])

def extract_minute(time):
    #Extract minute from time
    date, hms = time.split(' ')
    return int(hms.split(':')[1])

def extract_season(month):
    #Determine season from month
    if month in [4, 5, 6]:
        return 'summer'
    elif month in [7, 8, 9]:
        return 'rainy'
    elif month in [10, 11, 12]:
        return 'winter'
    return 'spring'

def extract_hour_type(hour):
    #Determine hour type from hour
    if (hour >= 4) and (hour < 12):
        return 'morning'
    elif (hour >= 12) and (hour < 15):
        return 'noon'
    elif (hour >= 15) and (hour < 18):
        return 'evening'
    elif (hour >= 18) and (hour < 22):
        return 'night'
    return 'mid-night'

def extract_time_period(hour):
    #Determine the time period from hour
    if hour in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]:
        return 'am'
    return 'pm'

In [67]:
def title_text(text):
    #Title the text
    if isinstance(text, str):
        text = text.title()
        return text
    return text

In [68]:
def extract_address_type(addr):
    #Extract address type if it is a Street or Cross
    if ' / ' in addr:
        return 'Cross'
    addr_sep = addr.split(' ')
    addr_type = addr_sep[-1]
    return addr_type

In [69]:
def write_temporal_address_features(df, path):
    """Writing the temporal based features"""
    
    ### Adding temporal features
    df['date'] = df['time'].apply(func=extract_date)
    df['year'] = df['date'].apply(func=extract_year)
    df['month'] = df['date'].apply(func=extract_month)
    df['day'] = df['date'].apply(func=extract_day)
    df['hour'] = df['time'].apply(func=extract_hour)
    df['minute'] = df['time'].apply(func=extract_minute)
    df['season'] = df['month'].apply(func=extract_season)
    df['hour_type'] = df['hour'].apply(func=extract_hour_type)
    df['time_period'] = df['hour'].apply(func=extract_time_period)
    
    ### Adding address type
    df['address_type'] = df['address'].apply(func=extract_address_type)
    
    ### Text titling
    df = df.applymap(func=title_text)
    
    ### Writing
    df.to_csv(path_or_buf=path, index=None)
    
    return True

In [70]:
if (
    not os.path.isfile('datasets/df_train_time_address_cleaned.csv') and
    not os.path.isfile('datasets/df_test_time_address_cleaned.csv')
   ):
    # Training
    write_temporal_address_features(df_train, 'datasets/df_train_time_address_cleaned.csv')
    # Test
    write_temporal_address_features(df_test, 'datasets/df_test_time_address_cleaned.csv')

else:
    print("Data already exists in the directory.")
    df_train = pd.read_csv('datasets/df_train_time_address_cleaned.csv')
    df_test = pd.read_csv('datasets/df_test_time_address_cleaned.csv')

Data already exists in the directory.


In [71]:
df_train

Unnamed: 0,time,category,weekday,police_dept,address,longitude,latitude,date,year,month,day,hour,minute,season,hour_type,time_period,address_type
0,2015-05-13 23:53:00,Warrants,Wednesday,Northern,Oak St / Laguna St,-122.425892,37.774599,2015-05-13,2015,5,13,23,53,Summer,Mid-Night,Pm,Cross
1,2015-05-13 23:53:00,Other Offenses,Wednesday,Northern,Oak St / Laguna St,-122.425892,37.774599,2015-05-13,2015,5,13,23,53,Summer,Mid-Night,Pm,Cross
2,2015-05-13 23:33:00,Other Offenses,Wednesday,Northern,Vanness Av / Greenwich St,-122.424363,37.800414,2015-05-13,2015,5,13,23,33,Summer,Mid-Night,Pm,Cross
3,2015-05-13 23:30:00,Larceny/Theft,Wednesday,Northern,1500 Block Of Lombard St,-122.426995,37.800873,2015-05-13,2015,5,13,23,30,Summer,Mid-Night,Pm,St
4,2015-05-13 23:30:00,Larceny/Theft,Wednesday,Park,100 Block Of Broderick St,-122.438738,37.771541,2015-05-13,2015,5,13,23,30,Summer,Mid-Night,Pm,St
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
878044,2003-01-06 00:15:00,Robbery,Monday,Taraval,Farallones St / Capitol Av,-122.459033,37.714056,2003-01-06,2003,1,6,0,15,Spring,Mid-Night,Am,Cross
878045,2003-01-06 00:01:00,Larceny/Theft,Monday,Ingleside,600 Block Of Edna St,-122.447364,37.731948,2003-01-06,2003,1,6,0,1,Spring,Mid-Night,Am,St
878046,2003-01-06 00:01:00,Larceny/Theft,Monday,Southern,5Th St / Folsom St,-122.403390,37.780266,2003-01-06,2003,1,6,0,1,Spring,Mid-Night,Am,Cross
878047,2003-01-06 00:01:00,Vandalism,Monday,Southern,Townsend St / 2Nd St,-122.390531,37.780607,2003-01-06,2003,1,6,0,1,Spring,Mid-Night,Am,Cross


In [72]:
df_test

Unnamed: 0,id,time,weekday,police_dept,address,longitude,latitude,date,year,month,day,hour,minute,season,hour_type,time_period,address_type
0,0,2015-05-10 23:59:00,Sunday,Bayview,2000 Block Of Thomas Av,-122.399588,37.735051,2015-05-10,2015,5,10,23,59,Summer,Mid-Night,Pm,Av
1,1,2015-05-10 23:51:00,Sunday,Bayview,3Rd St / Revere Av,-122.391523,37.732432,2015-05-10,2015,5,10,23,51,Summer,Mid-Night,Pm,Cross
2,2,2015-05-10 23:50:00,Sunday,Northern,2000 Block Of Gough St,-122.426002,37.792212,2015-05-10,2015,5,10,23,50,Summer,Mid-Night,Pm,St
3,3,2015-05-10 23:45:00,Sunday,Ingleside,4700 Block Of Mission St,-122.437394,37.721412,2015-05-10,2015,5,10,23,45,Summer,Mid-Night,Pm,St
4,4,2015-05-10 23:45:00,Sunday,Ingleside,4700 Block Of Mission St,-122.437394,37.721412,2015-05-10,2015,5,10,23,45,Summer,Mid-Night,Pm,St
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
884257,884257,2003-01-01 00:01:00,Wednesday,Mission,2600 Block Of Bryant St,-122.408983,37.751987,2003-01-01,2003,1,1,0,1,Spring,Mid-Night,Am,St
884258,884258,2003-01-01 00:01:00,Wednesday,Northern,1900 Block Of Washington St,-122.425342,37.792681,2003-01-01,2003,1,1,0,1,Spring,Mid-Night,Am,St
884259,884259,2003-01-01 00:01:00,Wednesday,Ingleside,5500 Block Of Mission St,-122.445418,37.712075,2003-01-01,2003,1,1,0,1,Spring,Mid-Night,Am,St
884260,884260,2003-01-01 00:01:00,Wednesday,Bayview,1500 Block Of Hudson Av,-122.387394,37.739479,2003-01-01,2003,1,1,0,1,Spring,Mid-Night,Am,Av


One-Hot Encoding for time based features

In [73]:
def split_categories_numericals(df):
    #Identifying the numerical and categorical columns separately
    cols = list(df.columns)
    num_cols = list(df._get_numeric_data().columns)
    cate_cols = list(set(cols) - set(num_cols))
    return cate_cols, num_cols

In [74]:
ignore_columns = ['category', 'time', 'address', 'date'] #columns to be ignored for One hot encoding

def extract_feature_dummies(df, column):
    #One-Hot-Encoding using Pandas
    col_df = df[column]
    return pd.get_dummies(data=col_df)

def encode_multiple_columns(df, ignore_columns=ignore_columns):
    #Encoding multiple columns and vertically stacking them
    cate_cols, num_cols = split_categories_numericals(df=df)
    
    multi_feature_dummies = [df[num_cols]]
    for i in cate_cols:
        if i not in ignore_columns:
            d = extract_feature_dummies(df=df, column=i)
            multi_feature_dummies.append(d)

    encoded_data = pd.concat(multi_feature_dummies, axis=1)
    
    return encoded_data

Extracting Spatial Distance Features

In [75]:
sf_pstations_tourists = {
    "sfpd"                : [37.7725, -122.3894],
    "ingleside"           : [37.7247, -122.4463],
    "central"             : [37.7986, -122.4101],
    "northern"            : [37.7802, -122.4324],
    "mission"             : [37.7628, -122.4220],
    "tenderloin"          : [37.7838, -122.4129],
    "taraval"             : [37.7437, -122.4815],
    "sfpd park"           : [37.7678, -122.4552],
    "bayview"             : [37.7298, -122.3977],
    "kma438 sfpd"         : [37.7725, -122.3894],
    "richmond"            : [37.7801, -122.4644],
    "police commission"   : [37.7725, -122.3894],
    "juvenile"            : [37.7632, -122.4220],
    "southern"            : [37.6556, -122.4366],
    "sfpd pistol range"   : [37.7200, -122.4996],
    "sfpd public affairs" : [37.7754, -122.4039],
    "broadmoor"           : [37.6927, -122.4748],
    "napa wine country"      : [38.2975, -122.2869],
    "sonoma wine country"    : [38.2919, -122.4580],
    "muir woods"             : [37.8970, -122.5811],
    "golden gate"            : [37.8199, -122.4783],
    "yosemite national park" : [37.865101, -119.538330],
}

In [76]:
def get_distance(ij):
    #Get distance from two coordinates
    i = ij[0]
    j = ij[1]
    distance = haversine_distance(origin=i, destination=j)
    return distance

def extract_spatial_distance_feature(df, lat_column, lon_column, pname, pcoords):
    #Compute the distance between pcoords and all the feature values
    lat_vals = df[lat_column].to_list()
    lon_vals = df[lon_column].to_list()
    
    df_coords = list(zip(lat_vals, lon_vals))
    pcoords_df_coords_combines = zip([pcoords] * len(df), df_coords)
    
    f = pd.DataFrame()
    distances = list(map(get_distance, pcoords_df_coords_combines))
    f[pname] = distances
    
    return f

In [77]:
def extract_spatial_distance_multi_features(df, lat_column, lon_column, stations=sf_pstations_tourists):
    #Compute the spatial distance for multiple features and vertical stacking them
    sfeatures = []
    
    for pname, pcoords in stations.items():
        # print(pname, pcoords)
        sf = extract_spatial_distance_feature(df, lat_column, lon_column, pname, pcoords)
        sfeatures.append(sf)
    
    spatial_distances = pd.concat(sfeatures, axis=1)
    return spatial_distances

Extracting Latitude and Longitude based features

In [78]:
def lat_lon_sum(ll):
    #Return the sum of lat and lon
    lat = ll[0]
    lon = ll[1]
    return lat + lon

def lat_lon_diff(ll):
    #Return the diff of lat and lon
    lat = ll[0]
    lon = ll[1]
    return lon - lat

def lat_lon_sum_square(ll):
    #Return the square of sum of lat and lon
    lat = ll[0]
    lon = ll[1]
    return (lat + lon) ** 2

def lat_lon_diff_square(ll):
    #Return the square of diff of lat and lon
    lat = ll[0]
    lon = ll[1]
    return (lat - lon) ** 2

def lat_lon_sum_sqrt(ll):
    #Return the sqrt of sum of lat and lon
    lat = ll[0]
    lon = ll[1]
    return (lat**2 + lon**2) ** (1 / 2)

def lat_lon_diff_sqrt(ll):
    #Return the sqrt of diff of lat and lon
    lat = ll[0]
    lon = ll[1]
    return (lon**2 - lat**2) ** (1 / 2)

In [79]:
def features_by_lat_lon(df, lat_column, lon_column):
    """Compute all lat lon based features"""
    
    df_lats = df[lat_column].to_list()
    df_lons = df[lon_column].to_list()
    ll_zipped = list(zip(df_lats, df_lons))

    df_ll = pd.DataFrame()
    df_ll['lat_lon_sum'] = list(map(lat_lon_sum, ll_zipped))
    df_ll['lat_lon_diff'] = list(map(lat_lon_diff, ll_zipped))
    df_ll['lat_lon_sum_square'] = list(map(lat_lon_sum_square, ll_zipped))
    df_ll['lat_lon_diff_square'] = list(map(lat_lon_diff_square, ll_zipped))
    df_ll['lat_lon_sum_sqrt'] = list(map(lat_lon_sum_sqrt, ll_zipped))
    df_ll['lat_lon_diff_sqrt'] = list(map(lat_lon_diff_sqrt, ll_zipped))

    return df_ll

Bag of Words Representation for Address

In [80]:
best_bow_columns = np.array([])

In [81]:
def create_bow_vectorizer(df, column, target='category', write_vect=True, kbest=20):
    model_name = 'vect_bow_{}.pkl'.format(column)
    print(model_name)
    df_col_val = df[column]

    if not os.path.isfile(path='models/' + model_name):
        vect = CountVectorizer()
        vect.fit(raw_documents=df_col_val)
        pickle.dump(vect, open('models/' + model_name, "wb"))
        
    else:
        print("Model already exists in the directory.")
        vect = pickle.load(open('models/' + model_name, "rb"))
    
    df_col_features = vect.transform(raw_documents=df_col_val)
    global best_bow_columns
    
    if kbest:        
        if best_bow_columns.any():
            return pd.DataFrame(df_col_features[:, best_bow_columns].toarray(), columns=best_bow_columns)
        else:    
            fs = SelectKBest(k=kbest)
            fs.fit(df_col_features, df[target])
            df_col_features = fs.transform(df_col_features)
            best_bow_columns = fs.get_support(indices=True)
            return pd.DataFrame(df_col_features.toarray(), columns=best_bow_columns)
            

Tfidf Representation of Address

In [82]:
best_tfidf_cols = np.array([])

In [83]:
def create_tfidf_vectorizer(df, column, target='category', write_vect=True, kbest=20):
    model_name = 'vect_tfidf_{}.pkl'.format(column)
    print(model_name)
    df_col_val = df[column]

    if not os.path.isfile(path='models/' + model_name):
        vect = TfidfVectorizer()
        vect.fit(raw_documents=df_col_val)
        pickle.dump(vect, open('models/' + model_name, "wb"))
    else:
        print("Model already exists in the directory.")
        vect = pickle.load(open('models/' + model_name, "rb"))
    
    df_col_features = vect.transform(raw_documents=df_col_val)
    global best_tfidf_cols

    if kbest:
        if best_tfidf_cols.any():
            return pd.DataFrame(df_col_features[:, best_tfidf_cols].toarray(), columns=best_tfidf_cols)
        else:
            fs = SelectKBest(k=kbest)
            fs.fit(df_col_features, df[target])
            df_col_features = fs.transform(df_col_features)
            best_tfidf_cols = fs.get_support(indices=True)
            return pd.DataFrame(df_col_features.toarray(), columns=best_tfidf_cols)

Combination Process

In [84]:
def write_data_features(df, path, write_to_file=True):
    encoded_data = encode_multiple_columns(df=df)
    sd_features = extract_spatial_distance_multi_features(df=df, lat_column='latitude', lon_column='longitude')
    sll_features = features_by_lat_lon(df=df, lat_column='latitude', lon_column='longitude')
    #address_bow = create_bow_vectorizer(df=df, column='address')
    address_tfidf = create_tfidf_vectorizer(df=df, column='address')
    df_featurized = pd.concat([encoded_data, sd_features, sll_features,address_tfidf], axis=1)
    #df_featurized.columns = df_featurized.columns.str.lower()
    if write_to_file:
        df_featurized.to_csv(path_or_buf=path, index=None)
    
    return True

In [85]:
if (
    not os.path.isfile(path='datasets/train_data_features.csv') and
    not os.path.isfile(path='datasets/test_data_features.csv')
   ):
    # Training
    print("Train data")
    write_data_features(df=df_train, path='datasets/train_data_features.csv')
    print('-' * 30)
    #Test
    print("Test data")
    write_data_features(df=df_test, path='datasets/test_data_features.csv')
    print('-' * 30)

else:
    print("Data already exists in the directory.")

Train data
vect_tfidf_address.pkl
------------------------------
Test data
vect_tfidf_address.pkl
Model already exists in the directory.
------------------------------


In [86]:
df_train = pd.read_csv(filepath_or_buffer='datasets/train_data_features.csv')
df_test = pd.read_csv(filepath_or_buffer='datasets/test_data_features.csv')

In [87]:
df_train

Unnamed: 0,longitude,latitude,year,month,day,hour,minute,Rainy,Spring,Summer,Winter,Evening,Mid-Night,Morning,Night,Noon,/,Al,Av,Bl,Bufano,Cr,Cross,Ct,Dr,Ex,Ferlinghetti,Hwy,Hy,I-80,Ln,Mar,Palms,Park,Pl,Pz,Rd,Rw,St,Stwy,Ter,Tr,Way,Wk,Wy,Bayview,Central,Ingleside,Mission,Northern,Park.1,Richmond,Southern,Taraval,Tenderloin,Am,Pm,Friday,Monday,Saturday,Sunday,Thursday,Tuesday,Wednesday,sfpd,ingleside,central,northern,mission,tenderloin,taraval,sfpd park,bayview,kma438 sfpd,richmond,police commission,juvenile,southern,sfpd pistol range,sfpd public affairs,broadmoor,napa wine country,sonoma wine country,muir woods,golden gate,yosemite national park,lat_lon_sum,lat_lon_diff,lat_lon_sum_square,lat_lon_diff_square,lat_lon_sum_sqrt,lat_lon_diff_sqrt,17,236,328,421,718,869,940,1023,1078,1163,1178,1180,1392,1466,1500,1550,1582,1817,1854,1971
0,-122.425892,37.774599,2015,5,13,23,53,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,3.215829,5.831391,3.008067,0.845653,1.355806,1.533137,5.975122,2.684701,5.563934,3.215829,3.439266,3.215829,1.312815,13.265523,8.880194,1.934922,10.071283,59.404530,57.589991,19.262010,6.824861,253.825925,-84.651293,-160.200490,7165.841420,25664.197083,128.121112,116.452474,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.260309,0.0
1,-122.425892,37.774599,2015,5,13,23,53,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,3.215829,5.831391,3.008067,0.845653,1.355806,1.533137,5.975122,2.684701,5.563934,3.215829,3.439266,3.215829,1.312815,13.265523,8.880194,1.934922,10.071283,59.404530,57.589991,19.262010,6.824861,253.825925,-84.651293,-160.200490,7165.841420,25664.197083,128.121112,116.452474,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.260309,0.0
2,-122.424363,37.800414,2015,5,13,23,33,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,4.367423,8.637074,1.269310,2.356067,4.187674,2.104179,8.061681,4.527219,8.194288,4.367423,4.180893,4.367423,4.143251,16.138541,11.121959,3.312111,12.771905,56.568830,54.730032,17.456720,5.210209,253.549838,-84.623949,-160.224777,7161.212693,25671.979275,128.127265,116.442489,0.0,0.260319,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.130387,0.0
3,-122.426995,37.800873,2015,5,13,23,30,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,4.568175,8.638335,1.505805,2.347240,4.256185,2.266698,7.960252,4.434822,8.311882,4.568175,4.017284,4.568175,4.211946,16.175650,11.027433,3.484473,12.741493,56.568564,54.667211,17.243428,4.978951,253.778666,-84.626123,-160.227868,7161.580642,25672.969671,128.129915,116.445108,0.0,0.000000,0.158150,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.158147,0.0,0.0,0.0,0.0,0.144994,0.0
4,-122.438738,37.771541,2015,5,13,23,30,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,4.337816,5.250769,3.922551,1.112332,1.763316,2.648524,4.869945,1.505609,5.878816,4.337816,2.448031,4.337816,1.739192,12.893441,7.841098,3.091893,9.322738,59.976751,57.885852,18.732892,6.403035,254.972177,-84.667196,-160.210279,7168.534155,25667.333432,128.132485,116.466970,0.0,0.000000,0.158050,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.158047,0.0,0.0,0.0,0.0,0.144902,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
878044,-122.459033,37.714056,2003,1,6,0,15,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,8.928628,1.629437,10.338360,7.718633,6.323107,8.751821,3.843072,5.985528,5.671439,8.928628,7.358873,8.928628,6.361270,6.793193,3.628949,8.368250,2.750148,66.605871,64.253355,22.996105,11.890512,257.191103,-84.744977,-160.173089,7181.711098,25655.418557,128.134948,116.506930,0.0,0.213354,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.106863,0.0
878045,-122.447364,37.731948,2003,1,6,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,6.804609,0.811302,8.102981,5.524339,4.091748,6.513286,3.273918,4.045709,4.374100,6.804609,5.559840,6.804609,4.129107,8.542123,4.782387,6.160067,4.987026,64.438549,62.270843,21.790971,10.150800,256.014382,-84.715416,-160.179311,7176.701698,25657.411695,128.129063,116.488871,0.0,0.000000,0.119641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.119639,0.0,0.0,0.0,0.0,0.109689,0.0
878046,-122.403390,37.780266,2003,1,6,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,1.502513,7.239275,2.122241,2.549515,2.539131,0.923528,7.979862,4.759947,5.633771,1.502513,5.361851,1.502513,2.505273,14.166606,10.791558,0.542878,11.586142,58.411616,57.091829,20.298338,7.920921,251.817227,-84.623125,-160.183656,7161.073249,25658.803631,128.101282,116.426979,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.300204,0.0
878047,-122.390531,37.780607,2003,1,6,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0.906934,7.917428,2.638140,3.679880,3.401615,1.997609,8.988277,5.859532,5.684533,0.906934,6.492156,0.906934,3.375912,14.478742,11.720593,1.309849,12.266192,58.187982,57.159408,21.155521,8.863172,250.686694,-84.609924,-160.171138,7158.839294,25654.793603,128.089096,116.413349,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.250544,0.0
