<a href="https://colab.research.google.com/github/LACLanthony/crimes_hotel/blob/main/hotels_crimes_maps.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive

# Folder inside the mounted Drive that holds the input files, and the two CSV names.
path = '/content/gdrive/MyDrive/data/'
csv1 = 'crime_2023.csv'
csv2 = 'hotels_geo.csv'

# Mount Google Drive so that everything under `path` becomes readable.
drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from geopy.distance import geodesic as GD


**The data and the packages have been loaded. I will now create two data frames: hotels and crimes.**



In [3]:
# Read the hotels file (csv2) first and the crimes file (csv1) second,
# both from the data folder configured in `path`.
hotels, crimes = (pd.read_csv(f'{path}{file_name}') for file_name in (csv2, csv1))

In [20]:
# Preview the first two rows of the hotels frame.
hotels.head(2)


Unnamed: 0,hotel_name,address,city,state,zipcode,phone,lat,lng
0,Holiday Inn Express & Suites Austin Downtown -...,805 Neches St,Austin,TX,78701,(855) 862-4858,30.26931,-97.73732
1,Hotel Indigo Austin Downtown - University,810 Red River St,Austin,TX,78701,(737) 443-6259,30.26923,-97.73617


In [19]:
# Remove the two columns that carry no hotel information ('Unnamed: 0' and 'index').
# errors='ignore' keeps this cell safe to re-run: `del hotels[...]` raises KeyError
# once the columns are already gone.
hotels = hotels.drop(columns=['Unnamed: 0', 'index'], errors='ignore')

In [16]:
# Preview the first two rows of the crimes frame.
crimes.head(2)

Unnamed: 0,Highest Offense Description,Occurred Date Time,Location Type,Address,X-coordinate,Y-coordinate,Latitude,Longitude,Location,APD Sector,APD District,Council District
0,THEFT,03/25/2023 12:10:00 PM,CONVENIENCE STORE,4405 E RIVERSIDE DR,3120615.0,10058135.0,30.232975,-97.723787,"(30.23297487, -97.72378708)",HE,4,3.0
1,DISTURBANCE - OTHER,03/25/2023 11:47:00 PM,RESIDENCE / HOME,300 FERGUSON DR,3131586.0,10110544.0,30.376325,-97.685107,"(30.37632539, -97.68510677)",ED,1,4.0


**Viewing the columns of each data frame shows that the crimes data frame can have some fields removed. However, it is important that I keep the geo-location information in both data frames, as it will be used in tandem later.**

In [5]:
# Show the column labels of both frames, crimes first and hotels second.
print(crimes.columns, hotels.columns, sep='\n')

Index(['Incident Number', 'Highest Offense Description',
       'Highest Offense Code', 'Family Violence', 'Occurred Date Time',
       'Occurred Date', 'Occurred Time', 'Report Date Time', 'Report Date',
       'Report Time', 'Location Type', 'Address', 'Zip Code',
       'Council District', 'APD Sector', 'APD District', 'PRA', 'Census Tract',
       'Clearance Status', 'Clearance Date', 'UCR Category',
       'Category Description', 'X-coordinate', 'Y-coordinate', 'Latitude',
       'Longitude', 'Location', 'Zip Codes', 'Single Member Council Districts',
       'BOUNDARIES_single_member_districts', 'Zoning Review Cases_data',
       'Neighborhood Planning Areas',
       'Boundaries: City of Austin Council Districts'],
      dtype='object')
Index(['Unnamed: 0', 'index', 'hotel_name', 'address', 'city', 'state',
       'zipcode', 'phone', 'lat', 'lng'],
      dtype='object')


**The crimes data frame will now only have the following fields. Moving forward this is what the new crimes data frame will contain.**

In [6]:
# Keep only the offense description, the timestamp, the location/geo fields and the
# police/council district fields.
# .copy() makes the result an independent frame: later cells add a column to it and
# drop rows from it, which on a plain column subset can raise SettingWithCopyWarning.
crimes = crimes[[
    'Highest Offense Description',
    'Occurred Date Time',
    'Location Type',
    'Address',
    'X-coordinate',
    'Y-coordinate',
    'Latitude',
    'Longitude',
    'Location',
    'APD Sector',
    'APD District',
    'Council District',
]].copy()

**Here I will view the unique crimes that exist in the crimes data frame. This is important because some crimes are of a similar type and may be easier to identify if only the stem of the crime is kept, e.g. "Assault with a weapon" = "Assault".**

In [12]:
# List the distinct offense descriptions present in the crimes frame.
crimes['Highest Offense Description'].unique()

array(['THEFT', 'DISTURBANCE - OTHER', 'ROBBERY BY ASSAULT',
       'DOC DISCHARGE GUN - PUB ROAD', 'BURG OF RES - FAM/DATING ASLT',
       'FAMILY DISTURBANCE', 'AUTO THEFT', 'DWI  .15 BAC OR ABOVE',
       'POSS OF DRUG PARAPHERNALIA', 'ASSAULT BY THREAT FAM/DATING',
       'CRIMINAL TRESPASS', 'INTER EMERG PHONECALL FAM/DATE',
       'BURGLARY NON RESIDENCE', 'ASSAULT W/INJURY-FAM/DATE VIOL',
       'AGG ASSAULT', 'AGG ASSAULT FAM/DATE VIOLENCE', 'EVADING VEHICLE',
       'BURGLARY OF VEHICLE', 'DATING DISTURBANCE', 'ASSAULT WITH INJURY',
       'ASSAULT BY CONTACT FAM/DATING', 'DWI', 'EVADING / FOOT',
       'THEFT CATALYTIC CONVERTER', 'INTERFERE W PO SERVICE ANIMALS',
       'DAMAGE CITY VEHICLE', 'CRIMINAL MISCHIEF', 'TERRORISTIC THREAT',
       'DEADLY CONDUCT', 'VIOL STATE LAW - OTHER',
       'POSS CONTROLLED SUB/NARCOTIC', 'UNLAWFUL CARRYING WEAPON',
       'INTERFERING W/EMERG PHONE CALL', 'AGG ROBBERY/DEADLY WEAPON',
       'HARASSMENT', 'POSSESSION OF MARIJUANA',
       '

**The list of crimes will be sh

In [22]:
# Keywords used to collapse the detailed offense descriptions into broader crime types.
# Order matters: the classification cell labels each offense with the FIRST entry of
# this list that occurs as a substring of its description, so a generic stem listed
# early (e.g. 'ASSAULT') wins over a more specific entry listed later
# (e.g. 'SEXUAL ASSAULT').
crime = [
    'ASSAULT',
    'DWI',
    'CRIMINAL MISCHIEF',
    'GUN',
    'DEADLY CONDUCT',
    'CRASH',
    'POSSESSION',
    'CRIMINAL TRESPASS',
    'THEFT',
    'ROBBERY',
    'BURGLARY',
    'EVADING / VEHICLE PURSUIT',
    'WEAPON',
    'FIREARM',
    'CONTROLLED',
    'VIOL CITY ORDINANCE - OTHER',
    'ASSAULT BY CONTACT FAM/DATING',
    'HARASSMENT OF A PUBLIC SERVANT',
    'EVADING VEHICLE',
    'ARSON',
    'CRIMINAL TRESPASS/HOTEL',
    'DAMAGE CITY PROP',
    'TAMPERING WITH GOV RECORD',
    'INDECENCY WITH A CHILD/CONTACT',
    'TERRORISTIC THREAT-MASS CASLTY',
    'THEFT CATALYTIC CONVERTER',
    'CIVIL DISTURBANCE/DEMO',
    'SEXUAL ASSAULT',
    'EVADING',
    'RAPE',
    'RESISTING ARREST OR SEARCH',
    'TRESPASS',
    'BOMB THREAT',
    'PROSTITUTION',
    'URINATING IN PUBLIC PLACE',
]

In [23]:
def _first_matching_keyword(description, keywords):
    """Return the first entry of `keywords` that is a substring of `description`.

    Parameters
    ----------
    description : object
        One offense description. Anything that is not a string (for example a
        missing value read as NaN) is treated as "no match" instead of raising
        TypeError on the substring test.
    keywords : iterable of str
        Candidate labels, checked in order; the first hit wins.

    Returns
    -------
    str or float
        The matching keyword, or np.nan when nothing matches.
    """
    if not isinstance(description, str):
        return np.nan
    # The generator stops at the first hit instead of building the full list of matches.
    return next((keyword for keyword in keywords if keyword in description), np.nan)


# Label every offense with its broad crime type; offenses without a match get NaN.
crimes['Crime_Type'] = [
    _first_matching_keyword(description, crime)
    for description in crimes['Highest Offense Description']
]

In [24]:
# Drop only the rows the analysis cannot use: offenses that matched no keyword
# (Crime_Type is NaN) and offenses without coordinates (needed for the distance step).
# A blanket dropna() would also discard classified, geo-located crimes that merely
# miss an unrelated field, silently shrinking the sample.
crimes = (
    crimes
    .dropna(subset=['Crime_Type', 'Latitude', 'Longitude'])
    .reset_index(drop=True)
)

In [25]:
# Preview the crimes frame, which now includes the Crime_Type column.
crimes.head()

Unnamed: 0,Highest Offense Description,Occurred Date Time,Location Type,Address,X-coordinate,Y-coordinate,Latitude,Longitude,Location,APD Sector,APD District,Council District,Crime_Type
0,THEFT,03/25/2023 12:10:00 PM,CONVENIENCE STORE,4405 E RIVERSIDE DR,3120615.0,10058135.0,30.232975,-97.723787,"(30.23297487, -97.72378708)",HE,4,3.0,THEFT
1,DOC DISCHARGE GUN - PUB ROAD,03/25/2023 04:53:00 PM,HWY / ROAD / ALLEY/ STREET/ SIDEWALK,1900 RUNNING BROOK DR,3129523.0,10084997.0,30.306236,-97.693569,"(30.30623596, -97.69356946)",ID,4,4.0,GUN
2,DWI .15 BAC OR ABOVE,03/25/2023 02:42:00 AM,HWY / ROAD / ALLEY/ STREET/ SIDEWALK,7600 N MOPAC EXPY NB,3112663.0,10103446.0,30.358038,-97.745623,"(30.35803848, -97.74562307)",BA,8,10.0,DWI
3,ASSAULT BY THREAT FAM/DATING,03/25/2023 06:09:00 AM,RESIDENCE / HOME,4805 PEACH GROVE RD,3118763.0,10044324.0,30.19513,-97.730673,"(30.19513014, -97.73067296)",FR,6,2.0,ASSAULT
4,CRIMINAL TRESPASS,03/25/2023 01:01:00 PM,COMMERCIAL / OFFICE BUILDING,9616 N LAMAR BLVD,3128360.0,10107001.0,30.366797,-97.695601,"(30.36679701, -97.69560063)",ED,1,4.0,CRIMINAL TRESPASS


**Now I will further distill down the crimes data frame and create an assaults data frame that will contain all the assaults.**

In [26]:
# Subset of crimes labelled ASSAULT. reset_index() without drop=True renumbers the
# rows from 0 and keeps the previous row labels in a new 'index' column.
assaults = crimes.loc[crimes['Crime_Type'].eq('ASSAULT')].reset_index()

In [27]:
# Preview the first rows of the assaults frame.
assaults.head()

Unnamed: 0,index,Highest Offense Description,Occurred Date Time,Location Type,Address,X-coordinate,Y-coordinate,Latitude,Longitude,Location,APD Sector,APD District,Council District,Crime_Type
0,3,ASSAULT BY THREAT FAM/DATING,03/25/2023 06:09:00 AM,RESIDENCE / HOME,4805 PEACH GROVE RD,3118763.0,10044324.0,30.19513,-97.730673,"(30.19513014, -97.73067296)",FR,6,2.0,ASSAULT
1,6,ASSAULT W/INJURY-FAM/DATE VIOL,03/25/2023 09:07:00 PM,RESIDENCE / HOME,8116 WEST GATE BLVD,3087799.0,10044529.0,30.197648,-97.828643,"(30.19764792, -97.82864301)",FR,1,5.0,ASSAULT
2,8,AGG ASSAULT,03/25/2023 09:28:00 PM,PARKING /DROP LOT/ GARAGE,1610 E PARMER LN,3142929.0,10114985.0,30.387785,-97.648806,"(30.38778538, -97.64880627)",ED,6,7.0,ASSAULT
3,9,AGG ASSAULT FAM/DATE VIOLENCE,03/25/2023 09:16:00 PM,RESIDENCE / HOME,902 ROMERIA DR,3119511.0,10093736.0,30.330907,-97.724639,"(30.33090738, -97.72463856)",ID,7,7.0,ASSAULT
4,16,ASSAULT WITH INJURY,03/25/2023 11:22:00 PM,HWY / ROAD / ALLEY/ STREET/ SIDEWALK,500 SAN JACINTO BLVD,3114905.0,10070325.0,30.26685,-97.740964,"(30.26685022, -97.74096351)",GE,2,9.0,ASSAULT


**Now I will feature engineer new fields. I will take the assault data from the crimes data frame and add summary fields to the hotels data frame. The goal is to find, for each hotel on our list, how far away the assaults that have occurred this year are: the mean distance, along with the median, maximum and minimum.**
 

In [30]:
# For every hotel, measure the geodesic distance in miles to every assault and store
# the mean / median / max / min of those distances as new columns on `hotels`.

# The assault coordinates are the same for every hotel, so extract them once as plain
# (latitude, longitude) tuples instead of slicing each row again inside the loop.
assault_points = list(zip(assaults['Latitude'], assaults['Longitude']))

# Column names are kept exactly as before (including 'Assaults_min_distance', whose
# prefix differs from the other three) so existing references keep working.
distance_stats = {
    'Assault_mean_distance': [],
    'Assault_median_distance': [],
    'Assault_max_distance': [],
    'Assaults_min_distance': [],
}

for hotel_point in zip(hotels['lat'], hotels['lng']):
    miles = np.array([GD(hotel_point, assault_point).miles
                      for assault_point in assault_points])
    if miles.size:
        summary = (miles.mean(), np.median(miles), miles.max(), miles.min())
    else:
        # No assaults to compare against: record NaN rather than failing on max([]).
        summary = (np.nan, np.nan, np.nan, np.nan)
    for values, statistic in zip(distance_stats.values(), summary):
        values.append(statistic)

# Lists are assigned positionally, one value per hotel row, in row order.
for column, values in distance_stats.items():
    hotels[column] = values

**As you can now see, there are four new fields: "Assault_mean_distance", "Assault_median_distance", "Assault_max_distance" and "Assaults_min_distance". These new fields are measured in miles and will be used for data analysis.**

In [31]:
# Preview the hotels frame with the new assault-distance columns.
hotels.head()

Unnamed: 0,hotel_name,address,city,state,zipcode,phone,lat,lng,Assault_mean_distance,Assault_median_distance,Assault_max_distance,Assaults_min_distance
0,Holiday Inn Express & Suites Austin Downtown -...,805 Neches St,Austin,TX,78701,(855) 862-4858,30.26931,-97.73732,5.294492,4.994135,16.288748,0.037396
1,Hotel Indigo Austin Downtown - University,810 Red River St,Austin,TX,78701,(737) 443-6259,30.26923,-97.73617,5.293358,4.979917,16.308455,0.05709
2,Holiday Inn Austin-Town Lake,20 N Interstate 35,Austin,TX,78701,(844) 335-0714,30.25396,-97.7369,5.48192,4.941585,17.329634,0.058792
3,Holiday Inn Austin Midtown,6000 Middle Fiskville Rd,Austin,TX,78752,(844) 276-6368,30.32327,-97.71028,5.892403,5.329325,16.330369,0.185042
4,Holiday Inn Austin Airport,6711 E Ben White Blvd,Austin,TX,78741,(844) 809-4270,30.21267,-97.70517,7.10578,6.333109,20.531112,0.228046


In [39]:
# Hotels whose median distance to an assault is below 5 (miles).
hotels.loc[hotels['Assault_median_distance'].lt(5)]

Unnamed: 0,hotel_name,address,city,state,zipcode,phone,lat,lng,Assault_mean_distance,Assault_median_distance,Assault_max_distance,Assaults_min_distance
0,Holiday Inn Express & Suites Austin Downtown -...,805 Neches St,Austin,TX,78701,(855) 862-4858,30.26931,-97.73732,5.294492,4.994135,16.288748,0.037396
1,Hotel Indigo Austin Downtown - University,810 Red River St,Austin,TX,78701,(737) 443-6259,30.26923,-97.73617,5.293358,4.979917,16.308455,0.05709
2,Holiday Inn Austin-Town Lake,20 N Interstate 35,Austin,TX,78701,(844) 335-0714,30.25396,-97.7369,5.48192,4.941585,17.329634,0.058792
31,DoubleTree by Hilton Hotel Austin - University...,1617 N Interstate 35,Austin,TX,78702,(512) 479-4000,30.27709,-97.72927,5.295663,4.904306,15.874253,0.082716
43,Azul Rooftop,310 E 5th St,Austin,TX,78701,(512) 792-5622,30.26631,-97.74029,5.320036,4.999793,16.45575,0.012123
44,Omni Austin Hotel Downtown,700 San Jacinto At 8th Street,Austin,TX,78701,(512) 476-3700,30.26851,-97.74,5.30638,4.997646,16.310486,0.036511
48,Fairmont Austin,101 Red River St,Austin,TX,78701,(512) 600-2000,30.26193,-97.73885,5.360565,4.995037,16.768435,0.010723
53,Origin Hotel Austin,1825 McBee St.,Austin,TX,78723,(512) 861-1140,30.29814,-97.70453,5.612363,4.923459,15.599709,0.213203
63,Fairmont Austin,101 Red River St,Austin,TX,78701,(512) 600-2000,30.26193,-97.73885,5.360565,4.995037,16.768435,0.010723
65,Homewood Suites by Hilton Austin Downtown,78 East Ave,Austin,TX,78701,(512) 320-5454,30.25907,-97.73756,5.397214,4.975953,16.976738,0.051919
