In [None]:
import json # library to handle JSON files
import os # environment variables (API credentials)
import xml

import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

# for webscraping import Beautiful Soup 
from bs4 import BeautifulSoup

!conda install -c conda-forge folium=0.5.0 --yes 
import folium # map rendering library

print('Libraries imported.')

In [None]:
# Download the Fredericton neighbourhood boundaries (GeoJSON).
# The timeout stops a stalled connection from hanging the notebook, and
# raise_for_status() surfaces an HTTP error instead of trying to parse an error page as JSON.
r = requests.get(
    'https://opendata.arcgis.com/datasets/823d86e17a6d47808c6e4f1c2dd97928_0.geojson',
    timeout=30,
)
r.raise_for_status()
fredericton_geo = r.json()

# Keep the list of feature records and show the first one to inspect its structure.
neighborhoods_data = fredericton_geo['features']
neighborhoods_data[0]

In [None]:
# Download the second GeoJSON dataset (used below as the demographics layer).
# The timeout stops a stalled connection from hanging the notebook, and
# raise_for_status() surfaces an HTTP error instead of trying to parse an error page as JSON.
g = requests.get(
    'https://opendata.arcgis.com/datasets/6179d35eacb144a5b5fdcc869f86dfb5_0.geojson',
    timeout=30,
)
g.raise_for_status()
demog_geo = g.json()

In [None]:
# Keep just the list of feature records from the downloaded GeoJSON payload,
# then show the first record to inspect its structure.
demog_data = demog_geo['features']
demog_data[0]

In [None]:
# Load the 2017 crime incidents and count them per neighbourhood.
opencrime = 'Crime_by_neighbourhood_2017.xlsx'

# The context manager closes the workbook's file handle once the sheet has been read.
with pd.ExcelFile(opencrime) as workbook:
    print(workbook.sheet_names)
    crime_df = workbook.parse('Crime_by_neighbourhood_2017')

# The reporting-period columns are not used by any later step.
crime_df = crime_df.drop(columns=['From_Date', 'To_Date'])

#What is the crime count by neighbourhood?
crime_data = (
    crime_df.groupby(['Neighbourhood'])
    .size()
    .to_frame(name='Count')
    .reset_index()
    # index=str keeps the string row labels the original cell produced.
    .rename(index=str, columns={'Neighbourhood': 'Neighbourh', 'Count': 'Crime_Count'})
)

# Correct the spelling inside the column. The previous
# `crime_data.rename({'Platt': 'Plat'})` mapped *index labels*, so it never
# touched the neighbourhood names.
# NOTE(review): presumably the boundary GeoJSON spells this neighbourhood
# 'Plat' (the choropleth joins on 'Neighbourh') -- confirm against the data.
crime_data['Neighbourh'] = crime_data['Neighbourh'].replace({'Platt': 'Plat'})
crime_data

# Examine crime types: number of recorded incidents per Crime_Type.
crimes_per_type = crime_df.groupby(['Crime_Type']).size()
crimetype_data = crimes_per_type.to_frame(name='Count').reset_index()
crimetype_data
crimetype_data.describe()

# Neighbourhood x crime-type table: counts the non-null entries of every
# remaining column for each pair, with 0 where a pair never occurs.
crimepivot = crime_df.pivot_table(
    index='Neighbourhood',
    columns='Crime_Type',
    aggfunc=pd.Series.count,
    fill_value=0,
)
crimepivot


# Horizontal bar chart of incidents per crime type.
crimetype_data.plot(kind='barh', x='Crime_Type', y='Count')

#Examine theft from vehicles
mvcrime_df = crime_df.loc[crime_df['Crime_Type'] == 'THEFT FROM MV < $5000']

# Count the vehicle-theft incidents per neighbourhood.
mvcrime_data = (
    mvcrime_df.groupby(['Neighbourhood'])
    .size()
    .to_frame(name='Count')
    .reset_index()
    # index=str keeps the string row labels the original cell produced.
    .rename(index=str, columns={'Neighbourhood': 'Neighbourh', 'Count': 'MVCrime_Count'})
)

# Correct the spelling inside the column. The previous
# `mvcrime_data.rename({'Platt': 'Plat'})` mapped *index labels*, so it never
# touched the neighbourhood names.
# NOTE(review): presumably the boundary GeoJSON spells this neighbourhood
# 'Plat' (the choropleth joins on 'Neighbourh') -- confirm against the data.
mvcrime_data['Neighbourh'] = mvcrime_data['Neighbourh'].replace({'Platt': 'Plat'})
mvcrime_data


world_geo = r'world_countries.json' # geojson file

# Base map that the vehicle-theft choropleth is drawn onto.
fredericton_c_map = folium.Map(
    location=[45.91, -66.65],
    width=1000,
    height=750,
    zoom_start=12,
)

fredericton_c_map

## Motor Vehicle Crime <$5000 Count 
fredericton_geo = r.json()

# Six evenly spaced integer bin edges from the smallest to the largest count;
# the last edge is bumped by one so the maximum value falls inside the top bin.
threshold_scale = np.linspace(
    mvcrime_data['MVCrime_Count'].min(),
    mvcrime_data['MVCrime_Count'].max(),
    6,
    dtype=int,
).tolist()
threshold_scale[-1] += 1

# Shade each neighbourhood polygon by its vehicle-theft count, matching rows to
# polygons through the 'Neighbourh' property.
fredericton_c_map.choropleth(
    geo_data=fredericton_geo,
    data=mvcrime_data,
    columns=['Neighbourh', 'MVCrime_Count'],
    key_on='feature.properties.Neighbourh',
    threshold_scale=threshold_scale,
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.1,
    legend_name='Fredericton Neighbourhoods',
)
fredericton_c_map

In [None]:
# Population Density 
world_geo = r'world_countries.json' # geojson file

# Base map that the population choropleth is drawn onto.
fredericton_d_map = folium.Map(
    location=[45.94, -66.63],
    width=1200,
    height=750,
    zoom_start=12,
)
fredericton_d_map

# NOTE(review): demog_df is not defined in any cell visible here; it needs
# 'OBJECTID' and 'DBpop2011' columns -- confirm where it is built.
# Six evenly spaced integer bin edges across the DBpop2011 range; the last edge
# is bumped by one so the maximum value falls inside the top bin.
threshold_scale = np.linspace(
    demog_df['DBpop2011'].min(),
    demog_df['DBpop2011'].max(),
    6,
    dtype=int,
).tolist()
threshold_scale[-1] += 1

# Shade each polygon by DBpop2011, matching rows to polygons through OBJECTID.
fredericton_d_map.choropleth(
    geo_data=demog_geo,
    data=demog_df,
    columns=['OBJECTID', 'DBpop2011'],
    key_on='feature.properties.OBJECTID',
    threshold_scale=threshold_scale,
    fill_color='PuBuGn',
    fill_opacity=0.7,
    line_opacity=0.1,
    legend_name='Fredericton Population Density',
)
fredericton_d_map

In [None]:
#Specific locations in Fredericton
pointbook = 'Fredericton Locations.xlsx'

# The context manager closes the workbook's file handle once the sheet has been read.
with pd.ExcelFile(pointbook) as workbook_2:
    print(workbook_2.sheet_names)
    location_df = workbook_2.parse('Sheet1')

# Drop the neighbourhood column; only name and coordinates are used below.
location_df = location_df.drop(columns=['Neighbourh'])

# Add one labelled marker per location on top of the crime choropleth.
for lat, lng, point in zip(location_df['Latitude'], location_df['Longitude'], location_df['Location']):
    label = folium.Popup('{}'.format(point), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        # Was misspelled `radium`, so the requested size never took effect.
        radius=1,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        # The stray `parse_html=False` is dropped: it belongs to Popup, not to the marker.
    ).add_to(fredericton_c_map)
fredericton_c_map

In [None]:
#Explore Fredericton Neighbourhoods

# Foursquare credentials are read from the environment so real keys never have
# to be saved inside the notebook; the original placeholders remain the fallback.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'Nope')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', 'Secret')
VERSION = '20181201'

print('Your credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
# The secret is deliberately not echoed: cell outputs are stored and shared with the notebook.
print('CLIENT_SECRET: <hidden>')

# Column labels of the frame returned by getNearbyVenues, in row-tuple order.
VENUE_COLUMNS = [
    'Location',
    'Location Latitude',
    'Location Longitude',
    'Venue',
    'Venue id',
    'Venue Latitude',
    'Venue Longitude',
    'Venue Category',
]


def getNearbyVenues(names, latitudes, longitudes, radius=500, limit=100):
    """Query the Foursquare `venues/explore` endpoint around each location.

    The function header was missing from the notebook (the body was left as an
    indented fragment). It is restored here with the keyword names used by the
    existing call; `radius` and `limit` replace the undefined `radius`/`LIMIT`
    names the body referenced.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of each location; they are consumed in parallel.
    radius : number, default 500
        Forwarded as the `radius` query parameter.
        NOTE(review): the default is a guess -- confirm the intended value.
    limit : int, default 100
        Forwarded as the `limit` query parameter.
        NOTE(review): the default is a guess -- confirm the intended value.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns in VENUE_COLUMNS. The frame
        is empty, but still has those columns, when nothing is returned.

    Raises
    ------
    requests.HTTPError
        If Foursquare answers with an error status.
    """
    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Passing the query through `params` lets requests do the URL encoding.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': limit,
            },
            timeout=30,
        )
        response.raise_for_status()

        # A location with no venue groups contributes no rows instead of raising.
        groups = response.json()['response'].get('groups') or []
        items = groups[0]['items'] if groups else []

        for item in items:
            venue = item['venue']
            # Some venues carry an empty category list; record None rather than crash.
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['id'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None,
            ))

    # Supplying the columns up front keeps the schema stable even with zero rows.
    return pd.DataFrame(rows, columns=VENUE_COLUMNS)

# Collect the venues found around every location listed in location_df.
fredericton_data_venues = getNearbyVenues(
    names=location_df['Location'],
    latitudes=location_df['Latitude'],
    longitudes=location_df['Longitude'],
)

# Report (rows, columns) and then show the frame itself.
print(fredericton_data_venues.shape)
fredericton_data_venues


num_top_venues = 5

# NOTE(review): freddy_grouped is not defined in any cell visible here; this loop
# expects one row per 'Location' followed by numeric columns -- confirm its origin.
# For each location, print the num_top_venues columns with the highest values.
for hood in freddy_grouped['Location']:
    print("----"+hood+"----")

    # Turn the location's single row into a two-column (venue, freq) table.
    temp = freddy_grouped[freddy_grouped['Location'] == hood].T.reset_index()
    temp.columns = ['venue','freq']

    # Skip the first row (it holds the 'Location' label), then convert and round.
    temp = temp.iloc[1:].astype({'freq': float}).round({'freq': 2})

    ranked = temp.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

In [None]:
# Cluster Fredericton Locations

# set number of clusters
kclusters = 5

# NOTE(review): freddy_grouped and location_venues_sorted are not defined in any
# cell visible here -- confirm where they are built before running this cell.
# `columns=` replaces the positional axis argument, which current pandas no longer accepts.
freddy_grouped_clustering = freddy_grouped.drop(columns='Location')

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(freddy_grouped_clustering)

# Pair every label with the location it was computed for. The labels follow the
# row order of freddy_grouped, so they are joined to location_df by name rather
# than by position; the positional assignment used before raised on a length
# mismatch and silently mislabelled rows whenever the two frames were ordered
# differently. Merging also leaves location_df itself unmodified.
cluster_labels = pd.DataFrame({
    'Location': freddy_grouped['Location'].values,
    'Cluster Labels': kmeans.labels_,
})
# Inner join: a location without a row in freddy_grouped has no label and is left out.
freddy_merged = location_df.merge(cluster_labels, on='Location', how='inner')

# merge fredericton_grouped with location df to add latitude/longitude for each location
freddy_merged = freddy_merged.join(location_venues_sorted.set_index('Location'), on='Location')

# create map
# NOTE(review): latitude / longitude are not defined in any cell visible here -- confirm.
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(freddy_merged['Latitude'], freddy_merged['Longitude'], freddy_merged['Location'], freddy_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # cluster - 1 keeps the original colour assignment (cluster 0 wraps to the last colour).
    marker_color = rainbow[cluster - 1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.7,
    ).add_to(map_clusters)
map_clusters