# <center>Solution: UBER Estimated Times Analysis</center>
**<center>UFRN-DATA SCIENCE</center>**
**<center>Luis Ortiz</center>**
**<center>Elizabeth Cabrera</center>**

### <span style="background-color: #000000; color:#FDFEFE">Step i. Obtain points distributed within a neighborhood.</span> 

In [1]:
import os
import json
from shapely.geometry import Polygon
from numpy import random
from shapely.geometry import Point
import folium

# Path to the GeoJSON file describing the Natal neighborhoods
natal_neigh = os.path.join('geojson', 'natal.geojson')

# Parse the file as UTF-8; the context manager closes the file handle
# as soon as parsing finishes instead of leaving it open.
with open(natal_neigh, encoding='UTF-8') as geojson_file:
    geo_json_natal = json.load(geojson_file)

def generate_random(number, polygon, neighborhood):
    """Draw random points that fall inside ``polygon``.

    Candidates are sampled uniformly from the polygon's bounding box and
    kept only when ``polygon.contains`` accepts them, so the loop keeps
    running until ``number`` points have been accepted (it never ends if
    no candidate is ever accepted).

    Args:
        number: how many points to collect.
        polygon: object exposing ``bounds`` (minx, miny, maxx, maxy) and
            ``contains(point)``.
        neighborhood: label stored alongside every accepted point.

    Returns:
        A list of ``[x, y, neighborhood]`` entries.
    """
    x_low, y_low, x_high, y_high = polygon.bounds
    accepted = []
    while len(accepted) < number:
        # x is drawn before y, one pair per attempt
        x = random.uniform(x_low, x_high)
        y = random.uniform(y_low, y_high)
        if not polygon.contains(Point(x, y)):
            continue  # outside the shape: try again
        accepted.append([x, y, neighborhood])
    return accepted

# Base map, centred on the fixed coordinates below
m = folium.Map(
    location=[-5.826592, -35.212558],
    zoom_start=11,
    tiles='Stamen Terrain',
)

# Overlay the neighborhood outlines from the loaded GeoJSON
folium.GeoJson(geo_json_natal).add_to(m)

# Number of random points requested for each neighborhood
number_of_points = 3

# Parallel lists: entry k of each one describes the same sampled point
neighborhood_names = []
points_location = []

for feature in geo_json_natal['features']:
    neighborhood = feature['properties']['name']
    geom = feature['geometry']['coordinates']
    # Only the first element of the coordinate array is used to build the
    # polygon (the exterior ring for a GeoJSON "Polygon" geometry --
    # TODO confirm the file contains no MultiPolygon features).
    polygon = Polygon(geom[0])
    points = generate_random(number_of_points, polygon, neighborhood)

    # Each sample is [x, y, name]; x is treated as longitude and y as
    # latitude, and folium is handed the pair as [lat, log].
    for i, (log, lat, name) in enumerate(points):
        neighborhood_names.append(name)
        points_location.append([lat, log])
        marker = folium.CircleMarker(
            [lat, log],
            radius=2,
            popup='%s %s%d' % (name, '#', i),
            color='red',
        )
        marker.add_to(m)

# Last expression of the cell: renders the map in the notebook
m

### <span style="background-color: #000000; color:#FDFEFE">Step ii. Load Data.</span> 

To capture the data, run "CaptureUBERTimesData.py"; afterwards, load it using:

In [2]:
import pandas as pd

# Column labels given to every reshaped chunk
long_columns = ['NeighborhoodName', 'PointLocation', 'UBERTime0', 'CaptureData0']

appended_data = []

# Files 1.csv .. 5.csv are each unpivoted from wide to long form
for file_number in range(1, 6):
    path_in = 'UBERTimesData/' + str(file_number) + '.csv'
    df = pd.read_csv(path_in, encoding='latin-1')
    total_columns = df.shape[1]

    # Columns 1 and 2 are repeated in front of every chunk
    id_columns = df.iloc[:, 1:3]

    # From column 3 onwards the columns are consumed two at a time
    for first in range(3, total_columns, 2):
        pair = df.iloc[:, first:first + 2]
        chunk = pd.concat([id_columns, pair], axis=1)
        chunk.columns = long_columns
        appended_data.append(chunk)

# Stack all chunks, renumber the rows and write the combined dataset
# (the row index is written as the first CSV column)
path_out = 'UBERTimesData/UBERTimesData.csv'
appended_data = pd.concat(appended_data, axis=0).reset_index(drop=True)
appended_data.to_csv(path_out, encoding='latin-1')

### <span style="background-color: #000000; color:#FDFEFE">Step iii. Average time that a UBER takes to arrive in a certain neighborhood.</span> 

In [3]:
import pandas as pd
from branca.colormap import linear
import folium
import os
import json
import numpy as np

# Every neighborhood name found in the GeoJSON, sorted alphabetically
neighborhood = pd.DataFrame(
    [feature['properties']['name'] for feature in geo_json_natal['features']],
    columns=['NeighborhoodName'],
).sort_values(by='NeighborhoodName').reset_index(drop=True)

# Read the combined UBER times dataset
path_in = 'UBERTimesData/UBERTimesData.csv'
df = pd.read_csv(path_in, encoding='latin-1')
# Skip column 0 and keep the next four columns
df = df.iloc[:, 1:5].copy()

# Mean UBERTime0 per neighborhood, over all rows recorded for it
point_Time = df.pivot_table(index="NeighborhoodName", values="UBERTime0", aggfunc='mean')

# Attach each mean to its neighborhood BY NAME. The previous positional
# concatenation relied on both tables having the same length and sort
# order, used DataFrame.as_matrix() (removed in pandas 1.0) and turned the
# numeric column into object dtype. A left merge keeps one row per GeoJSON
# neighborhood (NaN where no time was recorded) and keeps UBERTime0 numeric.
df_combined = neighborhood.merge(
    point_Time.reset_index(),
    on="NeighborhoodName",
    how="left",
)
print(df_combined.sort_values("UBERTime0", ascending=False).reset_index(drop=True))

# Create a map object
m = folium.Map(
    location=[-5.826592, -35.212558],
    zoom_start=11,
    tiles='Stamen Terrain'
)

# Six evenly spaced legend thresholds between the smallest and largest mean.
# NOTE(review): dtype=int truncates the end points, so the top threshold can
# sit slightly below the true maximum -- confirm this is acceptable for the
# installed folium version.
threshold_scale = np.linspace(df_combined['UBERTime0'].min(),
                              df_combined['UBERTime0'].max(), 6, dtype=int).tolist()

# Colour each neighborhood polygon by its mean time; rows are matched to
# GeoJSON features through the feature's properties.name
m.choropleth(
    geo_data=geo_json_natal,
    data=df_combined,
    columns=['NeighborhoodName', 'UBERTime0'],
    key_on='feature.properties.name',
    fill_color='OrRd',
    legend_name='UBER time estimation (sec) for each neighborhood in Natal RN (2017)',
    highlight=True,
    threshold_scale = threshold_scale
)

# Last expression of the cell: renders the map in the notebook
m

                 NeighborhoodName UBERTime0
0                        Guarapes   532.442
1                         Salinas   476.554
2                      Lagoa Azul    451.73
3                  Felipe Camarão   425.119
4                        Planalto   360.033
5                     Santos Reis   359.077
6                         Redinha    356.39
7                     Ponta Negra   353.855
8                       Mãe Luiza   339.967
9                     Cidade Nova   324.547
10                       Pajuçara    322.01
11  Nossa Senhora da Apresentação   319.621
12                     Bom Pastor   311.363
13                        Pitimbu       300
14                       Nordeste   298.168
15                        Quintas   291.332
16                    Areia Preta   290.921
17                          Rocas   290.099
18                        Ribeira   280.462
19            Cidade da Esperança   279.707
20                    Cidade Alta   274.818
21                        Alecri

In [4]:
# Save the map object `m` to map.html (relative path, so it lands in the working directory)
m.save("map.html")