In [2]:
import pandas as pd 
import numpy as np
import sklearn
import seaborn as sns 
import matplotlib.pyplot as plt
import plotly.express as px 
import plotly.graph_objects as go
import plotly.io as pio
import geopy.distance
import requests


import warnings
warnings.filterwarnings('ignore')

from sklearn.cluster import KMeans
from sklearn.cluster import DBSCAN

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
pio.renderers.default = "iframe"

In [None]:
# Read the raw CSV into a DataFrame (presumably Uber pickups for April 2014,
# judging by the file name -- confirm against the data source).
data = pd.read_csv(filepath_or_buffer='uber-raw-data-apr14.csv')

In [None]:
# Quick overview of the loaded frame: dimensions, column dtypes, first rows
# and summary statistics for every column (numeric or not).
display(
    data.shape,
    data.dtypes,
    data.head(),
    data.describe(include='all'),
)

In [None]:
# Parse the timestamp column once and derive both calendar features from it
# (pandas numbers weekdays Monday=0 ... Sunday=6).
pickup_time = pd.to_datetime(data["Date/Time"])
data['dayofweek'] = pickup_time.dt.dayofweek
data['hour'] = pickup_time.dt.hour

# Geocode Manhattan through the Nominatim search endpoint.
# - The query is passed via `params` so it is URL-encoded properly (the
#   previous URL still contained the template placeholder "<manhattan>").
# - Nominatim's usage policy requires an identifying User-Agent; requests
#   using a default library agent may be rejected.
# - A timeout and an explicit status check make failures visible instead of
#   hanging or surfacing later as a confusing JSON/IndexError.
response = requests.get(
    "https://nominatim.openstreetmap.org/search",
    params={"q": "Manhattan", "format": "json", "limit": 1},
    headers={"User-Agent": "uber-pickups-clustering-notebook"},
    timeout=10,
)
response.raise_for_status()
matches = response.json()
if not matches:
    raise ValueError("Nominatim returned no result for 'Manhattan'")

# Reference point used later to compute each pickup's distance to the centre.
coord = matches[0]
lat = float(coord['lat'])
lon = float(coord['lon'])

In [None]:
# Keep only the rows where both coordinates are present.
mask_lat = data['Lat'].notna()
mask_lon = data['Lon'].notna()

data = data[mask_lon & mask_lat]

In [None]:
# Distance in metres (geopy's `.m`) from every pickup to the reference point
# (lat, lon). Values are computed row by row, in row order.
data["distance_to_center"] = [
    geopy.distance.distance((pickup_lat, pickup_lon), (lat, lon)).m
    for pickup_lat, pickup_lon in zip(data['Lat'], data['Lon'])
]

In [None]:
# Peek at the first few computed distances.
data.loc[:, "distance_to_center"].head()

In [None]:
# Working subset: rows with dayofweek == 6 (Sunday in pandas' Monday=0
# numbering) and hour == 12.
# An explicit copy is taken because new columns are added to `dataset` later;
# writing into a filtered selection of `data` would otherwise trigger pandas'
# SettingWithCopyWarning (currently hidden by the global warnings filter).
dataset = data.loc[(data['dayofweek'] == 6) & (data['hour'] == 12), :].copy()

# Single-feature matrix (n_samples, 1) holding the distance to the centre.
X = dataset[["distance_to_center"]].values

In [None]:

# High-density clusters.
# X holds the distance to the centre in metres, so eps is a radius in metres.
db = DBSCAN(eps=500, min_samples=50, metric="manhattan", algorithm="brute")
db.fit(X)
cluster_haute_densite = db.labels_

# Low-density clusters: same neighbourhood radius, lower min_samples.
# The radius was previously 0.015, a value that only makes sense for the
# Lat/Lon (degree) feature space used elsewhere in this notebook; on a feature
# expressed in metres it makes nearly every point noise.
db_basse_densite = DBSCAN(eps=500, min_samples=20, metric="manhattan", algorithm="brute")
db_basse_densite.fit(X)
cluster_basse_densite = db_basse_densite.labels_

# DBSCAN labels noise points -1; flag every point that belongs to a cluster.
# `assign` returns a new frame, so nothing is written into a slice of `data`.
dataset = dataset.assign(
    haute_densite=(cluster_haute_densite != -1).astype("int"),
    basse_densite=(cluster_basse_densite != -1).astype("int"),
)

# Map of the pickups coloured by their high-density cluster label.
fig = px.scatter_mapbox(
    dataset,
    hover_name=db.labels_,
    lat='Lat',
    lon='Lon',
    color=db.labels_,
    zoom=10,
    mapbox_style="carto-positron",
)
fig.show("iframe")


In [None]:
# Sanity check: the stored flag column and the freshly computed cluster mask
# should have the same length.
print(dataset.loc[:, "haute_densite"].shape)
np.not_equal(cluster_haute_densite, -1).astype("int").shape

In [None]:
# Cluster the dayofweek == 6 pickups (Sunday in pandas' Monday=0 numbering)
# hour by hour on their (Lat, Lon) coordinates, and store on `data` whether
# each pickup falls in a high-density / low-density cluster (1) or is noise (0).
# Rows outside the selected day are left untouched (NaN in the flag columns).

# The weekday test does not depend on the hour: compute it once.
is_sunday = data['dayofweek'] == 6

for i in range(24):

    hour_mask = is_sunday & (data['hour'] == i)
    if not hour_mask.any():
        # DBSCAN cannot be fitted on zero samples; nothing to label this hour.
        continue

    dataset = data.loc[hour_mask, :]
    X = dataset[["Lat", 'Lon']].values

    # High-density clusters (eps is expressed in degrees here).
    db = DBSCAN(eps=0.015, min_samples=70, metric="manhattan", algorithm="brute")
    db.fit(X)
    cluster_haute_densite = db.labels_

    # Low-density clusters: same radius, fewer neighbours required.
    db_basse_densite = DBSCAN(eps=0.015, min_samples=20, metric="manhattan", algorithm="brute")
    db_basse_densite.fit(X)
    cluster_basse_densite = db_basse_densite.labels_

    # DBSCAN marks noise with -1; labels come back in the row order of X,
    # which is the row order selected by hour_mask.
    data.loc[hour_mask, "haute_densite"] = (cluster_haute_densite != -1).astype("int")
    data.loc[hour_mask, "basse_densite"] = (cluster_basse_densite != -1).astype("int")


In [None]:
# Show the column names currently present on `data`.
data.columns

In [None]:
# Animated map (one frame per hour) of the pickups that were clustered,
# coloured by the high-density flag.
# - Only rows that received a flag are plotted; the rest of `data` holds NaN
#   in `haute_densite`.
# - The flag is 0 or 1, so the colour range is [0, 1]. The previous [5, 10]
#   range clamped every point to the same colour.
# - Rows are sorted by hour so the animation frames should play 0 -> 23
#   whatever the row order of `data`.
fig = px.scatter_mapbox(
    data.dropna(subset=["haute_densite"]).sort_values("hour"),
    lat="Lat",
    lon="Lon",
    color="haute_densite",
    zoom=10,
    mapbox_style="open-street-map",
    color_continuous_scale='Reds',
    range_color=[0, 1],
    animation_frame='hour',
)
fig.show("iframe")

In [None]:
# Animated pickup density map, one frame per hour.
# The call previously referenced an undefined `earthquakes` frame and
# "Latitude"/"Longitude" columns; this notebook's frame is `data` and its
# coordinate columns are "Lat"/"Lon".
# Sorting by hour should make the animation frames play in chronological order.
fig = px.density_mapbox(
    data.sort_values("hour"),
    lat="Lat",
    lon="Lon",
    mapbox_style="open-street-map",
    animation_frame='hour',
    zoom=10,
    radius=10,
)
fig.show()