# Uber Projet 

# I/ Import lib 

In [1]:
import pandas as pd
import numpy as np
import os
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
from datetime import datetime
from sklearn.cluster import KMeans, MiniBatchKMeans, DBSCAN
from sklearn.metrics import silhouette_score
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder

import warnings
warnings.filterwarnings("ignore")

# II/ let's read the data 

## 1. Create df

In [None]:
dtest = pd.read_csv(r'./uber-trip-data/uber-raw-data-apr14.csv')
dtest.head(5)

In [None]:
dtest2 = pd.read_csv(r'./uber-trip-data/taxi-zone-lookup.csv')
dtest2.head(5)

In [None]:
def get_name(filename):
    # Enlève l'extension .csv et récupère la dernière partie après le dernier '-'
    name_without_ext = filename.replace('.csv', '')
    name = name_without_ext.split('-')[-1]
    return name

In [None]:
uber_dic = {}

# Liste les fichiers du dossier
for file in os.listdir('./uber-trip-data'):
    if file.endswith('.csv'):
        key = get_name(file)
        uber_dic[key] = pd.read_csv(f'./uber-trip-data/{file}')
        print(f"Ajouté: {key}")
    else:
        print(f'{file} is not a csv!')

print(f"\nNombre de fichiers CSV chargés: {len(uber_dic)}")

In [None]:
uber_dic['apr14'].head()

In [None]:
uber_dic['lookup'].head()

In [None]:
df_jajun15 = pd.read_csv(f'./uber-trip-data/uber-raw-data-janjune-15/uber-raw-data-janjune-15.csv')

## 2. EDA

In [None]:
for key in uber_dic:
    print('\n' + '**'*50)
    print('='*50)
    print(f"df : {key}")
    print('='*50)
    print(uber_dic[key].head())  # Affiche les 5 premières lignes
    print('='*50)
    uber_dic[key].info()

In [None]:
df_jajun15.info()

In [None]:
df_jajun15.head()

In [None]:
df_avsep14 = pd.concat([uber_dic['apr14'], uber_dic['may14'], uber_dic['jun14'], uber_dic['jul14'], uber_dic['aug14'], uber_dic['sep14']], ignore_index=True)

In [None]:
df_avsep14.info()

In [None]:
df_avsep14.head()

# III/ Preprocess

## 1/ 2014 working :

In [None]:
df_2014 = df_avsep14.copy(deep=True)

In [None]:
df_2014.rename(columns={key:str.lower(key) for key in df_2014.columns}, inplace=True)
df_2014.sort_values(by='date/time', inplace=True)

df_2014['date'] = df_2014['date/time'].str.split(" ").str[0]
df_2014['time'] = df_2014['date/time'].str.split(" ").str[1]
df_2014 = df_2014.drop('date/time', axis=1)

df_2014['date'] = pd.to_datetime(df_2014['date'])
df_2014['time'] = pd.to_datetime(df_2014['time']).dt.time
df_2014['year'] = df_2014['date'].dt.year
df_2014['month'] = df_2014['date'].dt.month
df_2014['day'] = df_2014['date'].dt.day
df_2014['dayofweek'] = df_2014['date'].dt.day_of_week

df_2014.head()

## 2/ 2015 working :

In [None]:
df_jajun15.info()

In [None]:
df_jajun15.head()

In [None]:
uber_dic['lookup'].info()

In [None]:
df_lookup = uber_dic['lookup']
# df_lookup = pd.read_csv(r'uber-trip-data/taxi-zone-lookup.csv')

In [None]:
df_lookup.rename(columns={key:str.lower(key) for key in df_lookup.columns}, inplace=True)
df_jajun15.rename(columns={key:str.lower(key) for key in df_jajun15.columns}, inplace=True)

In [None]:
df_lookup.head()

In [None]:
df_2015 = df_jajun15.merge(uber_dic['lookup'], on='locationid')

In [None]:
import re
df_2015['borough'] = df_2015['borough'].str.replace('[^a-zA-Z0-9 ]', '', regex=True)
df_2015['zone'] = df_2015['zone'].str.replace('[^a-zA-Z0-9 ]', ' ', regex=True)

In [None]:
df_2015

In [None]:
city = df_2015['zone'].unique().tolist()

In [None]:
city

In [None]:
replacements = {'Allerton Pelham Gardens' : 'Allerton',
                'Bay Terrace Fort Totten' : 'Fort Totten',
                'Bensonhurst East' : 'Bensonhurst',
                'Bensonhurst West' : 'Bensonhurst',
                'Bloomfield Emerson Hill' : 'Bloomfield',
                'Breezy Point Fort Tilden Riis Beach' : 'Fort Tilden',
                'Briarwood Jamaica Hills' : 'Briarwood',
                'Bushwick North' : 'Bushwick',
                'Central Harlem North' : 'Central Harlem',
                'Central Harlem' : 'Harlem',
                'Central Harlem' : 'Harlem',
                'Charleston Tottenville' : 'Tottenville',
                'Claremont Bathgate' : 'Claremont',
                'Crown Heights North' : 'Crown Heights',
                'Crown Heights South' : 'Crown Heights',
                'East Flatbush Farragut' : 'Flatbush',
                'East Flatbush Remsen Village' : 'Flatbush',
                'East Harlem North' : 'Harlem',
                'East Harlem South' : 'Harlem',
                'Elmhurst Maspeth' : 'Maspeth',
                'Eltingville Annadale Prince s Bay' : 'Eltingville',
                'Financial District North' : 'Financial District',
                'Flatbush Ditmas Park' : 'Flatbush',
                'Flushing Meadows Corona Park' : 'Flushing Meadows',
                'Fordham South' : 'Fordham',
                'Forest Park Highland Park' : 'Forest Park Highlands',
                'Freshkills Park' : 'Freshkills Park',
                'Governor s Island Ellis Island Liberty Island' : 'Ellis Island',
                'Governor s Island Ellis Island Liberty Island' : 'Ellis Island',
                'Governor s Island Ellis Island Liberty Island' : 'Ellis Island',
                'Greenwich Village North' : 'Greenwich Village',
                'Greenwich Village South' : 'Greenwich Village',
                'Grymes Hill Clifton' : 'Grymes Hill',
                'Heartland Village Todt Hill' : 'Heartland Village',
                'Hillcrest Pomonok' : 'Hillcrest',
                'Hillcrest' : '73rd Ave',
                'Lenox Hill East' : 'Lenox Hill',
                'Lenox Hill West' : 'Lenox Hill',
                'Lincoln Square East' : 'Lincoln Square',
                'Lincoln Square West' : 'Lincoln Square',
                'Madison' : 'Avenue R',
                'Marine Park Floyd Bennett Field' : 'Marine Park',
                'Marine Park Mill Basin' : 'Marine Park',
                'Melrose South' : 'Melrose',
                'Midtown Center' : 'Midtown',
                'Murray Hill Queens' : 'Murray Hill',
                'New Dorp Midland Beach' : 'Midland Beach',
                'North Corona' : 'Corona',
                'Ocean Parkway South' : 'Ocean Parkway',
                'Old Astoria' : 'Astoria',
                'Penn Station Madison Sq West' : 'Penn Station',
                'Queensbridge Ravenswood' : 'Queensbridge',
                'Riverdale North Riverdale Fieldston' : 'Riverdale',
                'Saint Michaels Cemetery Woodside' : 'Ditmars Blvd',
                'Schuylerville Edgewater Park' : 'Edgewater Park',
                'Soundview Bruckner' : 'Soundview',
                'Soundview Castle Hill' : 'Soundview',
                'South Beach Dongan Hills' : 'South Beach',
                'Springfield Gardens North' : 'Springfield Gardens',
                'Springfield Gardens South' : 'Springfield Gardens',
                'Stuy Town Peter Cooper Village' : 'Stuytown',
                'Sunset Park East' : 'Sunset Park',
                'Sunset Park West' : 'Sunset Park',
                'Sutton Place Turtle Bay North' : 'Turtle Bay',
                'Times Sq Theatre District' : 'Theater District',
                'TriBeCa Civic Center' : 'Tribeca',
                'UN Turtle Bay South' : 'Turtle Bay',
                'Upper East Side South' : 'Upper East Side',
                'Upper West Side South' : 'Upper West Side',
                'Upper East Side North' : 'Upper East Side',
                'Upper West Side North' : 'Upper West Side',
                'St Michaels Cemetery' : 'Ditmars Blvd',
                'Van Cortlandt Village' : 'Kingsbridge',
                'Washington Heights North' : 'Washington Heights',
                'Washington Heights South' : 'Washington Heights',
                'Westchester Village Unionport' : 'Unionport',
                'Williamsbridge Olinville' : 'Williamsbridge',
                'Williamsburg  North Side ' : 'Williamsburg',
                'Williamsburg  South Side ' : 'Williamsburg',
                'Yorkville East' : 'Yorkville',
                'Yorkville West' : 'Yorkville',
                'Unknown' : '',
                'Unknown' : ''}

In [None]:
for key in replacements.keys():
    df_2015['zone'] = df_2015['zone'].str.replace(key, replacements[key])

In [None]:
df_2015_a = df_2015.copy(deep=True)

In [None]:
df_2015_a['zone'].unique()

In [None]:
unique_places = df_2015[["borough", "zone"]].drop_duplicates()
unique_places.info()

In [None]:
geolocator = Nominatim(timeout=10, user_agent="uber_app")
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

In [None]:
def get_location(row):
    try:
        query = f"USA, New York, {row['borough']}, {row['zone']}"
        location = geocode(query)
        if location:
            return pd.Series([location.latitude, location.longitude])
        else:
            return pd.Series([None, None])
    except Exception:
        return pd.Series([None, None])

In [None]:
unique_places[["lat", "lon"]] = unique_places.apply(get_location, axis=1)

In [None]:
df_2015_a = df_2015_a.merge(unique_places, on=["borough", "zone"], how="left")

In [None]:
df_2015_a.info()

In [None]:
import missingno as msno
msno.matrix(df_2015_a)

In [None]:
df_2015_a = df_2015_a.dropna()

In [None]:
df_2015_a.info()

In [None]:
df_2015_a = df_2015_a.drop(columns=['borough', 'zone', 'locationid', 'affiliated_base_num'], axis = 1).reset_index()
df_2015_a = df_2015_a.rename(columns={'pickup_date': 'date/time', 'dispatching_base_num' : 'base' })
df_2015_a.head()

In [None]:
df_2015_a.info()

In [None]:
df_2015_b = df_2015_a.copy(deep=True)

In [None]:
df_2015_b.sort_values(by='date/time', inplace=True)

df_2015_b['date'] = df_2015_b['date/time'].str.split(" ").str[0]
df_2015_b['time'] = df_2015_b['date/time'].str.split(" ").str[1]
df_2015_b = df_2015_b.drop('date/time', axis=1)

df_2015_b['date'] = pd.to_datetime(df_2015_b['date'])
df_2015_b['time'] = pd.to_datetime(df_2015_b['time']).dt.time
df_2015_b['year'] = df_2015_b['date'].dt.year
df_2015_b['month'] = df_2015_b['date'].dt.month
df_2015_b['day'] = df_2015_b['date'].dt.day
df_2015_b['dayofweek'] = df_2015_b['date'].dt.day_of_week

df_2015_b.head()

In [None]:
df_2015_b = df_2015_b.drop(columns='index')
df_2015_b.info()

## 3/ association 2014 2015 :

In [None]:
df_data_2015 = df_2015_b.copy(deep=True)
df_data_2014 = df_2014.copy(deep=True)

In [None]:
df_data_2014.info()

In [None]:
df_data_2015.info()

In [None]:
df_data = pd.concat([df_data_2014, df_data_2015], ignore_index=True)

In [None]:
df_data.info()

In [None]:
df_data['base'].value_counts()

In [None]:
# df_data.to_csv('uber_data.csv')

# III/ Let's Cluster

In [3]:
df_data = pd.read_csv('uber_data.csv')

## 0/ separate data

In [4]:
day_of_week = {
    0 : 'Monday',
    1 : 'Tuesday',
    2 : 'Wednesday',
    3 : 'Thursday',
    4 : 'Friday',
    5 : 'Saturday night fever',
    6 : 'Sun day ;)'
}

In [5]:
day_dict = {}

for i in range(0,7):
    day_dict[i] = df_data[df_data['dayofweek'] == i] 
    day_dict[i] = day_dict[i].sample(10_000, random_state=42)

In [6]:
day_dict[0]

Unnamed: 0.1,Unnamed: 0,lat,lon,base,date,time,year,month,day,dayofweek
15111898,15111898,40.772014,-73.930267,B02682,2015-05-25,20:35:00,2015,5,25,0
1726636,1726636,40.764500,-73.965700,B02598,2014-06-30,00:00:00,2014,6,30,0
12745128,12745128,40.729269,-73.987361,B02682,2015-04-27,12:12:00,2015,4,27,0
8115581,8115581,40.715380,-74.009306,B02764,2015-02-23,20:02:27,2015,2,23,0
4736300,4736300,40.766437,-73.959017,B02764,2015-01-05,10:48:43,2015,1,5,0
...,...,...,...,...,...,...,...,...,...,...
16403311,16403311,40.705751,-74.002906,B02682,2015-06-08,23:40:00,2015,6,8,0
9614074,9614074,40.731931,-73.981761,B02617,2015-03-16,10:35:00,2015,3,16,0
6973802,6973802,40.733584,-74.002817,B02617,2015-02-09,10:01:36,2015,2,9,0
2620862,2620862,40.695200,-74.178400,B02598,2014-07-07,07:08:00,2014,7,7,0


## 1/ MiniBatchKMEANS

### 1.1/ get cluster

In [7]:
def get_clusters_mkm(dayofweek):

    sil = []
    k = []
    wcss =  []
    
    for i in range (2,21): 
        kmeans = MiniBatchKMeans(n_clusters= i)
        kmeans.fit(day_dict[dayofweek][['lat','lon']])
        # elbow
        wcss.append(kmeans.inertia_)
        
        # sil score
        sil.append(silhouette_score(day_dict[dayofweek][['lat','lon']], kmeans.predict(day_dict[dayofweek][['lat','lon']])))
        k.append(i)
        # print("Silhouette score for K={} is {}".format(i, sil[-1]))
    # print(wcss)

    # == elbow method ==
    cluster_scores=pd.DataFrame(sil)
    k_frame = pd.Series(k)

    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=k,
        y=wcss, 
        mode='lines+markers', 
        name= 'WCSS', 
        yaxis='y1'
    ))

    fig.add_trace(go.Bar(
        x=k,
        y=sil, 
        name='sil_score',
        opacity=0.3,
        yaxis='y2'
    ))

    fig.update_layout(
        title=f"WCSS and Silhouette Score for {day_of_week[dayofweek]}",
        xaxis=dict(title="Number of Clusters (k)"),
        yaxis=dict(
            title="WCSS",
            showgrid=False,
            side="left"
        ),
        yaxis2=dict(
            title="Silhouette Score",
            overlaying="y",
            side="right",
            showgrid=False
        ),
        title_x=0.5,
        template="plotly_white"
    )

    fig.show()
    

In [8]:
for i in range(0,7):
    get_clusters_mkm(i)

### 1.2/ show cluster 

In [9]:
def show_cluster_mkm(dayofweek, cluster):
    print('MiniKmeans **'*50)
    print(f'show for {day_of_week[dayofweek]}')
    minikmeans = MiniBatchKMeans(n_clusters=cluster, random_state=42)
    minikmeans.fit(day_dict[dayofweek][['lat', 'lon']])
    day_dict[dayofweek]['cluster'] = minikmeans.predict(day_dict[dayofweek][['lat', 'lon']])

    fig2 = px.scatter_map(day_dict[dayofweek], 
                          lat='lat', 
                          lon='lon', 
                          color='cluster', 
                          color_continuous_scale='Bluered')
    fig2.update_layout(
        title = f"All {cluster} clusters for {day_of_week[dayofweek]}",
        title_x=0.5
        )
    fig2.show()

In [10]:
show_cluster_mkm(0,5)

MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **
show for Monday


In [11]:
show_cluster_mkm(1,5)

MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **
show for Tuesday


In [12]:
show_cluster_mkm(2,5)

MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **
show for Wednesday


In [13]:
show_cluster_mkm(2,9)

MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **
show for Wednesday


In [14]:
show_cluster_mkm(2,14)

MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **
show for Wednesday


In [15]:
show_cluster_mkm(3,5)

MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **
show for Thursday


In [16]:
show_cluster_mkm(4,5)

MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **
show for Friday


In [17]:
show_cluster_mkm(5,5)

MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **
show for Saturday night fever


In [18]:
show_cluster_mkm(6,5)

MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **
show for Sun day ;)


### 1.3/ Conclusion 

## 2/ KMeans

### 2.1/ get clusters

In [19]:
def get_clusters_km(dayofweek):

    sil = []
    k = []
    wcss =  []
    
    for i in range (2,21): 
        kmeans = KMeans(n_clusters= i)
        kmeans.fit(day_dict[dayofweek][['lat','lon']])
        # elbow
        wcss.append(kmeans.inertia_)
        
        # sil score
        sil.append(silhouette_score(day_dict[dayofweek][['lat','lon']], kmeans.predict(day_dict[dayofweek][['lat','lon']])))
        k.append(i)
        # print("Silhouette score for K={} is {}".format(i, sil[-1]))
    # print(wcss)

    # == elbow method ==
    cluster_scores=pd.DataFrame(sil)
    k_frame = pd.Series(k)

    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=k,
        y=wcss, 
        mode='lines+markers', 
        name= 'WCSS', 
        yaxis='y1'
    ))

    fig.add_trace(go.Bar(
        x=k,
        y=sil, 
        name='sil_score',
        opacity=0.3,
        yaxis='y2'
    ))

    fig.update_layout(
        title=f"WCSS and Silhouette Score for {dayofweek}",
        xaxis=dict(title="Number of Clusters (k)"),
        yaxis=dict(
            title="WCSS",
            showgrid=False,
            side="left"
        ),
        yaxis2=dict(
            title="Silhouette Score",
            overlaying="y",
            side="right",
            showgrid=False
        ),
        title_x=0.5,
        template="plotly_white"
    )

    fig.show()
    

In [20]:
for i in range(0,7):
    get_clusters_km(i)

### 2.2/ show cluters

In [21]:
def show_cluster_km(dayofweek, cluster):
    print('Kmeans **'*50)
    print(f'show for {day_of_week[dayofweek]}')
    minikmeans = KMeans(n_clusters=cluster, random_state=42)
    minikmeans.fit(day_dict[dayofweek][['lat', 'lon']])
    day_dict[dayofweek]['cluster'] = minikmeans.predict(day_dict[dayofweek][['lat', 'lon']])

    fig2 = px.scatter_map(day_dict[dayofweek], 
                          lat='lat', 
                          lon='lon', 
                          color='cluster', 
                          color_continuous_scale='Bluered')
    fig2.update_layout(
        title = f"All {cluster} clusters for {day_of_week[dayofweek]}",
        title_x=0.5
        )
    fig2.show()

In [22]:
show_cluster_km(0,5)

Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **
show for Monday


In [23]:
show_cluster_km(1,5)

Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **
show for Tuesday


In [24]:
show_cluster_km(2,5)

Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **
show for Wednesday


In [25]:
show_cluster_km(3,5)

Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **
show for Thursday


In [26]:
show_cluster_km(4,5)

Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **
show for Friday


In [27]:
show_cluster_km(5,5)

Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **
show for Saturday night fever


In [28]:
show_cluster_km(6,5)

Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **
show for Sun day ;)


### 2.3/ Conclusion

## 3/ DBScan

In [29]:
d_dict = day_dict

### 3.1/ get clusters

In [30]:
def get_clusters_dbs(dayofweek):    
    dbs_results = []

    # Loops over different eps and min_samples
    for eps in np.arange(0.005, 0.5, 0.05):
        for min_samples in range(10, 1000, 100):
            db = DBSCAN(eps=eps, min_samples=min_samples, metric="euclidean")
            labels = db.fit_predict(d_dict[dayofweek][['lat', 'lon']])
            n_clusters = len(set(labels)) - (1 if -1 in labels else 0)

            # filter to have number of clusters between 5 and 10
            if 5 <= n_clusters <= 10:
                d_dict[dayofweek]['cluster'] = db.labels_
                max_item = max(d_dict[dayofweek]['cluster'].value_counts())

                # filter to have less than 150 items max by cluster
                # if max_item > 150:
                dbs_results.append({'eps': eps, 'min_samp' : min_samples, 'n_clusters': n_clusters, 'max_item' : max_item})
    
    dbs_results = pd.DataFrame(dbs_results, columns=['eps', 'min_samp', 'n_clusters','max_item'])
    # dbs_results = dbs_results.sort_values(by="max_item", ascending=False)
    # print(f'results for {day_of_week[dayofweek]}')
    # print(dbs_results.head())
    return dbs_results

In [31]:
results_dbs_day = {}

for i in range (0,7):
    results_dbs_day[i]= get_clusters_dbs(i)

In [32]:
for i in range (0,7):
    print('**'*50)
    print(f'day {day_of_week[i]}')
    print('**'*50)
    print(results_dbs_day[i].info())
    print('='*50)
    print(results_dbs_day[i].head())

****************************************************************************************************
day Monday
****************************************************************************************************
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   eps         4 non-null      float64
 1   min_samp    4 non-null      int64  
 2   n_clusters  4 non-null      int64  
 3   max_item    4 non-null      int64  
dtypes: float64(1), int64(3)
memory usage: 260.0 bytes
None
     eps  min_samp  n_clusters  max_item
0  0.005       110           8      5850
1  0.005       210           7      4924
2  0.005       310           7      4606
3  0.005       410           6      5804
****************************************************************************************************
day Tuesday
***************************************************************

In [33]:
for i in range (0,7):
    print(fr"the numbers cluster on day {day_of_week[i]} have {results_dbs_day[i]['n_clusters'].unique()}")

the numbers cluster on day Monday have [8 7 6]
the numbers cluster on day Tuesday have [8 6 5]
the numbers cluster on day Wednesday have [6 5]
the numbers cluster on day Thursday have [6 7 8]
the numbers cluster on day Friday have [7 8]
the numbers cluster on day Saturday night fever have [8 6 7]
the numbers cluster on day Sun day ;) have [8 7 5]


In [34]:
for i in range (0,7):
    results_dbs_day[i] = results_dbs_day[i].sort_values(by="max_item", ascending=False)
    print('='*50)
    print(f'day {day_of_week[i]}')
    print(results_dbs_day[i].head())


day Monday
     eps  min_samp  n_clusters  max_item
0  0.005       110           8      5850
3  0.005       410           6      5804
1  0.005       210           7      4924
2  0.005       310           7      4606
day Tuesday
     eps  min_samp  n_clusters  max_item
0  0.005       110           8      6137
2  0.005       410           5      5657
1  0.005       310           6      4178
day Wednesday
     eps  min_samp  n_clusters  max_item
0  0.005       110           6      6297
2  0.005       410           6      5236
1  0.005       310           5      3864
day Thursday
     eps  min_samp  n_clusters  max_item
0  0.005       110           6      6403
2  0.005       410           8      4445
1  0.005       310           7      3700
day Friday
     eps  min_samp  n_clusters  max_item
0  0.005       110           7      6134
2  0.005       410           7      4993
1  0.005       310           8      4012
day Saturday night fever
     eps  min_samp  n_clusters  max_item
0  0.005    

### 3.2/ show cluster

In [35]:
def show_cluster_dbs(dayofweek, eps_value, min_samp_value):
    db = DBSCAN(eps=eps_value, min_samples=min_samp_value, metric="euclidean")
    db.fit(d_dict[dayofweek][['lat', 'lon']])
    d_dict[dayofweek]['cluster'] = db.labels_
    # labels = db.fit_predict(d_dict[dayofweek][['lat', 'lon']])
    n_clusters = d_dict[dayofweek]['cluster'].value_counts().shape
    print('DBScan **'*50)
    print(f'show for {day_of_week[dayofweek]}')
    print(d_dict[dayofweek]['cluster'].value_counts())

    fig3 = px.scatter_map(day_dict[dayofweek], 
                          lat='lat', 
                          lon='lon', 
                          color='cluster', 
                          color_continuous_scale='Bluered')
    fig3.update_layout(
        title = f"All {n_clusters} clusters for {day_of_week[dayofweek]}",
        title_x=0.5
        )
    fig3.show()

In [36]:
for i in range (0,7):
    show_cluster_dbs(i,0.005,410)

DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **
show for Monday
cluster
-1    5804
 2    1516
 0     754
 1     724
 3     511
 5     468
 4     223
Name: count, dtype: int64


DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **
show for Tuesday
cluster
-1    5657
 0    1789
 3     847
 1     713
 2     546
 4     448
Name: count, dtype: int64


DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **
show for Wednesday
cluster
-1    5236
 0    1907
 2     898
 1     712
 5     524
 4     501
 3     222
Name: count, dtype: int64


DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **
show for Thursday
cluster
-1    4445
 1    1809
 2     894
 0     830
 3     495
 4     483
 6     447
 5     394
 7     203
Name: count, dtype: int64


DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **
show for Friday
cluster
-1    4993
 2    1473
 3     970
 1     830
 0     578
 4     483
 5     474
 6     199
Name: count, dtype: int64


DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **
show for Saturday night fever
cluster
-1    6657
 2    1458
 0     973
 1     912
Name: count, dtype: int64


DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **
show for Sun day ;)
cluster
-1    7189
 1     878
 3     840
 0     573
 2     466
 4      54
Name: count, dtype: int64


In [37]:
show_cluster_dbs(0,0.012,110)

DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **
show for Monday
cluster
 0    7968
-1     982
 3     312
 4     310
 1     202
 2     136
 5      90
Name: count, dtype: int64


In [38]:
show_cluster_dbs(0,0.01,110)

DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **
show for Monday
cluster
 0    7522
-1    1214
 1     351
 4     312
 5     268
 2     201
 3     132
Name: count, dtype: int64


### 3.3/ Conclusion 

## 4/ comparaison des methode de clusters 

In [39]:
for i in range(0,7):
    print(f'for the day {day_of_week[i]}')
    if i==0:
        show_cluster_mkm(i,5)
        show_cluster_km(i,7)
        show_cluster_dbs(i,0.01,410)
    if i==1:
        show_cluster_mkm(i,2)
        show_cluster_km(i, 2)
        show_cluster_dbs(i,0.01,410)
    if i==2:
        show_cluster_mkm(i,7)
        show_cluster_km(i, 7)
        show_cluster_dbs(i,0.01,410)

for the day Monday
MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **
show for Monday


Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **
show for Monday


DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **
show for Monday
cluster
 0    6905
-1    3095
Name: count, dtype: int64


for the day Tuesday
MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **
show for Tuesday


Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **
show for Tuesday


DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **
show for Tuesday
cluster
 0    7317
-1    2683
Name: count, dtype: int64


for the day Wednesday
MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **MiniKmeans **
show for Wednesday


Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **Kmeans **
show for Wednesday


DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **DBScan **
show for Wednesday
cluster
 0    7412
-1    2588
Name: count, dtype: int64


for the day Thursday
for the day Friday
for the day Saturday night fever
for the day Sun day ;)


# IV/ Conclusion