In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly as pt
import plotly.graph_objs as go

In [None]:
# Read the raw trip log; the relative path is resolved against the working directory.
uber_csv = pd.read_csv(filepath_or_buffer='uber_drives.csv')

In [None]:
# Discard the final row of the raw file, then preview the new end of the frame.
# NOTE(review): presumably the last CSV row is a footer/summary record rather
# than a ride — confirm against the file.
# Re-running this cell removes one more row each time, so reload the CSV first.
uber_csv = uber_csv.drop(index=uber_csv.index[-1:])
uber_csv.tail(5)


Unnamed: 0,START_DATE*,END_DATE*,CATEGORY*,START*,STOP*,MILES*,PURPOSE*
1150,12/31/2016 1:07,12/31/2016 1:14,Business,Kar?chi,Kar?chi,0.7,Meeting
1151,12/31/2016 13:24,12/31/2016 13:42,Business,Kar?chi,Unknown Location,3.9,Temporary Site
1152,12/31/2016 15:03,12/31/2016 15:38,Business,Unknown Location,Unknown Location,16.2,Meeting
1153,12/31/2016 21:32,12/31/2016 21:50,Business,Katunayake,Gampaha,6.4,Temporary Site
1154,12/31/2016 22:08,12/31/2016 23:51,Business,Gampaha,Ilukwatta,48.2,Temporary Site


In [None]:
# Convert both timestamp columns from text into pandas datetime values.
for date_col in ('START_DATE*', 'END_DATE*'):
    uber_csv[date_col] = pd.to_datetime(uber_csv[date_col])

In [None]:
# Preview the first five rides after the timestamp conversion.
uber_csv.head(5)

Unnamed: 0,START_DATE*,END_DATE*,CATEGORY*,START*,STOP*,MILES*,PURPOSE*
0,2016-01-01 21:11:00,2016-01-01 21:17:00,Business,Fort Pierce,Fort Pierce,5.1,Meal/Entertain
1,2016-01-02 01:25:00,2016-01-02 01:37:00,Business,Fort Pierce,Fort Pierce,5.0,
2,2016-01-02 20:25:00,2016-01-02 20:38:00,Business,Fort Pierce,Fort Pierce,4.8,Errand/Supplies
3,2016-01-05 17:31:00,2016-01-05 17:45:00,Business,Fort Pierce,Fort Pierce,4.7,Meeting
4,2016-01-06 14:42:00,2016-01-06 15:49:00,Business,Fort Pierce,West Palm Beach,63.7,Customer Visit


## Let's look into the ``value_counts`` of columns containing discrete information

In [None]:
# Tally rides per category, most frequent category first, and display the tally.
cat_plot = uber_csv['CATEGORY*'].value_counts(sort=True, ascending=False)
cat_plot


Business    1078
Personal      77
Name: CATEGORY*, dtype: int64

In [None]:
# Donut chart of ride counts per category.
# Labels and values come straight from the tally, so every category present in
# the data is drawn and an absent category can no longer raise a KeyError.
labels = list(cat_plot.index)
values = list(cat_plot.values)
fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=.4)])
fig.update_layout(
    title_text="Analysis on value counts - \'Category\'")
fig.show()

# The chart shows that business rides make up about 93% of all rides (1078 of 1155).

# Analysing ``START*`` location for any uber in the given data

In [None]:
# Count rides per start location, order the counts alphabetically by location
# name, and flatten the result into a two-column frame (Location, Start).
start_plot = (
    uber_csv['START*']
    .value_counts()
    .sort_index(ascending=True)
    .rename_axis('Location')
    .reset_index(name='Start')
)
start_plot.head(5)

Unnamed: 0,Location,Start
0,Agnew,4
1,Almond,1
2,Apex,17
3,Arabi,1
4,Arlington,1


In [None]:
# Donut chart over every start location; slice text is switched off via
# textinfo='none', so names and counts only appear in the legend and on hover.
labels = start_plot['Location'].tolist()
values = start_plot['Start']
fig = go.Figure(
    data=[go.Pie(labels=labels, values=values, hole=.5, textinfo='none')],
    layout={'title': {'text': "Analysis on value counts - \'Start-Location\'"}},
)
fig.show()

# Top ten starting locations

In [None]:
# Order all start locations by ride count, busiest first.
# Despite the name this still holds every location; nothing is sliced here.
start_plot_top_10 = start_plot.sort_values('Start', ascending=False)

In [None]:
# Donut chart restricted to the first ten rows of the count-sorted frame.
labels = start_plot_top_10['Location'].iloc[:10].tolist()
values = start_plot_top_10['Start'].iloc[:10]
fig = go.Figure(
    data=[go.Pie(labels=labels, values=values, hole=.5)],
    layout={'title': {'text': "Top ten starting points"}},
)
fig.show()

In [None]:
# import json

# with open('custom.geo.json') as response:
#   the_world = json.load(response)

In [None]:
# import json

# import plotly.express as px

# fig = px.choropleth(start_plot, geojson=the_world, locations='Location', color='Start',
#                            color_continuous_scale="Viridis",
#                            range_color=(0, 12),
#                            scope="usa",
#                            labels={'unemp':'unemployment rate'}
#                           )
# fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
# fig.show()

### I tried rendering the start locations on a choropleth map, but could not get it to work, so the attempt is left commented out above and the analysis moves on.

# Now let us analyze ``PURPOSE*`` of visit

In [None]:
# Count rides per purpose, order the counts from largest to smallest, and wrap
# the result in a single-column frame.
purpose_df = pd.DataFrame(
    uber_csv['PURPOSE*'].value_counts().sort_values(ascending=False)
)

In [None]:
# Move the purpose labels out of the index into an ordinary column and give
# both columns readable names.
# Running this cell twice fails: the second reset_index adds a third column.
purpose_df = purpose_df.reset_index().set_axis(['PURPOSE*', 'Number'], axis='columns')
purpose_df.head(5)

Unnamed: 0,PURPOSE*,Number
0,Meeting,187
1,Meal/Entertain,160
2,Errand/Supplies,128
3,Customer Visit,101
4,Temporary Site,50


In [None]:
# Donut chart of ride counts per recorded purpose.
labels = purpose_df['PURPOSE*'].tolist()
values = purpose_df['Number']
fig = go.Figure(
    data=[go.Pie(labels=labels, values=values, hole=.5)],
    layout={'title': {'text': "Analysis on value counts - \'Purpose\'"}},
)
fig.show()

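## 'Business' dominates ``CATEGORY*``, so it is no surprise that meetings are the most common purpose, at roughly 30% of the rides charted above (rides with no recorded purpose are not counted by ``value_counts``). Meetings, meals and errands are the top three purposes for an Uber ride.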

# Let's step a bit deeper and use ``groupby`` on the start and stop locations: how many trips were made for each unique (start, stop) pair?

In [None]:
# Number of rides for every (start, stop) pair, busiest pair first.
uber_csv_grp_by = pd.DataFrame(
    uber_csv.groupby(['START*', 'STOP*']).size().sort_values(ascending=False)
)

In [None]:
# Turn the (start, stop) index levels into columns and name the count column.
uber_csv_grp_by = uber_csv_grp_by.reset_index().set_axis(
    ['START*', 'STOP*', 'N(Trips)'], axis='columns'
)
uber_csv_grp_by.head(5)

Unnamed: 0,START*,STOP*,N(Trips)
0,Unknown Location,Unknown Location,86
1,Morrisville,Cary,75
2,Cary,Morrisville,67
3,Cary,Cary,53
4,Cary,Durham,36


In [None]:
# Build a "start-stop" route label for every pair.
uber_csv_grp_by['full_trip'] = uber_csv_grp_by['START*'].str.cat(
    uber_csv_grp_by['STOP*'], sep='-'
)

In [None]:
# Donut chart over every route; slice text is switched off via textinfo='none'.
labels = uber_csv_grp_by['full_trip'].tolist()
values = uber_csv_grp_by['N(Trips)']
fig = go.Figure(
    data=[go.Pie(labels=labels, values=values, hole=.5, textinfo='none')],
    layout={'title': {'text': "Analysis on unique trips "}},
)
fig.show()

### Whoa, that is a lot of colours on one pie — more than on most birthday cakes. With this many slices the chart is unreadable, so let's dive into the top ten instead.

In [None]:
# Donut chart of the busiest routes.
# Eleven rows are taken rather than ten; the first row of the sorted table is
# the 'Unknown Location' to 'Unknown Location' pair.
labels = uber_csv_grp_by['full_trip'].head(11).tolist()
values = uber_csv_grp_by['N(Trips)'].head(11)
fig = go.Figure(
    data=[go.Pie(labels=labels, values=values, hole=.5)],
    layout={'title': {'text': "Analysis on unique trips (Top ten) "}},
)
fig.show()

Eleven trips are selected rather than ten because the top entry, **Unknown Location-Unknown Location**, carries no route information. The most notable pattern is that the busiest known routes all start or end in **Cary**, which is in North Carolina — Uber drivers are clearly kept busy there.

## The ten least frequent routes (places where Uber drivers have little to do)

In [None]:
# Donut chart of the last ten rows of the count-sorted route table.
labels = uber_csv_grp_by['full_trip'].iloc[-10:].tolist()
values = uber_csv_grp_by['N(Trips)'].iloc[-10:]
fig = go.Figure(
    data=[go.Pie(labels=labels, values=values, hole=.5)],
    layout={'title': {'text': "Analysis on unique trips (Least ten) "}},
)
fig.show()

# Now, lets go to the longest trips

In [None]:
# Count rides per (start, stop, distance) combination, most frequent first.
# This rebinds uber_csv_grp_by, replacing the (start, stop) table built earlier.
uber_csv_grp_by = pd.DataFrame(
    uber_csv.groupby(['START*', 'STOP*', 'MILES*']).size().sort_values(ascending=False)
)

In [None]:
# Preview the five most frequent (start, stop, distance) combinations.
uber_csv_grp_by.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0
START*,STOP*,MILES*,Unnamed: 3_level_1
Durham,Cary,9.9,20
Cary,Durham,10.4,14
Morrisville,Cary,3.1,12
Cary,Morrisville,8.4,9
Cary,Morrisville,3.0,7


In [None]:
# Count rides per (start, stop, distance) combination, then order the rows by
# the MILES* index level, longest distance first.
uber_csv_mi = uber_csv.groupby(['START*', 'STOP*', 'MILES*']).size().to_frame()
uber_csv_mi_sort = uber_csv_mi.sort_values('MILES*', ascending=False)

In [None]:
# Display the (START*, STOP*, MILES*) count table, ordered from longest to shortest distance.
uber_csv_mi_sort

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0
START*,STOP*,MILES*,Unnamed: 3_level_1
Latta,Jacksonville,310.3,1
Jacksonville,Kissimmee,201.0,1
Asheville,Mebane,195.9,1
Unknown Location,Unknown Location,195.6,1
Morrisville,Banner Elk,195.3,1
...,...,...,...
Central,West Berkeley,0.6,1
Whitebridge,Whitebridge,0.6,1
Cary,Cary,0.5,1
Katunayaka,Katunayaka,0.5,1


In [None]:
# Flatten the three index levels into columns, name the count column, and add
# a "start-stop" route label.
uber_csv_mi_sort = uber_csv_mi_sort.reset_index().set_axis(
    ['START*', 'STOP*', 'MILES*', 'N(Trips)'], axis='columns'
)
uber_csv_mi_sort['full_trip'] = uber_csv_mi_sort['START*'].str.cat(
    uber_csv_mi_sort['STOP*'], sep='-'
)
uber_csv_mi_sort.head(5)

Unnamed: 0,START*,STOP*,MILES*,N(Trips),full_trip
0,Latta,Jacksonville,310.3,1,Latta-Jacksonville
1,Jacksonville,Kissimmee,201.0,1,Jacksonville-Kissimmee
2,Asheville,Mebane,195.9,1,Asheville-Mebane
3,Unknown Location,Unknown Location,195.6,1,Unknown Location-Unknown Location
4,Morrisville,Banner Elk,195.3,1,Morrisville-Banner Elk


In [None]:
# Donut chart of the twenty longest distances; slice size is the distance in miles.
labels = uber_csv_mi_sort['full_trip'].iloc[:20].tolist()
values = uber_csv_mi_sort['MILES*'].iloc[:20]
fig = go.Figure(
    data=[go.Pie(labels=labels, values=values, hole=.5)],
    layout={'title': {'text': "Longest trips (Top 20) "}},
)
fig.show()

In [None]:
# Donut chart of the twenty shortest distances (the tail of the distance-sorted table).
labels = uber_csv_mi_sort['full_trip'].iloc[-20:].tolist()
values = uber_csv_mi_sort['MILES*'].iloc[-20:]
fig = go.Figure(
    data=[go.Pie(labels=labels, values=values, hole=.5)],
    layout={'title': {'text': "Shortest trips (Top 20) "}},
)
fig.show()

In [None]:
import plotly.figure_factory as ff

# Distribution of trip distance across ALL recorded rides.
# The grouped (START*, STOP*, MILES*) table holds one row per distinct
# combination, so a route driven twenty times at the same distance would be
# counted once and the distribution would be distorted. The per-ride column is
# used instead; missing distances are dropped before the density estimate.
dist_data = [uber_csv['MILES*'].dropna().values]
group_labels = ['MILES*']

fig = ff.create_distplot(dist_data, group_labels)
fig.update_layout(title_text='Distance Distribution plot')

fig.show()

## A little analysis on the dates provided (``START_DATE*`` and ``END_DATE*``)

In [None]:
# Preview the first five rides before deriving date features.
uber_csv.head(5)

Unnamed: 0,START_DATE*,END_DATE*,CATEGORY*,START*,STOP*,MILES*,PURPOSE*
0,2016-01-01 21:11:00,2016-01-01 21:17:00,Business,Fort Pierce,Fort Pierce,5.1,Meal/Entertain
1,2016-01-02 01:25:00,2016-01-02 01:37:00,Business,Fort Pierce,Fort Pierce,5.0,
2,2016-01-02 20:25:00,2016-01-02 20:38:00,Business,Fort Pierce,Fort Pierce,4.8,Errand/Supplies
3,2016-01-05 17:31:00,2016-01-05 17:45:00,Business,Fort Pierce,Fort Pierce,4.7,Meeting
4,2016-01-06 14:42:00,2016-01-06 15:49:00,Business,Fort Pierce,West Palm Beach,63.7,Customer Visit


In [None]:
# Derive the calendar month number of each ride's start timestamp.
uber_csv['START_MONTH'] = uber_csv['START_DATE*'].dt.month
uber_csv.head(5)

Unnamed: 0,START_DATE*,END_DATE*,CATEGORY*,START*,STOP*,MILES*,PURPOSE*,START_MONTH
0,2016-01-01 21:11:00,2016-01-01 21:17:00,Business,Fort Pierce,Fort Pierce,5.1,Meal/Entertain,1
1,2016-01-02 01:25:00,2016-01-02 01:37:00,Business,Fort Pierce,Fort Pierce,5.0,,1
2,2016-01-02 20:25:00,2016-01-02 20:38:00,Business,Fort Pierce,Fort Pierce,4.8,Errand/Supplies,1
3,2016-01-05 17:31:00,2016-01-05 17:45:00,Business,Fort Pierce,Fort Pierce,4.7,Meeting,1
4,2016-01-06 14:42:00,2016-01-06 15:49:00,Business,Fort Pierce,West Palm Beach,63.7,Customer Visit,1


In [None]:
# Preview the last five rides to check the month column at the end of the frame.
uber_csv.tail(5)

Unnamed: 0,START_DATE*,END_DATE*,CATEGORY*,START*,STOP*,MILES*,PURPOSE*,START_MONTH
1150,2016-12-31 01:07:00,2016-12-31 01:14:00,Business,Kar?chi,Kar?chi,0.7,Meeting,12
1151,2016-12-31 13:24:00,2016-12-31 13:42:00,Business,Kar?chi,Unknown Location,3.9,Temporary Site,12
1152,2016-12-31 15:03:00,2016-12-31 15:38:00,Business,Unknown Location,Unknown Location,16.2,Meeting,12
1153,2016-12-31 21:32:00,2016-12-31 21:50:00,Business,Katunayake,Gampaha,6.4,Temporary Site,12
1154,2016-12-31 22:08:00,2016-12-31 23:51:00,Business,Gampaha,Ilukwatta,48.2,Temporary Site,12


In [None]:
# Number of rides per start month, busiest month first, as a one-column frame.
uber_start_month = (
    uber_csv['START_MONTH']
    .value_counts()
    .sort_values(ascending=False)
    .to_frame()
)

In [None]:
# Display the ride count per start month, highest count first.
uber_start_month

Unnamed: 0,START_MONTH
12,146
8,133
11,122
2,115
3,113
7,112
6,108
10,106
1,61
4,54


In [None]:
# Move the month numbers out of the index into a column and name both columns.
uber_start_month = uber_start_month.reset_index().set_axis(
    ['START_MONTH', 'Number_Of_Rides'], axis='columns'
)
uber_start_month.head(5)

Unnamed: 0,START_MONTH,Number_Of_Rides
0,12,146
1,8,133
2,11,122
3,2,115
4,3,113


In [None]:
# Preview the five months with the fewest rides.
uber_start_month.tail(5)

Unnamed: 0,START_MONTH,Number_Of_Rides
7,10,106
8,1,61
9,4,54
10,5,49
11,9,36


In [None]:
import plotly.express as px

# Bar chart of the number of rides in each calendar month.
# A title and readable axis labels are set so the figure stands on its own.
fig = px.bar(
    uber_start_month,
    x='START_MONTH',
    y='Number_Of_Rides',
    title='Number of rides per start month',
    labels={
        'START_MONTH': 'Start month (1 = January)',
        'Number_Of_Rides': 'Number of rides',
    },
)
# The month axis is numeric; force one tick per month so every bar is labelled.
fig.update_xaxes(dtick=1)
fig.show()

## **December** (the Christmas month) has the most rides and September the fewest. A plausible explanation is holiday travel before January 1st, although the data alone cannot confirm it.

In [None]:
# Display the working frame, which now carries the START_MONTH column.
uber_csv

Unnamed: 0,START_DATE*,END_DATE*,CATEGORY*,START*,STOP*,MILES*,PURPOSE*,START_MONTH
0,2016-01-01 21:11:00,2016-01-01 21:17:00,Business,Fort Pierce,Fort Pierce,5.1,Meal/Entertain,1
1,2016-01-02 01:25:00,2016-01-02 01:37:00,Business,Fort Pierce,Fort Pierce,5.0,,1
2,2016-01-02 20:25:00,2016-01-02 20:38:00,Business,Fort Pierce,Fort Pierce,4.8,Errand/Supplies,1
3,2016-01-05 17:31:00,2016-01-05 17:45:00,Business,Fort Pierce,Fort Pierce,4.7,Meeting,1
4,2016-01-06 14:42:00,2016-01-06 15:49:00,Business,Fort Pierce,West Palm Beach,63.7,Customer Visit,1
...,...,...,...,...,...,...,...,...
1150,2016-12-31 01:07:00,2016-12-31 01:14:00,Business,Kar?chi,Kar?chi,0.7,Meeting,12
1151,2016-12-31 13:24:00,2016-12-31 13:42:00,Business,Kar?chi,Unknown Location,3.9,Temporary Site,12
1152,2016-12-31 15:03:00,2016-12-31 15:38:00,Business,Unknown Location,Unknown Location,16.2,Meeting,12
1153,2016-12-31 21:32:00,2016-12-31 21:50:00,Business,Katunayake,Gampaha,6.4,Temporary Site,12


# Let us get deeper, 
#>**What is the *fastest* ride??** AND 
#>**The *slowest* ride**

$Speed = Distance/Time$, 

We already have the distance; the difference between ``END_DATE*`` and ``START_DATE*`` gives the time taken for a ride.

In [None]:
# Reload the raw trip log from disk, replacing the frame modified above.
uber_csv = pd.read_csv(filepath_or_buffer='uber_drives.csv')

In [None]:
# Repeat the preparation on the reloaded frame: discard the final row, then
# parse both timestamp columns into datetime values.
uber_csv = uber_csv.drop(index=uber_csv.index[-1:])
for date_col in ('START_DATE*', 'END_DATE*'):
    uber_csv[date_col] = pd.to_datetime(uber_csv[date_col])

In [None]:
# Display the reloaded frame with its parsed timestamp columns.
uber_csv

Unnamed: 0,START_DATE*,END_DATE*,CATEGORY*,START*,STOP*,MILES*,PURPOSE*
0,2016-01-01 21:11:00,2016-01-01 21:17:00,Business,Fort Pierce,Fort Pierce,5.1,Meal/Entertain
1,2016-01-02 01:25:00,2016-01-02 01:37:00,Business,Fort Pierce,Fort Pierce,5.0,
2,2016-01-02 20:25:00,2016-01-02 20:38:00,Business,Fort Pierce,Fort Pierce,4.8,Errand/Supplies
3,2016-01-05 17:31:00,2016-01-05 17:45:00,Business,Fort Pierce,Fort Pierce,4.7,Meeting
4,2016-01-06 14:42:00,2016-01-06 15:49:00,Business,Fort Pierce,West Palm Beach,63.7,Customer Visit
...,...,...,...,...,...,...,...
1150,2016-12-31 01:07:00,2016-12-31 01:14:00,Business,Kar?chi,Kar?chi,0.7,Meeting
1151,2016-12-31 13:24:00,2016-12-31 13:42:00,Business,Kar?chi,Unknown Location,3.9,Temporary Site
1152,2016-12-31 15:03:00,2016-12-31 15:38:00,Business,Unknown Location,Unknown Location,16.2,Meeting
1153,2016-12-31 21:32:00,2016-12-31 21:50:00,Business,Katunayake,Gampaha,6.4,Temporary Site


In [None]:
import datetime
from datetime import datetime as dt

In [None]:
# uber_csv_start1.head()

In [None]:
# Collect the three columns needed for speed (start time, end time, distance)
# into a small working frame with simpler column names.
date1 = uber_csv['START_DATE*']
date2 = uber_csv['END_DATE*']
miles = uber_csv['MILES*']
date_df = pd.DataFrame({'Start_date': date1, 'End_date': date2, 'miles': miles})
date_df.head(5)

Unnamed: 0,Start_date,End_date,miles
0,2016-01-01 21:11:00,2016-01-01 21:17:00,5.1
1,2016-01-02 01:25:00,2016-01-02 01:37:00,5.0
2,2016-01-02 20:25:00,2016-01-02 20:38:00,4.8
3,2016-01-05 17:31:00,2016-01-05 17:45:00,4.7
4,2016-01-06 14:42:00,2016-01-06 15:49:00,63.7


In [None]:
# Preview the last five rows of the timing frame.
date_df.tail(5)

Unnamed: 0,Start_date,End_date,miles
1150,2016-12-31 01:07:00,2016-12-31 01:14:00,0.7
1151,2016-12-31 13:24:00,2016-12-31 13:42:00,3.9
1152,2016-12-31 15:03:00,2016-12-31 15:38:00,16.2
1153,2016-12-31 21:32:00,2016-12-31 21:50:00,6.4
1154,2016-12-31 22:08:00,2016-12-31 23:51:00,48.2


In [None]:
# Ride duration in hours: the end-minus-start timedelta divided by one hour.
date_df['diff_hours'] = (date_df['End_date'] - date_df['Start_date']) / np.timedelta64(1, 'h')

# date_df['Sno'] = np.arange(len(date_df))

In [None]:
# Display the timing frame with the new diff_hours column.
date_df

Unnamed: 0,Start_date,End_date,miles,diff_hours
0,2016-01-01 21:11:00,2016-01-01 21:17:00,5.1,0.100000
1,2016-01-02 01:25:00,2016-01-02 01:37:00,5.0,0.200000
2,2016-01-02 20:25:00,2016-01-02 20:38:00,4.8,0.216667
3,2016-01-05 17:31:00,2016-01-05 17:45:00,4.7,0.233333
4,2016-01-06 14:42:00,2016-01-06 15:49:00,63.7,1.116667
...,...,...,...,...
1150,2016-12-31 01:07:00,2016-12-31 01:14:00,0.7,0.116667
1151,2016-12-31 13:24:00,2016-12-31 13:42:00,3.9,0.300000
1152,2016-12-31 15:03:00,2016-12-31 15:38:00,16.2,0.583333
1153,2016-12-31 21:32:00,2016-12-31 21:50:00,6.4,0.300000


In [None]:
# Running row number (0, 1, 2, ...) as text, one entry per ride; it is appended
# to the route label later so that every label is unique.
a1 = pd.Series([str(row_no) for row_no in range(len(uber_csv))], dtype=object)

In [None]:
# Unique per-ride label: "start-stop" immediately followed by the row number.
date_df['full_trip'] = uber_csv['START*'].str.cat(uber_csv['STOP*'], sep='-') + a1

In [None]:
# Display the timing frame with the new full_trip label column.
date_df

Unnamed: 0,Start_date,End_date,miles,diff_hours,full_trip
0,2016-01-01 21:11:00,2016-01-01 21:17:00,5.1,0.100000,Fort Pierce-Fort Pierce0
1,2016-01-02 01:25:00,2016-01-02 01:37:00,5.0,0.200000,Fort Pierce-Fort Pierce1
2,2016-01-02 20:25:00,2016-01-02 20:38:00,4.8,0.216667,Fort Pierce-Fort Pierce2
3,2016-01-05 17:31:00,2016-01-05 17:45:00,4.7,0.233333,Fort Pierce-Fort Pierce3
4,2016-01-06 14:42:00,2016-01-06 15:49:00,63.7,1.116667,Fort Pierce-West Palm Beach4
...,...,...,...,...,...
1150,2016-12-31 01:07:00,2016-12-31 01:14:00,0.7,0.116667,Kar?chi-Kar?chi1150
1151,2016-12-31 13:24:00,2016-12-31 13:42:00,3.9,0.300000,Kar?chi-Unknown Location1151
1152,2016-12-31 15:03:00,2016-12-31 15:38:00,16.2,0.583333,Unknown Location-Unknown Location1152
1153,2016-12-31 21:32:00,2016-12-31 21:50:00,6.4,0.300000,Katunayake-Gampaha1153


In [None]:
# Preview the first five rows of the timing frame.
date_df.head(5)

Unnamed: 0,Start_date,End_date,miles,diff_hours,full_trip
0,2016-01-01 21:11:00,2016-01-01 21:17:00,5.1,0.1,Fort Pierce-Fort Pierce0
1,2016-01-02 01:25:00,2016-01-02 01:37:00,5.0,0.2,Fort Pierce-Fort Pierce1
2,2016-01-02 20:25:00,2016-01-02 20:38:00,4.8,0.216667,Fort Pierce-Fort Pierce2
3,2016-01-05 17:31:00,2016-01-05 17:45:00,4.7,0.233333,Fort Pierce-Fort Pierce3
4,2016-01-06 14:42:00,2016-01-06 15:49:00,63.7,1.116667,Fort Pierce-West Palm Beach4


In [None]:
# Show the rides ordered by duration, longest first (the result is displayed only, not stored).
date_df.sort_values('diff_hours', ascending=False)

Unnamed: 0,Start_date,End_date,miles,diff_hours,full_trip
776,2016-09-27 21:01:00,2016-09-28 02:37:00,195.6,5.600000,Unknown Location-Unknown Location776
269,2016-03-25 16:52:00,2016-03-25 22:22:00,310.3,5.500000,Latta-Jacksonville269
546,2016-07-14 16:39:00,2016-07-14 20:05:00,195.3,3.433333,Morrisville-Banner Elk546
559,2016-07-17 12:20:00,2016-07-17 15:25:00,180.2,3.083333,Boone-Cary559
881,2016-10-30 15:22:00,2016-10-30 18:23:00,195.9,3.016667,Asheville-Mebane881
...,...,...,...,...,...
786,2016-10-04 12:17:00,2016-10-04 12:18:00,15.1,0.016667,Unknown Location-Unknown Location786
807,2016-10-13 13:02:00,2016-10-13 13:02:00,0.7,0.000000,Islamabad-Islamabad807
798,2016-10-08 15:03:00,2016-10-08 15:03:00,3.6,0.000000,Karachi-Karachi798
761,2016-09-16 07:08:00,2016-09-16 07:08:00,1.6,0.000000,Unknown Location-Unknown Location761


In [None]:
# Keep only rides whose duration is not exactly zero, so the later
# miles / hours division never divides by zero.
date_df = date_df.loc[date_df['diff_hours'].ne(0)]

In [None]:
# Order the remaining rides by duration, longest first.
date_df = date_df.sort_values('diff_hours', ascending=False)

In [None]:
# Display the timing frame: zero-duration rides removed, longest duration first.
date_df

Unnamed: 0,Start_date,End_date,miles,diff_hours,full_trip
776,2016-09-27 21:01:00,2016-09-28 02:37:00,195.6,5.600000,Unknown Location-Unknown Location776
269,2016-03-25 16:52:00,2016-03-25 22:22:00,310.3,5.500000,Latta-Jacksonville269
546,2016-07-14 16:39:00,2016-07-14 20:05:00,195.3,3.433333,Morrisville-Banner Elk546
559,2016-07-17 12:20:00,2016-07-17 15:25:00,180.2,3.083333,Boone-Cary559
881,2016-10-30 15:22:00,2016-10-30 18:23:00,195.9,3.016667,Asheville-Mebane881
...,...,...,...,...,...
420,2016-06-08 17:16:00,2016-06-08 17:18:00,0.5,0.033333,Soho-Tribeca420
495,2016-06-29 11:49:00,2016-06-29 11:51:00,1.6,0.033333,Whitebridge-Westpark Place495
789,2016-10-06 18:37:00,2016-10-06 18:39:00,18.4,0.033333,Unknown Location-Unknown Location789
786,2016-10-04 12:17:00,2016-10-04 12:18:00,15.1,0.016667,Unknown Location-Unknown Location786


In [None]:
# date_df = date_df[date_df['full_trip'] != 'Unknown Location-Unknown Location']

In [None]:
# date_df = date_df.sort_values(by='diff_hours',ascending=False)

In [None]:
# Labels and durations (in hours) of the ten longest rides, for the chart below.
labels = date_df['full_trip'].iloc[:10].tolist()
values = list(date_df['diff_hours'].iloc[:10])

In [None]:
# Inspect the ten ride labels selected for the chart.
labels

['Unknown Location-Unknown Location776',
 'Latta-Jacksonville269',
 'Morrisville-Banner Elk546',
 'Boone-Cary559',
 'Asheville-Mebane881',
 'Unknown Location-Unknown Location727',
 'Jacksonville-Ridgeland297',
 'Cary-Latta268',
 'Unknown Location-R?walpindi787',
 'Rawalpindi-Unknown Location1088']

## We now have a duration for every ride, so let's look at the longest and shortest trips by time.

In [None]:
# Donut chart of the previously prepared labels/values (ten longest ride durations).
fig = go.Figure(
    data=[go.Pie(labels=labels, values=values, hole=.3)],
    layout={'title': {'text': "Longest trips TOP 10"}},
)
fig.show()

In [None]:
# Average speed of each ride in miles per hour: distance divided by duration.
date_df['Speed'] = date_df['miles'].div(date_df['diff_hours'])

date_df.head(5)

Unnamed: 0,Start_date,End_date,miles,diff_hours,full_trip,Speed
776,2016-09-27 21:01:00,2016-09-28 02:37:00,195.6,5.6,Unknown Location-Unknown Location776,34.928571
269,2016-03-25 16:52:00,2016-03-25 22:22:00,310.3,5.5,Latta-Jacksonville269,56.418182
546,2016-07-14 16:39:00,2016-07-14 20:05:00,195.3,3.433333,Morrisville-Banner Elk546,56.883495
559,2016-07-17 12:20:00,2016-07-17 15:25:00,180.2,3.083333,Boone-Cary559,58.443243
881,2016-10-30 15:22:00,2016-10-30 18:23:00,195.9,3.016667,Asheville-Mebane881,64.939227


In [None]:
# Re-order the rides by average speed, fastest first, and display the result.
date_df = date_df.sort_values('Speed', ascending=False)

date_df

Unnamed: 0,Start_date,End_date,miles,diff_hours,full_trip,Speed
786,2016-10-04 12:17:00,2016-10-04 12:18:00,15.1,0.016667,Unknown Location-Unknown Location786,906.000000
791,2016-10-07 10:56:00,2016-10-07 10:59:00,33.2,0.050000,Unknown Location-Lahore791,664.000000
789,2016-10-06 18:37:00,2016-10-06 18:39:00,18.4,0.033333,Unknown Location-Unknown Location789,552.000000
788,2016-10-06 17:23:00,2016-10-06 17:40:00,112.6,0.283333,R?walpindi-Unknown Location788,397.411765
804,2016-10-12 19:18:00,2016-10-12 19:21:00,18.4,0.050000,R?walpindi-Unknown Location804,368.000000
...,...,...,...,...,...,...
1150,2016-12-31 01:07:00,2016-12-31 01:14:00,0.7,0.116667,Kar?chi-Kar?chi1150,6.000000
775,2016-09-27 19:14:00,2016-09-27 20:34:00,7.3,1.333333,Lahore-Unknown Location775,5.475000
778,2016-09-29 16:13:00,2016-09-29 18:47:00,12.6,2.566667,Unknown Location-Islamabad778,4.909091
769,2016-09-20 20:47:00,2016-09-20 22:47:00,9.6,2.000000,Unknown Location-R?walpindi769,4.800000


In [None]:
# Labels and speeds of the twenty fastest rides, for the chart below.
labels = date_df['full_trip'].iloc[:20].tolist()
values = list(date_df['Speed'].iloc[:20])

In [None]:
# Donut chart of the previously prepared labels/values (twenty highest average speeds).
fig = go.Figure(
    data=[go.Pie(labels=labels, values=values, hole=.3)],
    layout={'title': {'text': "Fastest trips TOP 20"}},
)
fig.show()

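# What?! Uber rides look impossibly fast
# Speeds of several hundred miles per hour (up to 906 mph) cannot be real. The fastest rows pair a distance of many miles with a duration of only a few minutes, which points to errors in the recorded timestamps or distances rather than to genuinely fast rides.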