### Import relevant libraries

In [220]:
import pandas as pd
import numpy as np
import os
import datetime

### Load the dataset

In [221]:
traffic_data = pd.read_csv(os.path.join(os.path.pardir,'data','raw','train_revised.csv'))

# display the first 5 rows
traffic_data

Unnamed: 0,ride_id,seat_number,payment_method,payment_receipt,travel_date,travel_time,travel_from,travel_to,car_type,max_capacity
0,1442,15A,Mpesa,UZUEHCBUSO,17-10-17,7:15,Migori,Nairobi,Bus,49
1,5437,14A,Mpesa,TIHLBUSGTE,19-11-17,7:12,Migori,Nairobi,Bus,49
2,5710,8B,Mpesa,EQX8Q5G19O,26-11-17,7:05,Keroka,Nairobi,Bus,49
3,5777,19A,Mpesa,SGP18CL0ME,27-11-17,7:10,Homa Bay,Nairobi,Bus,49
4,5778,11A,Mpesa,BM97HFRGL9,27-11-17,7:12,Migori,Nairobi,Bus,49
5,5777,18B,Mpesa,B6PBDU30IZ,27-11-17,7:10,Homa Bay,Nairobi,Bus,49
6,5777,14A,Mpesa,MZHGDGS6QZ,27-11-17,7:10,Homa Bay,Nairobi,Bus,49
7,5778,25,Mpesa,MYVTYFNXDZ,27-11-17,7:12,Migori,Nairobi,Bus,49
8,5778,21B,Mpesa,TE1WYK1NYE,27-11-17,7:12,Migori,Nairobi,Bus,49
9,5781,22A,Mpesa,VGG7Q3MVJX,27-11-17,7:09,Homa Bay,Nairobi,Bus,49


### Adding a column for the actual weekday

In [222]:
# creating a copy of the dataset
new_trafic_data = traffic_data

# convert values of travel_date columns to datetime
route_dates = pd.to_datetime(new_trafic_data['travel_date'])

# adding a new column (weekday) to our dataframe
new_trafic_data['weekday'] = route_dates.dt.weekday_name

### Filtering out the routes  travelled on a Sunday

In [223]:
traveled_on_sunday = new_trafic_data[new_trafic_data['weekday']=='Sunday']
traveled_on_sunday.head()

Unnamed: 0,ride_id,seat_number,payment_method,payment_receipt,travel_date,travel_time,travel_from,travel_to,car_type,max_capacity,weekday
1,5437,14A,Mpesa,TIHLBUSGTE,19-11-17,7:12,Migori,Nairobi,Bus,49,Sunday
2,5710,8B,Mpesa,EQX8Q5G19O,26-11-17,7:05,Keroka,Nairobi,Bus,49,Sunday
111,5856,9,Cash,MDF1ULUMPX,15-04-18,5:20,Kisii,Nairobi,shuttle,11,Sunday
112,5857,6,Cash,MDF1ULG1GR,15-04-18,5:00,Kisii,Nairobi,shuttle,11,Sunday
113,5857,3,Cash,MDF6ULD8NQ,15-04-18,5:00,Kisii,Nairobi,shuttle,11,Sunday


### Get the total number of times traveled on a route

In [224]:
total_counts = traveled_on_sunday.groupby(['travel_from','travel_to']).size().reset_index(name='Count')
total_counts

Unnamed: 0,travel_from,travel_to,Count
0,Awendo,Nairobi,296
1,Homa Bay,Nairobi,910
2,Kehancha,Nairobi,301
3,Keroka,Nairobi,41
4,Keumbu,Nairobi,3
5,Kijauri,Nairobi,115
6,Kisii,Nairobi,2756
7,Mbita,Nairobi,83
8,Migori,Nairobi,973
9,Ndhiwa,Nairobi,45


### Sort the data using Count column in decending order

In [225]:
total_counts_sorted = total_counts.sort_values(['Count'], ascending=[0])
total_counts_sorted

Unnamed: 0,travel_from,travel_to,Count
6,Kisii,Nairobi,2756
8,Migori,Nairobi,973
1,Homa Bay,Nairobi,910
13,Sirare,Nairobi,629
12,Rongo,Nairobi,538
2,Kehancha,Nairobi,301
0,Awendo,Nairobi,296
5,Kijauri,Nairobi,115
10,Nyachenge,Nairobi,85
7,Mbita,Nairobi,83


### Get the averange

In [226]:
total_counts_sorted['Average'] = total_counts_sorted['Count']/total_counts_sorted.shape[0]
total_counts_sorted.iloc[0:7, :]

Unnamed: 0,travel_from,travel_to,Count,Average
6,Kisii,Nairobi,2756,183.733333
8,Migori,Nairobi,973,64.866667
1,Homa Bay,Nairobi,910,60.666667
13,Sirare,Nairobi,629,41.933333
12,Rongo,Nairobi,538,35.866667
2,Kehancha,Nairobi,301,20.066667
0,Awendo,Nairobi,296,19.733333


## Question Two

In [227]:
kijauri_route_data = new_trafic_data[new_trafic_data['travel_from']=='Kijauri']
kijauri_route_data.head()

Unnamed: 0,ride_id,seat_number,payment_method,payment_receipt,travel_date,travel_time,travel_from,travel_to,car_type,max_capacity,weekday
1625,6560,1A,Mpesa,XY20JTZHY4,04-12-17,7:07,Kijauri,Nairobi,Bus,49,Wednesday
1658,6568,9,Mpesa,EJXHRAFPCQ,04-12-17,7:00,Kijauri,Nairobi,shuttle,11,Wednesday
1663,6560,21B,Mpesa,QVBJ6ZAT75,04-12-17,7:07,Kijauri,Nairobi,Bus,49,Wednesday
1685,6570,20B,Mpesa,0KBEL0BJDK,04-12-17,7:06,Kijauri,Nairobi,Bus,49,Wednesday
1700,6568,10,Mpesa,IFXMNKI8NO,04-12-17,7:00,Kijauri,Nairobi,shuttle,11,Wednesday


### Probability that a passenger will onboard a shutle

In [228]:
const_time = datetime.datetime.strptime('7:30','%H:%M').time()
time = pd.to_datetime(kijauri_route_before_time['travel_time'],format= '%H:%M').dt.time
new_kijauri_route_before_time=kijauri_route_before_time[time < const_time]
shuttle = new_kijauri_route_before_time[new_kijauri_route_before_time['car_type'] == 'shuttle']

In [229]:
shuttle.shape[0]/new_kijauri_route_before_time.shape[0]

0.5286935286935287

### Question 3

In [230]:
kissi = new_trafic_data[new_trafic_data['travel_from']=='Kisii']
kissi

Unnamed: 0,ride_id,seat_number,payment_method,payment_receipt,travel_date,travel_time,travel_from,travel_to,car_type,max_capacity,weekday
12,5784,1X,Mpesa,3OPU23FHRB,20-04-18,5:10,Kisii,Nairobi,shuttle,11,Friday
13,5789,1X,Mpesa,BPRX4UMFFN,20-04-18,9:50,Kisii,Nairobi,shuttle,11,Friday
15,5790,1B,Mpesa,DC8ABRSXXK,20-04-18,7:06,Kisii,Nairobi,Bus,49,Friday
16,5791,1,Mpesa,HGQURN0SZS,20-04-18,6:00,Kisii,Nairobi,shuttle,11,Friday
17,5792,1,Mpesa,8D9V8HK2FC,20-04-18,5:50,Kisii,Nairobi,shuttle,11,Friday
18,5793,1,Mpesa,TTZQDYZHAB,20-04-18,10:58,Kisii,Nairobi,shuttle,11,Friday
19,5790,4A,Mpesa,F8VBKYCXPL,20-04-18,7:06,Kisii,Nairobi,Bus,49,Friday
20,5794,1,Mpesa,ETNVCDEZLR,20-04-18,6:50,Kisii,Nairobi,shuttle,11,Friday
23,5795,1,Mpesa,VIRXVJUIYX,20-04-18,5:30,Kisii,Nairobi,shuttle,11,Friday
24,5796,5,Mpesa,9CSSSQN2TN,20-04-18,6:40,Kisii,Nairobi,shuttle,11,Friday
