In [1]:
from datetime import datetime, timezone

import numpy as np

In [2]:
# Load the trip records from the CSV file into a NumPy structured array.
# names=True takes the field names from the header row; dtype=None lets
# genfromtxt infer the type of each column separately.
data = np.genfromtxt(
    'taxi_tripdata.csv',
    delimiter=',',
    names=True,
    dtype=None,
    encoding='utf-8',
)

In [3]:
# Parse the pickup / drop-off timestamp strings into datetime objects.
# The strings carry no zone information. Each value is tagged as UTC so that the
# later .timestamp() conversion is a fixed offset from the wall-clock value
# instead of depending on the local time zone (and DST rules) of the machine
# running the notebook. Durations are therefore plain wall-clock differences.
DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'


def parse_timestamps(values):
    """Return an array of UTC-tagged datetime objects parsed from `values`.

    values: iterable of strings written in DATETIME_FORMAT.
    Raises ValueError (from datetime.strptime) if a string does not match.
    """
    return np.array([
        datetime.strptime(text, DATETIME_FORMAT).replace(tzinfo=timezone.utc)
        for text in values
    ])


pickup_times = parse_timestamps(data['lpep_pickup_datetime'])
dropoff_times = parse_timestamps(data['lpep_dropoff_datetime'])

In [4]:
# Replace every pickup datetime with its POSIX timestamp (float seconds).
# NOTE(review): this rebinds pickup_times, so the cell cannot be re-run on its
# own output; re-run the parsing cell first.
pickup_times = np.array([moment.timestamp() for moment in pickup_times])

In [5]:
# Replace every drop-off datetime with its POSIX timestamp (float seconds).
# NOTE(review): this rebinds dropoff_times, so the cell cannot be re-run on its
# own output; re-run the parsing cell first.
dropoff_times = np.array([moment.timestamp() for moment in dropoff_times])

In [6]:
# Calculate trip durations in minutes

In [7]:
# Both arrays hold seconds at this point; dividing the difference by 60 gives minutes.
SECONDS_PER_MINUTE = 60
trip_duration = (dropoff_times - pickup_times) / SECONDS_PER_MINUTE

In [8]:
# Display the computed trip durations (minutes) as a quick sanity check.
trip_duration

array([ 4.73333333, 35.91666667,  6.03333333, ..., 22.        ,
       32.        , 26.        ])

In [9]:
# Keep only trips with a valid (strictly positive) duration

In [10]:
# Boolean mask: True for trips whose duration is strictly positive.
valid_duration = np.greater(trip_duration, 0)

In [11]:
# Speed = distance / duration, computed only for trips with a valid duration.
# trip_duration is in minutes, so the result is in distance units per minute.
distance_of_valid_trips = data['trip_distance'][valid_duration]
duration_of_valid_trips = trip_duration[valid_duration]
valid_speed = distance_of_valid_trips / duration_of_valid_trips

In [12]:
# Display the per-trip speeds (distance units per minute).
valid_speed

array([0.25352113, 0.38116009, 0.15745856, ..., 0.23318182, 0.393125  ,
       0.43538462])

In [13]:
# Mean speed over all trips with a valid duration.
# NOTE(review): the printed mean is far above the sample values displayed for
# valid_speed, so a few extreme speeds probably dominate it -- consider also
# reporting the median.
average_speed = valid_speed.mean()
print(average_speed)

12.306515682486621


In [14]:
# Count trips charged at the JFK rate (RatecodeID == 2).
# NOTE(review): RatecodeID is a fare rate code; whether every such trip was
# actually dropped off at JFK is not established by this column alone.
JFK_RATE_CODE = 2
is_jfk_rate = data['RatecodeID'] == JFK_RATE_CODE
# Counting the True entries of the mask gives the number of matching trips.
jfk_trips = np.count_nonzero(is_jfk_rate)
print('Number of trips to JFK Airport: ',jfk_trips)

Number of trips to JFK Airport:  158


In [15]:
# Count trips charged at the standard rate (RatecodeID == 1).
STANDARD_RATE_CODE = 1
is_standard_rate = data['RatecodeID'] == STANDARD_RATE_CODE
# Counting the True entries of the mask gives the number of matching trips.
str_trips = np.count_nonzero(is_standard_rate)
print('Number of Standard rate trips : ',str_trips)

Number of Standard rate trips :  48994


In [16]:
# Mean of the total_amount column over every row in the data set.
total_amounts = data['total_amount']
average_total_amount = total_amounts.mean()
print('the average total amount: ',average_total_amount)

the average total amount:  24.20483636233287


In [17]:
# Mean of the tip_amount column over every row (trips without a tip included).
tip_amounts = data['tip_amount']
average_tip_amount = tip_amounts.mean()
print('the average tip amount: ',average_tip_amount)

the average tip amount:  1.0586181309818259


In [18]:
# How many trips include a tip (tip_amount strictly greater than zero)?
has_tip = data['tip_amount'] > 0
# Counting the True entries of the mask gives the number of tipped trips.
trips_include_tips = np.count_nonzero(has_tip)
print('trips include tips: ',trips_include_tips)

trips include tips:  26394


In [19]:
# Determine the most common payment type.
#
# payment_type is a numeric code signifying how the passenger paid for the trip:
#    1 = Credit card
#    2 = Cash
#    3 = No charge
#    4 = Dispute
#    5 = Unknown
#    6 = Voided trip
#   -1 = undetermined payment (not one of the codes above).
#        NOTE(review): most likely rows whose payment_type field is empty in the
#        CSV, since np.genfromtxt fills missing integer values with -1 by
#        default -- confirm against the raw file.
#
# List every distinct code together with the number of trips that use it.
np.unique(data['payment_type'], return_counts=True)

(array([-1,  1,  2,  3,  4,  5]),
 array([32518, 29990, 20831,   307,    44,     1]))

In [20]:
# Drop the undetermined (-1) payment codes, then tally the remaining codes.
payment_codes = data['payment_type']
condition = payment_codes > 0
unique, counts = np.unique(payment_codes[condition], return_counts=True)

In [21]:
# Distinct payment codes that remain after filtering out the -1 entries.
unique

array([1, 2, 3, 4, 5])

In [22]:
# Number of trips per payment code, in the same order as `unique`.
counts

array([29990, 20831,   307,    44,     1])

In [23]:
# The most common payment type is the code whose count is the largest.
most_frequent_index = counts.argmax()
most_common_payment_type = unique[most_frequent_index]
most_common_payment_type

1

In [24]:
# Report the most common payment type by name.
# The name is looked up from the computed code instead of being hard-coded, so
# the message stays truthful if the data (and therefore the winner) changes.
PAYMENT_TYPE_NAMES = {
    1: 'credit card',
    2: 'cash',
    3: 'no charge',
    4: 'dispute',
    5: 'unknown',
    6: 'voided trip',
}
most_common_payment_name = PAYMENT_TYPE_NAMES.get(int(most_common_payment_type), 'unrecognised')
print(most_common_payment_name + ' payments are the most popular kind. id=',most_common_payment_type)

credit card payments are the most popular kind. id= 1


In [25]:
# Denominator for the credit-card percentage: the number of trips that have a
# determined payment type (sum of the per-code counts).
total_payment_type = counts.sum()
total_payment_type

51173

In [26]:
# Number of trips paid by credit card (payment_type code 1).
# The count is selected by matching the code in `unique` rather than by
# position, so it stays correct if code 1 is absent (result 0) or is not the
# first entry.
CREDIT_CARD_CODE = 1
credit_card_total = counts[unique == CREDIT_CARD_CODE].sum()
credit_card_total

29990

In [27]:
# Percentage = (credit card trips / trips with a determined payment type) * 100
credit_card_share = credit_card_total / total_payment_type
percentage_of_credit_card = credit_card_share * 100
percentage_of_credit_card

58.605123795751666

In [28]:
# Report the credit-card share; sep='' joins the pieces without extra spaces.
print('the percentage of payment methods that are made via credit card: ', percentage_of_credit_card, '%', sep='')

the percentage of payment methods that are made via credit card: 58.605123795751666%
