In [1]:
%matplotlib inline
# Dependencies
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


In [2]:
# Files to load
city_data_to_load = "Resources/city_data.csv"
ride_data_to_load = "Resources/ride_data.csv"

In [3]:
# Read the city data file and store it in a pandas DataFrame.
city_data_df = pd.read_csv(city_data_to_load)
city_data_df.head(10)

Unnamed: 0,city,driver_count,type
0,Richardfort,38,Urban
1,Williamsstad,59,Urban
2,Port Angela,67,Urban
3,Rodneyfort,34,Urban
4,West Robert,39,Urban
5,West Anthony,70,Urban
6,West Angela,48,Urban
7,Martinezhaven,25,Urban
8,Karenberg,22,Urban
9,Barajasview,26,Urban


In [4]:
# Read the ride data file and store it in a pandas DataFrame.
ride_data_df = pd.read_csv(ride_data_to_load)
ride_data_df.head(10)

Unnamed: 0,city,date,fare,ride_id
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344
5,South Latoya,2019-03-11 12:26:48,9.52,1994999424437
6,New Paulville,2019-02-27 11:17:56,43.25,793208410091
7,Simpsonburgh,2019-04-26 00:43:24,35.98,111953927754
8,South Karenland,2019-01-08 03:28:48,35.09,7995623208694
9,North Jasmine,2019-03-09 06:26:29,42.81,5327642267789


In [5]:
# Combine the data into a single dataset
pyber_data_df = pd.merge(ride_data_df, city_data_df, how="left", on=["city", "city"])

# Display the DataFrame
pyber_data_df.head()

Unnamed: 0,city,date,fare,ride_id,driver_count,type
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873,5,Urban
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577,72,Urban
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003,57,Urban
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178,34,Urban
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344,46,Urban


In [6]:
# Create the Urban city DataFrame.
urban_cities_df = pyber_data_df[pyber_data_df["type"] == "Urban"]
urban_cities_df.head()


Unnamed: 0,city,date,fare,ride_id,driver_count,type
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873,5,Urban
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577,72,Urban
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003,57,Urban
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178,34,Urban
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344,46,Urban


In [7]:
# Create the Suburban and Rural city DataFrames.
suburban_cities_df = pyber_data_df[pyber_data_df["type"] == "Suburban"]
rural_cities_df = pyber_data_df[pyber_data_df["type"] == "Rural"]
rural_cities_df.head()

Unnamed: 0,city,date,fare,ride_id,driver_count,type
2250,Randallchester,2019-04-13 11:13:31,43.22,1076079536213,9,Rural
2251,North Holly,2019-02-02 14:54:00,12.42,1985256326182,8,Rural
2252,Michaelberg,2019-03-27 18:27:34,54.85,4421836952718,6,Rural
2253,Lake Latoyabury,2019-02-23 21:12:24,47.9,3269652929887,2,Rural
2254,Lake Latoyabury,2019-05-06 08:57:56,51.8,4018025271936,2,Rural


In [8]:
#1 Get the number of rides for urban cities.
urban_ride_count = urban_cities_df.groupby(["type"]).sum()["ride_id"]
urban_ride_count.head()

type
Urban    7919412664056093
Name: ride_id, dtype: int64

In [9]:
#1 Get the number of rides for suburban cities.
suburban_ride_count = suburban_cities_df.groupby(["type"]).sum()["ride_id"]
suburban_ride_count.head()

type
Suburban    3106884522576766
Name: ride_id, dtype: int64

In [10]:
#1 Get the number of rides for rural cities.
rural_ride_count = rural_cities_df.groupby(["type"]).count()["ride_id"]
rural_ride_count.head()

type
Rural    125
Name: ride_id, dtype: int64

In [11]:
#2 Get the total divers for each city type
#Get the total driver count for urban cities.
urban_driver_count = urban_cities_df.groupby(["type"]).sum()["driver_count"]
urban_driver_count.head()

type
Urban    59602
Name: driver_count, dtype: int64

In [12]:
#2 Get the total divers for each city type
#Get the total driver count for suburban cities.
suburban_driver_count = suburban_cities_df.groupby(["type"]).sum()["driver_count"]
suburban_driver_count.head()

type
Suburban    8570
Name: driver_count, dtype: int64

In [13]:
#2 Get the total divers for each city type
#Get the total driver count for rural cities.
rural_driver_count = rural_cities_df.groupby(["type"]).sum()["driver_count"]
rural_driver_count.head()

type
Rural    537
Name: driver_count, dtype: int64

In [14]:
#3 Get the total number of fares for each city type
#Get the total number of fares for urban cities
urban_fare_count = urban_cities_df.groupby(["type"]).sum()["fare"]
urban_driver_count.head()

type
Urban    59602
Name: driver_count, dtype: int64

In [15]:
#3 Get the total number of fares for each city type
#Get the total number of fares for suburban cities
suburban_fare_count = suburban_cities_df.groupby(["type"]).sum()["fare"]
suburban_fare_count.head()

type
Suburban    19356.33
Name: fare, dtype: float64

In [16]:
#3 Get the total number of fares for each city type
#Get the total number of fares for rural cities
rural_fare_count = rural_cities_df.groupby(["type"]).sum()["fare"]
rural_fare_count.head()

type
Rural    4327.93
Name: fare, dtype: float64

In [17]:
#4 Calculate the average fare per ride by city type by dividing the sum of all the fares by the total rides
# Get the sum of the fares for the urban city type.

urban_sum_of_fares= urban_cities_df.groupby(["type"]).sum()["fare"]
urban_sum_of_fares
#average fare per urban city ride
average_urban_fare = (urban_sum_of_fares/urban_ride_count)
average_urban_fare.head()

type
Urban    5.032492e-12
dtype: float64

In [18]:
#4 Calculate the average fare per ride by city type by dividing the sum of all the fares by the total rides
# Get the sum of the fares for the suburban city type.

suburban_sum_of_fares= suburban_cities_df.groupby(["type"]).sum()["fare"]
suburban_sum_of_fares
#average fare per urban city ride
average_suburban_fare = (suburban_sum_of_fares/suburban_ride_count)
average_suburban_fare.head()

type
Suburban    6.230141e-12
dtype: float64

In [19]:
#4 Calculate the average fare per ride by city type by dividing the sum of all the fares by the total rides
# Get the sum of the fares for the rural city type.

rural_sum_of_fares= rural_cities_df.groupby(["type"]).sum()["fare"]
rural_sum_of_fares
#average fare per urban city ride
averge_suburban_fare = (rural_sum_of_fares/rural_ride_count)
average_suburban_fare.head()

type
Suburban    6.230141e-12
dtype: float64

In [20]:
#5 Get the average fare per driver for each city type
# Get the average fare per driver for the urban city type

urban_sum_of_fares= urban_cities_df.groupby(["type"]).sum()["fare"]
urban_sum_of_fares
# Get the average fare per driver for the urban city type
urban_average_fare_per_driver = (urban_sum_of_fares/urban_driver_count)
urban_average_fare_per_driver.head()

type
Urban    0.668675
dtype: float64

In [21]:
#5 Get the average fare per driver for each city type
# Get the average fare per driver for the suburban city type

suburban_sum_of_fares= suburban_cities_df.groupby(["type"]).sum()["fare"]
suburban_sum_of_fares
# Get the average fare per driver for the urban city type
suburban_average_fare_per_driver = (suburban_sum_of_fares/suburban_driver_count)
suburban_average_fare_per_driver.head()

type
Suburban    2.258615
dtype: float64

In [22]:
#5 Get the average fare per driver for each city type
# Get the average fare per driver for the rural city type

rural_sum_of_fares= rural_cities_df.groupby(["type"]).sum()["fare"]
rural_sum_of_fares
# Get the average fare per driver for the urban city type
rural_average_fare_per_driver = (rural_sum_of_fares/rural_driver_count)
rural_average_fare_per_driver.head()

type
Rural    8.05946
dtype: float64

In [35]:
#6 Create PyBer summary DataFrame
# Combine the data into a single dataset
pyber_summary_df = pd.merge(ride_data_df, city_data_df, how="left", on=["city", "city"])



#Display the DataFrame
pyber_summary_df.head()


Unnamed: 0,city,date,fare,ride_id,driver_count,type
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873,5,Urban
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577,72,Urban
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003,57,Urban
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178,34,Urban
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344,46,Urban


In [36]:
#Get the names of the columns
pyber_summary_df.columns


Index(['city', 'date', 'fare', 'ride_id', 'driver_count', 'type'], dtype='object')

In [None]:
pyber_summary_df.pivot(index = ["Total Rides", "Total Drivers", "Total Fares", "Average Fare per Ride", "Average Fare per Driver"],
values = "type", columns = ["Urban", "Suburban", "Rural"]

In [24]:
#7 Cleaning up the DataFrame
#Delete index name
pyber_summary_df.index.name = None
pyber_summary_df.head()

Unnamed: 0,city,date,fare,ride_id,driver_count,type
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873,5,Urban
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577,72,Urban
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003,57,Urban
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178,34,Urban
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344,46,Urban


In [30]:
#8 Format the columns


KeyError: 'Total Rides'