In [1]:
# Add Matplotlib inline magic command
%matplotlib inline
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Files to load
city_data_to_load = "Resources/city_data.csv"
ride_data_to_load = "Resources/ride_data.csv"

In [3]:
# Read the city data file and store it in a pandas DataFrame.
city_data_df = pd.read_csv(city_data_to_load)
city_data_df.head(10)


Unnamed: 0,city,driver_count,type
0,Richardfort,38,Urban
1,Williamsstad,59,Urban
2,Port Angela,67,Urban
3,Rodneyfort,34,Urban
4,West Robert,39,Urban
5,West Anthony,70,Urban
6,West Angela,48,Urban
7,Martinezhaven,25,Urban
8,Karenberg,22,Urban
9,Barajasview,26,Urban


In [4]:
# Read the ride data file and store it in a pandas DataFrame.
ride_data_df = pd.read_csv(ride_data_to_load)
ride_data_df.head(10)

Unnamed: 0,city,date,fare,ride_id
0,Lake Jonathanshire,1/14/2019 10:14,13.83,5739410000000.0
1,South Michelleport,3/4/2019 18:24,30.24,2343910000000.0
2,Port Samanthamouth,2/24/2019 4:29,33.44,2005070000000.0
3,Rodneyfort,2/10/2019 23:22,23.44,5149250000000.0
4,South Jack,3/6/2019 4:28,34.58,3908450000000.0
5,South Latoya,3/11/2019 12:26,9.52,1995000000000.0
6,New Paulville,2/27/2019 11:17,43.25,793208000000.0
7,Simpsonburgh,4/26/2019 0:43,35.98,111954000000.0
8,South Karenland,1/8/2019 3:28,35.09,7995620000000.0
9,North Jasmine,3/9/2019 6:26,42.81,5327640000000.0


In [5]:
# Combine the data into a single dataset
pyber_data_df = pd.merge(ride_data_df, city_data_df, how="left", on=["city", "city"])

# Display the DataFrame
pyber_data_df.head()

Unnamed: 0,city,date,fare,ride_id,driver_count,type
0,Lake Jonathanshire,1/14/2019 10:14,13.83,5739410000000.0,5,Urban
1,South Michelleport,3/4/2019 18:24,30.24,2343910000000.0,72,Urban
2,Port Samanthamouth,2/24/2019 4:29,33.44,2005070000000.0,57,Urban
3,Rodneyfort,2/10/2019 23:22,23.44,5149250000000.0,34,Urban
4,South Jack,3/6/2019 4:28,34.58,3908450000000.0,46,Urban


In [6]:
# Create the Urban/suburban/rural city DataFrame.
urban_cities_df = pyber_data_df[pyber_data_df["type"] =="Urban"]
suburban_cities_df =pyber_data_df[pyber_data_df["type"] == "Suburban"]
rural_cities_df = pyber_data_df[pyber_data_df["type"] == "Rural"]
urban_cities_df.head()
rural_cities_df.head()
suburban_cities_df.head()



Unnamed: 0,city,date,fare,ride_id,driver_count,type
1625,Barronchester,1/27/2019 3:08,27.79,6653620000000.0,11,Suburban
1626,East Kentstad,4/7/2019 19:44,18.75,6575960000000.0,20,Suburban
1627,Lake Omar,1/17/2019 21:33,21.71,966912000000.0,22,Suburban
1628,Myersshire,2/27/2019 17:38,17.1,5706770000000.0,19,Suburban
1629,West Hannah,4/19/2019 1:06,37.78,2273050000000.0,12,Suburban


In [7]:
city_type = city_data_df["type"].unique()
city_type

array(['Urban', 'Suburban', 'Rural'], dtype=object)

In [8]:
# Total Rides per city type

sum_rides_by_type=pyber_data_df.groupby(["type"]).count()["ride_id"]
sum_rides_by_type

type
Rural        125
Suburban     625
Urban       1625
Name: ride_id, dtype: int64

In [9]:
# Total Drivers per city type

sum_drivers_by_type=city_data_df.groupby(["type"]).sum()["driver_count"]
sum_drivers_by_type

type
Rural         78
Suburban     490
Urban       2405
Name: driver_count, dtype: int64

In [10]:
# Total fares for each city type.
sum_fares_by_type = pyber_data_df.groupby(["type"]).sum()["fare"]
sum_fares_by_type

type
Rural        4327.93
Suburban    19356.33
Urban       39854.38
Name: fare, dtype: float64

In [11]:
# Average Fare per Ride by city type

average_fare_by_type = sum_fares_by_type/sum_rides_by_type
average_fare_by_type.map("{:.2f}".format)

type
Rural       34.62
Suburban    30.97
Urban       24.53
dtype: object

In [12]:
# Average Fare per Driver by city type

average_fare_Driver_by_type = sum_fares_by_type/sum_drivers_by_type
average_fare_Driver_by_type.map("{:.2f}".format)

type
Rural       55.49
Suburban    39.50
Urban       16.57
dtype: object

In [13]:
# Adding a list of values with keys to create a new DataFrame.
final_summary_df = pd.DataFrame(
          [{"Total Rides": sum_rides_by_type,
          "Total Drivers": sum_drivers_by_type,
          "Total Fares": sum_fares_by_type,
          "Average Fare per Ride": average_fare_by_type,
          "Average Fare per Driver": average_fare_Driver_by_type}])
final_summary_df

Unnamed: 0,Total Rides,Total Drivers,Total Fares,Average Fare per Ride,Average Fare per Driver
0,type Rural 125 Suburban 625 Urban ...,type Rural 78 Suburban 490 Urban ...,type Rural 4327.93 Suburban 19356.33...,type Rural 34.623440 Suburban 30.9701...,type Rural 55.486282 Suburban 39.5027...


In [22]:
final_summary_df = pd.DataFrame(
       {"Total Rides": sum_rides_by_type,
         "Total Drivers": sum_drivers_by_type,
         "Total Fares": sum_fares_by_type,
         "Average Fare per Ride": average_fare_by_type,
         "Average Fare per Driver": average_fare_Driver_by_type})
final_summary_df


Unnamed: 0_level_0,Total Rides,Total Drivers,Total Fares,Average Fare per Ride,Average Fare per Driver
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Rural,125,78,4327.93,34.62344,55.486282
Suburban,625,490,19356.33,30.970128,39.502714
Urban,1625,2405,39854.38,24.525772,16.571468


In [19]:
final_summary_df = final_summary_df.rename(columns={'type': ''})

In [20]:
final_summary_df


Unnamed: 0_level_0,Total Rides,Total Drivers,Total Fares,Average Fare per Ride,Average Fare per Driver
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Rural,125,78,4327.93,34.62344,55.486282
Suburban,625,490,19356.33,30.970128,39.502714
Urban,1625,2405,39854.38,24.525772,16.571468


In [23]:
final_summary_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, Rural to Urban
Data columns (total 5 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Total Rides              3 non-null      int64  
 1   Total Drivers            3 non-null      int64  
 2   Total Fares              3 non-null      float64
 3   Average Fare per Ride    3 non-null      float64
 4   Average Fare per Driver  3 non-null      float64
dtypes: float64(3), int64(2)
memory usage: 224.0+ bytes


In [26]:
final_summary_df.index.name = None

In [27]:
final_summary_df


Unnamed: 0,Total Rides,Total Drivers,Total Fares,Average Fare per Ride,Average Fare per Driver
Rural,125,78,4327.93,34.62344,55.486282
Suburban,625,490,19356.33,30.970128,39.502714
Urban,1625,2405,39854.38,24.525772,16.571468
