In [None]:
"""
Process Flow:

1. Import data into a Pandas DataFrame.
    1.1  Set dependencies
    1.2  Establish files to load
    1.3  Read files
    1.4  Exploratory Data Analysis
2. Merge DataFrames.
3. Create a bubble chart that showcases the average fare versus the total number of rides with bubble size based on the total number of drivers for each city type, including urban, suburban, and rural.
4. Determine the mean, median, and mode for the following:
    4.1  The total number of rides for each city type.
    4.2  The average fares for each city type.
    4.3  The total number of drivers for each city type.
5. Create box-and-whisker plots that visualize each of the following to determine if there are any outliers:
    5.1  The number of rides for each city type.
    5.2  The fares for each city type.
    5.3  The number of drivers for each city type.
6. Create a pie chart that visualizes each of the following data for each city type:
    6.1  The percent of total fares.
    6.2  The percent of total rides.
    6.3  The percent of total drivers.

"""

In [3]:
## 1. Set dependencies and import data into dataframes
# 1.1 Set dependencies

%matplotlib inline
# Dependencies
import matplotlib.pyplot as plt
import pandas as pd


In [4]:
# 1.2 Relative paths of the two CSV files to load (city data and ride data)
city_data_to_load, ride_data_to_load = (
    "Resources/city_data.csv",
    "Resources/ride_data.csv",
)

In [6]:
# 1.3 Load the city CSV into a pandas DataFrame

city_data_df = pd.read_csv(filepath_or_buffer=city_data_to_load)
# Preview the first ten rows
city_data_df.head(n=10)

Unnamed: 0,city,driver_count,type
0,Richardfort,38,Urban
1,Williamsstad,59,Urban
2,Port Angela,67,Urban
3,Rodneyfort,34,Urban
4,West Robert,39,Urban
5,West Anthony,70,Urban
6,West Angela,48,Urban
7,Martinezhaven,25,Urban
8,Karenberg,22,Urban
9,Barajasview,26,Urban


In [7]:
# Load the ride CSV into a pandas DataFrame
ride_data_df = pd.read_csv(filepath_or_buffer=ride_data_to_load)
# Preview the first ten rows
ride_data_df.head(n=10)

Unnamed: 0,city,date,fare,ride_id
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344
5,South Latoya,2019-03-11 12:26:48,9.52,1994999424437
6,New Paulville,2019-02-27 11:17:56,43.25,793208410091
7,Simpsonburgh,2019-04-26 00:43:24,35.98,111953927754
8,South Karenland,2019-01-08 03:28:48,35.09,7995623208694
9,North Jasmine,2019-03-09 06:26:29,42.81,5327642267789


In [8]:
# 1.4 Exploratory Data Analysis
# 1.4.1 Count the non-null values in each column of the city DataFrame

city_data_df.count()


city            120
driver_count    120
type            120
dtype: int64

In [9]:
# Count the missing values in each column of the city DataFrame
city_data_df.isna().sum()

city            0
driver_count    0
type            0
dtype: int64

In [10]:
# Inspect the data type of each column in the city DataFrame
city_data_df.dtypes

city            object
driver_count     int64
type            object
dtype: object

In [15]:
# How many cities are there per type?
# Start by listing the distinct city types; the cells that follow count the cities of each type.
city_data_df["type"].unique()

array(['Urban', 'Suburban', 'Rural'], dtype=object)

In [16]:
# Number of Urban cities: reduce the boolean mask with the vectorized
# Series.sum() instead of iterating element by element with the built-in sum().
# int() keeps the displayed result a plain Python integer.
int((city_data_df["type"] == "Urban").sum())

66

In [17]:
# Number of Suburban cities: reduce the boolean mask with the vectorized
# Series.sum() instead of iterating element by element with the built-in sum().
# int() keeps the displayed result a plain Python integer.
int((city_data_df["type"] == "Suburban").sum())

36

In [18]:
# Number of Rural cities: reduce the boolean mask with the vectorized
# Series.sum() instead of iterating element by element with the built-in sum().
# int() keeps the displayed result a plain Python integer.
int((city_data_df["type"] == "Rural").sum())

18

In [20]:
# 1.4.2 Exploratory Data Analysis on the ride DataFrame
# Count the non-null values in each column
ride_data_df.count()

city       2375
date       2375
fare       2375
ride_id    2375
dtype: int64

In [21]:
# Count the missing values in each column of the ride DataFrame
ride_data_df.isna().sum()

city       0
date       0
fare       0
ride_id    0
dtype: int64

In [22]:
# Inspect the data type of each column in the ride DataFrame
# NOTE(review): the recorded output shows `date` loaded as object, not a
# datetime dtype; convert it (e.g. pd.to_datetime) before any time-based analysis.
ride_data_df.dtypes

city        object
date        object
fare       float64
ride_id      int64
dtype: object

In [24]:
## 2. Merge DataFrames
# Both DataFrames share the "city" column, which is the join key.
# A left join (the default is inner) keeps every ride row, even one whose
# city has no match in the city data.

# validate="many_to_one" makes pandas raise a MergeError if a city appears more
# than once in city_data_df, which would otherwise silently duplicate ride rows.
pyber_data_df = pd.merge(
    ride_data_df,
    city_data_df,
    how="left",
    on="city",
    validate="many_to_one",
)

# Display the df
pyber_data_df

Unnamed: 0,city,date,fare,ride_id,driver_count,type
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873,5,Urban
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577,72,Urban
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003,57,Urban
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178,34,Urban
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344,46,Urban
...,...,...,...,...,...,...
2370,Michaelberg,2019-04-29 17:04:39,13.38,8550365057598,6,Rural
2371,Lake Latoyabury,2019-01-30 00:05:47,20.76,9018727594352,2,Rural
2372,North Jaime,2019-02-10 21:03:50,11.11,2781339863778,1,Rural
2373,West Heather,2019-05-07 19:22:15,44.94,4256853490277,4,Rural
