# Pyber Challenge

### 4.3 Loading and Reading CSV files

In [2]:
# Add Matplotlib inline magic command
%matplotlib inline
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd

# Input CSV locations, relative to the notebook's working directory
# (update these if the files live somewhere else).
city_data_to_load = "Resources/city_data.csv"
ride_data_to_load = "Resources/ride_data.csv"

# Load each CSV into its own DataFrame: city-level data first, then ride-level data.
city_data_df, ride_data_df = (
    pd.read_csv(csv_path) for csv_path in (city_data_to_load, ride_data_to_load)
)

In [3]:
# Preview the first 10 rows of city_data_df as a quick check of what was loaded
# (the rendered table is this cell's output).
city_data_df.head(10)

Unnamed: 0,city,driver_count,type
0,Richardfort,38,Urban
1,Williamsstad,59,Urban
2,Port Angela,67,Urban
3,Rodneyfort,34,Urban
4,West Robert,39,Urban
5,West Anthony,70,Urban
6,West Angela,48,Urban
7,Martinezhaven,25,Urban
8,Karenberg,22,Urban
9,Barajasview,26,Urban


In [4]:
# Preview the first 10 rows of ride_data_df as a quick check of what was loaded
# (the rendered table is this cell's output).
ride_data_df.head(10)

Unnamed: 0,city,date,fare,ride_id
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344
5,South Latoya,2019-03-11 12:26:48,9.52,1994999424437
6,New Paulville,2019-02-27 11:17:56,43.25,793208410091
7,Simpsonburgh,2019-04-26 00:43:24,35.98,111953927754
8,South Karenland,2019-01-08 03:28:48,35.09,7995623208694
9,North Jasmine,2019-03-09 06:26:29,42.81,5327642267789


### Merge the DataFrames

In [5]:
# Combine the data into a single dataset.
# Left join: every row of ride_data_df is kept and the matching city's columns
# from city_data_df are attached. The join key "city" is passed once --
# repeating it in a list (["city", "city"]) adds nothing.
pyber_data_df = pd.merge(ride_data_df, city_data_df, how="left", on="city")

# Display the data table for preview
pyber_data_df.head()

Unnamed: 0,city,date,fare,ride_id,driver_count,type
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873,5,Urban
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577,72,Urban
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003,57,Urban
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178,34,Urban
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344,46,Urban


## Deliverable 1: Get a Summary DataFrame 

In [6]:
#  1. Get the total rides for each city type

# A. Split the merged data into one DataFrame per city type.
urban_cities_df = pyber_data_df.loc[pyber_data_df["type"] == "Urban"]
suburban_cities_df = pyber_data_df.loc[pyber_data_df["type"] == "Suburban"]
rural_cities_df = pyber_data_df.loc[pyber_data_df["type"] == "Rural"]

# B-D. Number of rides per city type = count of non-null ride_id values.
sum_urban_ride_count = urban_cities_df["ride_id"].count()
sum_suburban_ride_count = suburban_cities_df["ride_id"].count()
sum_rural_ride_count = rural_cities_df["ride_id"].count()

# Sanity check: print the three counts (urban, suburban, rural).
print(sum_urban_ride_count, sum_suburban_ride_count, sum_rural_ride_count)


1625 625 125


In [7]:
# 2. Get the total drivers for each city type
# Drivers are totalled from city_data_df (not the merged frame). Only the
# driver_count column is summed; summing the whole frame would also aggregate
# the string columns just to throw the result away.
sum_drivers_urban = city_data_df.loc[city_data_df["type"] == "Urban", "driver_count"].sum()
sum_drivers_suburban = city_data_df.loc[city_data_df["type"] == "Suburban", "driver_count"].sum()
sum_drivers_rural = city_data_df.loc[city_data_df["type"] == "Rural", "driver_count"].sum()

# Sanity check: print the three totals (urban, suburban, rural).
print(sum_drivers_urban, sum_drivers_suburban, sum_drivers_rural)


2405 490 78


In [8]:
#  3. Get the total amount of fares for each city type
# Select the fare column before summing so the string columns (city, date,
# type) are never aggregated. Totals are rounded to 2 decimal places.
sum_fares_urban = round(urban_cities_df["fare"].sum(), 2)
sum_fares_suburban = round(suburban_cities_df["fare"].sum(), 2)
sum_fares_rural = round(rural_cities_df["fare"].sum(), 2)

# Sanity check: print the three totals (urban, suburban, rural).
print(sum_fares_urban, sum_fares_suburban, sum_fares_rural)

39854.38 19356.33 4327.93


In [9]:
#  4. Get the average fare per ride for each city type.
# Take the mean of the fare column only. Calling .mean() on the whole frame
# tries to average the string columns too, which warns on older pandas and
# raises TypeError on pandas >= 2.0. Averages are rounded to 2 decimal places.
urban_avg_fare = round(urban_cities_df["fare"].mean(), 2)
suburban_avg_fare = round(suburban_cities_df["fare"].mean(), 2)
rural_avg_fare = round(rural_cities_df["fare"].mean(), 2)

# Sanity check: print the three averages (urban, suburban, rural).
print(urban_avg_fare, suburban_avg_fare, rural_avg_fare)


24.53 30.97 34.62


  
  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.


In [10]:
# 5. Get the average fare per driver for each city type.
# Each value is total fares / total drivers for that city type, rounded to
# 2 decimal places. Note the fare totals were already rounded in step 3.
urban_avg_fare_per_driver = round(sum_fares_urban / sum_drivers_urban, 2)
suburban_avg_fare_per_driver = round(sum_fares_suburban / sum_drivers_suburban, 2)
rural_avg_fare_per_driver = round(sum_fares_rural / sum_drivers_rural, 2)

# Sanity check: print the three ratios (urban, suburban, rural).
print(
    urban_avg_fare_per_driver,
    suburban_avg_fare_per_driver,
    rural_avg_fare_per_driver,
)


16.57 39.5 55.49


In [16]:
#  6. Create a PyBer summary DataFrame.
# One row per city type, assembled from the totals and averages computed in
# steps 1-5. The index is named "type" so that step 7 has an index name to
# clear; without this assignment step 7 fails with a NameError on a fresh run.
pyber_summary_df = pd.DataFrame(
    {
        "Total Rides": [
            sum_rural_ride_count,
            sum_suburban_ride_count,
            sum_urban_ride_count,
        ],
        "Total Drivers": [
            sum_drivers_rural,
            sum_drivers_suburban,
            sum_drivers_urban,
        ],
        "Total Fares": [
            sum_fares_rural,
            sum_fares_suburban,
            sum_fares_urban,
        ],
        "Average Fare per Ride": [
            rural_avg_fare,
            suburban_avg_fare,
            urban_avg_fare,
        ],
        "Average Fare per Driver": [
            rural_avg_fare_per_driver,
            suburban_avg_fare_per_driver,
            urban_avg_fare_per_driver,
        ],
    },
    index=pd.Index(["Rural", "Suburban", "Urban"], name="type"),
)

# Descriptive statistics for the numeric columns of the merged data; kept as
# the last expression so it remains this cell's displayed output.
pyber_data_df.describe()



Unnamed: 0,fare,ride_id,driver_count
count,2375.0,2375.0,2375.0
mean,26.753112,4887270000000.0,28.930105
std,12.109465,2907361000000.0,20.666306
min,4.05,321833700.0,1.0
25%,17.19,2381175000000.0,11.0
50%,26.6,4748541000000.0,23.0
75%,36.645,7461936000000.0,45.0
max,58.55,9991538000000.0,73.0


In [17]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the
# numeric columns of the raw ride data, for comparison with the merged frame.
ride_data_df.describe()

Unnamed: 0,fare,ride_id
count,2375.0,2375.0
mean,26.753112,4887270000000.0
std,12.109465,2907361000000.0
min,4.05,321833700.0
25%,17.19,2381175000000.0
50%,26.6,4748541000000.0
75%,36.645,7461936000000.0
max,58.55,9991538000000.0


In [15]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the
# numeric columns of urban_cities_df.
urban_cities_df.describe()

Unnamed: 0,fare,ride_id,driver_count
count,1625.0,1625.0,1625.0
mean,24.525772,4873485000000.0,36.678154
std,11.738649,2907440000000.0,20.075545
min,4.05,14588100000.0,3.0
25%,14.55,2400244000000.0,22.0
50%,24.64,4711188000000.0,37.0
75%,34.58,7451579000000.0,52.0
max,44.97,9991538000000.0,73.0


In [12]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the
# numeric columns of suburban_cities_df.
suburban_cities_df.describe()

Unnamed: 0,fare,ride_id,driver_count
count,625.0,625.0,625.0
mean,30.970128,4971015000000.0,13.712
std,10.677508,2912410000000.0,8.042372
min,12.05,321833700.0,1.0
25%,21.97,2364253000000.0,5.0
50%,30.75,5053221000000.0,16.0
75%,39.83,7551674000000.0,21.0
max,49.96,9917744000000.0,25.0


In [13]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the
# numeric columns of rural_cities_df.
rural_cities_df.describe()

Unnamed: 0,fare,ride_id,driver_count
count,125.0,125.0,125.0
mean,34.62344,4647746000000.0,4.296
std,14.558046,2887834000000.0,2.691156
min,10.11,26848730000.0,1.0
25%,19.9,2275364000000.0,1.0
50%,37.05,4023962000000.0,4.0
75%,47.07,7118047000000.0,7.0
max,58.55,9990581000000.0,9.0


In [9]:
#  7. Clean up the summary DataFrame: clear the name on its index.
# Requires pyber_summary_df to have been created in step 6.
pyber_summary_df.index = pyber_summary_df.index.rename(None)

In [10]:
#  8. Format the columns.


## Deliverable 2.  Create a multiple-line plot that shows the total weekly fares for each type of city.

In [11]:
# 1. Read the merged DataFrame


In [12]:
# 2. Using groupby() to create a new DataFrame showing the sum of the fares 
#  for each date where the indices are the city type and date.


In [13]:
# 3. Reset the index on the DataFrame you created in #2. This is needed to use the 'pivot()' function.
# df = df.reset_index()


In [14]:
# 4. Create a pivot table with the 'date' as the index, the columns ='type', and values='fare' 
# to get the total fares for each type of city by the date. 


In [15]:
# 5. Create a new DataFrame from the pivot table DataFrame using loc on the given dates, '2019-01-01':'2019-04-29'.



In [16]:
# 6. Set the "date" index to datetime datatype. This is necessary to use the resample() method in Step 8.
# df.index = pd.to_datetime(df.index)

In [17]:
# 7. Check that the datatype for the index is datetime using df.info()


In [18]:
# 8. Create a new DataFrame using the "resample()" function by week 'W' and get the sum of the fares for each week.


In [19]:
# 9. Using the object-oriented interface method, plot the resampled DataFrame using the df.plot() function.

# Bind Matplotlib's style module to the name `style`.
import matplotlib.style as style

# Switch the active Matplotlib style to "fivethirtyeight".
style.use("fivethirtyeight")

