<a href="https://colab.research.google.com/github/Arksalone/Arksalone/blob/main/Lyft_ebikes_Applied_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

![](https://images.ctfassets.net/q8mvene1wzq4/33jAW8nvXF2evwe2XRXBsA/94650ab4152c8c114f4332585c33c948/Blog_Header__1200x670_.jpg?w=1000&q=60&fm=)

In [None]:
# import libraries
import pandas as pd
import numpy as np
import plotly.express as px

In [None]:
# read the ride records from the CSV file into a dataframe
df = pd.read_csv(filepath_or_buffer='datasets/lyft_ebike_data.csv')

In [None]:
# look at the first five rows of the loaded data
df.head(n=5)

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,end_station_name,start_lat,start_lng,end_lat,end_lng,member_type,duration
0,BC1E266A04B3E04E,electric_bike,2020-06-07 14:13:14,2020-06-07 15:49:45,,Jersey St at Castro St,37.76,-122.42,37.750513,-122.43383,casual,96.0
1,3B8B2E6E2A0C70D9,electric_bike,2020-09-11 18:24:22,2020-09-11 18:43:54,,Julian St at The Alameda,37.33,-121.91,37.332301,-121.912321,member,19.0
2,E0D8A02D7FE1904A,electric_bike,2022-08-31 17:56:16,2022-08-31 17:56:56,Embarcadero BART Station (Beale St at Market St),Embarcadero BART Station (Beale St at Market St),37.791627,-122.397557,37.792251,-122.397086,Member,0.0
3,42135E7992B2FA61,electric_bike,2020-05-02 16:17:25,2020-05-02 16:38:46,Page St at Scott St,Randall St at Chenery St,37.77234,-122.435874,37.739842,-122.42544,casual,21.0
4,BD993A395DAF8E4E,electric_bike,2021-08-13 21:25:30,2021-08-13 21:31:14,,,37.34,-121.9,37.34,-121.9,member,5.0


In [None]:
# list every distinct spelling that appears in the 'member_type' column
df.loc[:, 'member_type'].unique()

array(['casual', 'member', 'Member', 'CASUAL', 'MEMBER', 'Casual',
       'mEMBER', 'cASUAL'], dtype=object)

In [None]:
# Function to clean member type
def clean_member_type(col_str):
    '''
    Normalizes one member-type label by converting it to upper case,
    so that differently-cased spellings collapse to a single value.
    input: string
    output: string (the upper-cased input)
    '''
    upper_cased = col_str.upper()
    return upper_cased

In [None]:
# Add a column holding the upper-cased version of each member type
df['clean_member_type'] = df['member_type'].map(clean_member_type)

In [None]:
# Preview the first five rows again, now including the new column
df.head(n=5)

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,end_station_name,start_lat,start_lng,end_lat,end_lng,member_type,duration,clean_member_type
0,BC1E266A04B3E04E,electric_bike,2020-06-07 14:13:14,2020-06-07 15:49:45,,Jersey St at Castro St,37.76,-122.42,37.750513,-122.43383,casual,96.0,CASUAL
1,3B8B2E6E2A0C70D9,electric_bike,2020-09-11 18:24:22,2020-09-11 18:43:54,,Julian St at The Alameda,37.33,-121.91,37.332301,-121.912321,member,19.0,MEMBER
2,E0D8A02D7FE1904A,electric_bike,2022-08-31 17:56:16,2022-08-31 17:56:56,Embarcadero BART Station (Beale St at Market St),Embarcadero BART Station (Beale St at Market St),37.791627,-122.397557,37.792251,-122.397086,Member,0.0,MEMBER
3,42135E7992B2FA61,electric_bike,2020-05-02 16:17:25,2020-05-02 16:38:46,Page St at Scott St,Randall St at Chenery St,37.77234,-122.435874,37.739842,-122.42544,casual,21.0,CASUAL
4,BD993A395DAF8E4E,electric_bike,2021-08-13 21:25:30,2021-08-13 21:31:14,,,37.34,-121.9,37.34,-121.9,member,5.0,MEMBER


In [None]:
# Function to calculate the cost of each ride
def ride_cost(df_row):
    '''
    Returns the total price for a single ride,
    based on Lyft's terms of agreement.

    Member riders pay $0.20 per minute
    casual riders pay $0.30 per minute plus a $3.49 unlocking fee

    input: row of the dataframe (or any mapping) that provides the
           'duration' and 'clean_member_type' entries
    output: cost of the ride as a numeric value
    '''
    member_rate = 0.20   # price per minute for members
    casual_rate = 0.30   # price per minute for casual riders
    unlock_fee = 3.49    # flat unlocking fee added for casual riders

    # Read the duration from the row being processed. Indexing the global
    # dataframe here would return the entire 'duration' column for every row
    # instead of a single number.
    duration = df_row['duration']

    if df_row['clean_member_type'] == 'MEMBER':
        return duration * member_rate
    else:
        # any value other than 'MEMBER' is priced as a casual ride
        return casual_rate * duration + unlock_fee

In [None]:
# Compute the cost of every ride row by row and store it in a new column
df['ride_cost'] = df[['duration', 'clean_member_type']].apply(ride_cost, axis=1)

In [None]:
# Generate statistics on the ride cost & duration of the two member types.
# For 'ride_cost' calculate the total value and the average ride cost
# For 'duration' calculate the average ride time
# For 'ride_id' count the number of rides on the platform
agg_values = {
    'ride_cost' : ['sum', 'mean'],
    'duration'  : ['mean'],
    'ride_id'   : ['count']
}

# The name passed to .agg() must match the dictionary defined above
# (agg_values); a misspelled name raises a NameError.
df.groupby('clean_member_type').agg(agg_values)

In [None]:
# Create some visualizations of the data
