In [1]:
import time
import pandas as pd
import numpy as np

In [2]:
CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }

In [3]:
def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')
    # get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs
    while True:
        city=input('Enter City: ')
        if city=='chicago' or city=='new york city' or city=='washington':
            break
        else:
            print('Invalid Input! State whether the city is chicago, new york city or washington, all in lower case')
            

    # get user input for month (all, january, february, ... , june)
    while True:
        month=input('Enter Month: ')
        if month=='all' or month=='january' or month=='february' or month=='march' or month=='april' or month=='may' or month=='june':
            break
        else:
            print('Invalid Input! State whether the month is all, january, february,..., june all in lower case')

    # get user input for day of week (all, monday, tuesday, ... sunday)
    while True:
        day=input('Enter Day of Week: ')
        if day=='all' or day=='monday' or day=='tuesday' or day=='wednesday' or day=='thursday' or day=='friday' or day=='saturday' or day=='sunday':
            break
        else:
            print('Invalide Input! State whether the day is all,  monday, tuesday,...,  sunday all in lower case')

    print('-'*40)
    return city, month, day


In [4]:
def load_data(city, month, day):
    day = day[0].upper()+day[1:]
    """
    Loads data for the specified city and filters by month and day if applicable.

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - pandas DataFrame containing city data filtered by month and day
    """
    
    # load data file into a dataframe
    
    for key,value in CITY_DATA.items():
        if key==city:
            df=pd.read_csv(value)
    

    # convert the Start Time column to datetime
    df['Start Time'] = pd.to_datetime(df['Start Time'])

    # extract month and day of week from Start Time to create new columns
    df['month'] = pd.DatetimeIndex(df['Start Time']).month
    df['day_of_week'] = pd.DatetimeIndex(df['Start Time']).day


    # filter by month if applicable
    if month != 'all':
        # use the index of the months list to get the corresponding int
        months = ['january', 'february', 'march', 'april', 'may', 'june']
        month = months.index(month)+1
    
        # filter by month to create the new dataframe
        df = df[df['Start Time'].map(lambda x: x.month)==month]

    # filter by day of week if applicable
    if day != 'all':
        # filter by day of week to create the new dataframe
        df = df[df['Start Time'].dt.day_name()==day]
    
    return df
    

In [5]:
def time_stats(df):
    """Displays statistics on the most frequent times of travel."""

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()
    
    # display the most common month
    print('The most common month is {}\n'.format(df['month'].mode()))

    # display the most common day of week
    print('The most common day of the week is {}\n'.format(df['day_of_week'].mode()))
    
    df['hour'] = pd.DatetimeIndex(df['Start Time']).hour
    # display the most common start hour
    print('The most common hour is {} hour\n'.format(df['hour'].mode()))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


In [6]:
def station_stats(df):
    """Displays statistics on the most popular stations and trip."""

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    # display most commonly used start station
    print('The most commonly used start station is {}\n'.format(df['Start Station'].mode()))

    # display most commonly used end station
    print('The most commonly used end station is {}\n'.format(df['End Station'].mode()))

    # display most frequent combination of start station and end station trip
    print('The most frequent combination of start station and end station trip is {}\n'.format(df.groupby(['Start Station','End Station']).size().idxmax()))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [7]:
def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration."""

    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    # display total travel time
    print('The total travel time is {}\n'.format(df['Trip Duration'].sum()))

    # display mean travel time
    print('The mean travel time is {}\n'.format(df['Trip Duration'].mean()))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


In [8]:
def user_stats(df):
    """Displays statistics on bikeshare users."""

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    print('The counts of user types are: {}\n'.format(df['User Type'].value_counts()))

    # Display counts of gender
    print('The counts of  gender are: {}\n'.format(df['Gender'].value_counts()))

    # Display earliest, most recent, and most common year of birth
    print('The earliest year of birth is {}\n'.format(int(df['Birth Year'].min())))
    print('The most recent year of birth is {}\n'.format(int(df['Birth Year'].max())))
    print('The most common year of birth is {}\n'.format(int(df['Birth Year'].mode())))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [9]:
def main():
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.lower() != 'yes':
            break
        else:
            continue

In [10]:
if __name__ == "__main__":
	main()

Hello! Let's explore some US bikeshare data!
Enter City: chicago
Enter Month: march
Enter Day of Week: friday
----------------------------------------

Calculating The Most Frequent Times of Travel...

The most common month is 0    3
dtype: int64

The most common day of the week is 0    24
dtype: int64

The most common hour is 0    17
dtype: int64 hour


This took 0.0 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

The most commonly used start station is 0    Clinton St & Washington Blvd
dtype: object

The most commonly used end station is 0    Clinton St & Washington Blvd
dtype: object

The most frequent combination of start station and end station trip is ('Streeter Dr & Grand Ave', 'Streeter Dr & Grand Ave')


This took 0.03125143051147461 seconds.
----------------------------------------

Calculating Trip Duration...

The total travel time is 4344099

The mean travel time is 747.3075864441769


This took 0.0 seconds.
-----------