In [1]:
import time
import pandas as pd
import numpy as np

In [2]:
CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }
cities = ['chicago', 'new york city', 'washington' ]
months = ['january','february','march','april','may','june']
days = ['monday','tuesday','wednesday','thursday','friday','saturday','sunday']

In [3]:
def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')
    # get user input for city (chicago, new york city, washington).
    while True:
        city = input("Which city would you like to analyze? \nchicago,new york city or washington? \nPlease enter here: ").lower()
        if city in cities:
            break
        else:
            print("Sorry, I didn't recognize that, please enter again: ")
    # get user input for month (all, january, february, ... , june)
    while True:
        month = input("Which month would you like to analyze? \njanuary,february,march,april,may or june or maybe all? \nPlease enter here: ").lower()
        if month in months or month == 'all':
            break
        else:
            print("Sorry, I didn't recognize that, please enter again: ")

    # get user input for day of week (all, monday, tuesday, ... sunday)
    while True:
        day = input("Which day of week would you like to explore? \nmonday,tuesday,wednesday,thursday,friday,saturday or sunday or all of them? \nPlease enter here: ").lower()
        if day in days or day == 'all':
            break
        else:
            print("Sorry, I didn't recognize that, please enter again: ")
    print('-'*40)
    return city, month, day

In [4]:
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    # load data file into a dataframe
    df = pd.read_csv(CITY_DATA[city])

    # convert the Start Time column to datetime
    df['Start Time'] = pd.to_datetime(df['Start Time'])

    # extract month and day of week from Start Time to create new columns
    df['month'] = df['Start Time'].dt.month
    df['day_of_week'] = df['Start Time'].dt.weekday_name

    # filter by month if applicable
    if month != 'all':
        # use the index of the months list to get the corresponding int
        month = months.index(month) + 1

        # filter by month to create the new dataframe
        df = df[df['month'] == month]

    # filter by day of week if applicable
    if day != 'all':
        # filter by day of week to create the new dataframe
        df = df[df['day_of_week'] == day.title()]
    return df

In [5]:
def time_stats(df):
    """Displays statistics on the most frequent times of travel."""

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    # display the most common month

    print('The most common month: {}'.format(df['month'].mode()[0]))
    # display the most common day of week

    print('The most common day of week: {}'.format(df['day_of_week'].mode()[0]))
    # display the most common start hour
    df['hour'] = df['Start Time'].dt.hour
    print('The most common hour: {}'.format(df['hour'].mode()[0]))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [6]:
def station_stats(df):
    """Displays statistics on the most popular stations and trip."""

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    # display most commonly used start station
    print('The most commonly used start station: '.format(df['Start Station'].mode()[0]))
    # display most commonly used end station
    print('The most commonly used end station: '.format(df['End Station'].mode()[0]))

    # display most frequent combination of start station and end station trip
    df['trip'] = 'from ' + df['Start Station'] + ' to ' + df['End Station'] 
    print('The most frequent combination of start station and end station trip: '.format(df['trip'].mode()[0]))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [7]:
def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration."""

    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    # display total travel time
    print('The total travel time is: {}'.format(df['Trip Duration'].sum()))

    # display mean travel time
    print('The mean travel time is: {}'.format(df['Trip Duration'].mean()))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [8]:
def user_stats(df):
    """Displays statistics on bikeshare users."""

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    counts_of_user_types = df['User Type'].value_counts()
    print("The counts of user types: ", counts_of_user_types)
    # Display counts of gender
    counts_of_gender = df['Gender'].value_counts()
    print('The counts of gender: ', counts_of_gender)
    # Display earliest, most recent, and most common year of birth
    earliest_year = df['Birth Year'].min()
    most_recent_year = df['Birth Year'].max()
    most_common_year = df['Birth Year'].mode()[0]
    print('The most earliest year of birth: {}, the most recent year of birth: {}, the most common year of birth: {}'.format(earliest_year, most_recent_year, most_common_year))


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [9]:
def main():
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.lower() != 'yes':
            break


if __name__ == "__main__":
	main()

Hello! Let's explore some US bikeshare data!
Which city would you like to analyze? 
chicago,new york city or washington? 
Please enter here: chicago
Which month would you like to analyze? 
january,february,march,april,may or june or maybe all? 
Please enter here: may
Which day of week would you like to explore? 
monday,tuesday,wednesday,thursday,friday,saturday or sunday or all of them? 
Please enter here: monday
----------------------------------------

Calculating The Most Frequent Times of Travel...

The most common month: 5
The most common day of week: Monday
The most common hour: 17

This took 0.03225421905517578 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

The most commonly used start station: 
The most commonly used end station: 
The most frequent combination of start station and end station trip: 

This took 0.03360700607299805 seconds.
----------------------------------------

Calculating Trip Duration...

The total trav