In [6]:
import time
import pandas as pd
import numpy as np

# Supported cities, keyed by the lower-case name the user types, mapped to
# the CSV file holding that city's trips.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

# Accepted month filter values; "all" means no month filter.
MONTHS = (
    "all",
    "january",
    "february",
    "march",
    "april",
    "may",
    "june",
)

# Accepted day-of-week filter values; "all" means no day filter.
DAYS = (
    "all",
    "monday",
    "tuesday",
    "wednesday",
    "thursday",
    "friday",
    "saturday",
    "sunday",
)

def _prompt_choice(prompt, choices, complaint):
    """Ask `prompt` repeatedly until the normalized reply is one of `choices`.

    The reply is lower-cased and its whitespace is trimmed and collapsed, so
    "  New   York City " is accepted as "new york city". `complaint` is
    printed after every rejected reply.
    """
    while True:
        reply = " ".join(input(prompt).lower().split())
        if reply in choices:
            return reply
        print(complaint)


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Each question is repeated until the answer is one of the accepted values
    (the keys of CITY_DATA, the entries of MONTHS, the entries of DAYS).
    Answers are case-insensitive and surrounding or repeated whitespace is
    ignored.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')

    # get user input for city (chicago, new york city, washington)
    city = _prompt_choice(
        "Enter city (chicago, new york city, washington): ",
        CITY_DATA,
        "invalid city",
    )

    # get user input for month (all, january, february, ... , june)
    month = _prompt_choice(
        "Enter month (all, january, february, ... , june): ",
        MONTHS,
        "invalid month",
    )

    # get user input for day of week (all, monday, tuesday, ... sunday)
    day = _prompt_choice(
        "Enter day of week (all, monday, tuesday, ... sunday): ",
        DAYS,
        "invalid day",
    )

    print('-'*40)
    return city, month, day


def load_data(city, month, day):
    """
    Reads one city's trips from disk and narrows them to the requested period.

    The CSV is looked up through CITY_DATA inside the "data/" directory, and
    its "Start Time" and "End Time" columns are converted to datetimes. The
    month and day filters are both matched against "Start Time".

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    trips = pd.read_csv("data/" + CITY_DATA[city])
    for column in ("Start Time", "End Time"):
        trips[column] = pd.to_datetime(trips[column])

    # Each filter is skipped when its value is "all"; with both skipped the
    # frame is returned exactly as loaded.
    if month != "all":
        in_month = trips["Start Time"].dt.month_name() == month.title()
        trips = trips[in_month]

    if day != "all":
        on_day = trips["Start Time"].dt.day_name() == day.title()
        trips = trips[on_day]

    return trips


def time_stats(df):
    """Prints the most frequent month, weekday and start hour of the trips.

    All three figures are derived from the "Start Time" column of `df`,
    followed by the time the calculation took.
    """
    print('\nCalculating The Most Frequent Times of Travel...\n')
    began = time.time()

    started = df["Start Time"].dt

    # Each entry pairs an output label with a callable producing the series
    # whose mode is reported; they are evaluated and printed in this order.
    breakdowns = (
        ("the most common month:", started.month_name),
        ("the most common day:", started.day_name),
        ("the most common start hour:", lambda: started.hour),
    )
    for label, extract in breakdowns:
        print(label, extract().mode()[0])

    elapsed = time.time() - began
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)


def station_stats(df):
    """Prints the busiest start station, end station and start/end pairing.

    Reads the "Start Station" and "End Station" columns of `df`; the pairing
    is printed as a (start, end) tuple. The elapsed time is printed last.
    """
    print('\nCalculating The Most Popular Stations and Trip...\n')
    began = time.time()

    # most commonly used start station
    top_start = df["Start Station"].mode()[0]
    print("the most common start station:", top_start)

    # most commonly used end station
    top_end = df["End Station"].mode()[0]
    print("the most common end station:", top_end)

    # value_counts() orders the (start, end) pairs by frequency, so the first
    # index entry is the most frequent trip
    pair_counts = df[["Start Station", "End Station"]].value_counts()
    top_pair = pair_counts.index[0]
    print("the most frequent combination of stations:", top_pair)

    elapsed = time.time() - began
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)


def trip_duration_stats(df):
    """Prints the summed and the average value of the "Trip Duration" column.

    The elapsed calculation time is printed afterwards.
    """
    print('\nCalculating Trip Duration...\n')
    began = time.time()

    durations = df["Trip Duration"]

    # total travel time
    print("total travel time:", durations.sum())

    # mean travel time
    print("mean travel time:", durations.mean())

    elapsed = time.time() - began
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)


def user_stats(df):
    """Displays statistics on bikeshare users.

    Prints the count of each user type and, only when the corresponding
    column is present in `df`, the count of each gender and the most recent,
    earliest and most common year of birth. The elapsed time is printed last.

    Args:
        df - Pandas DataFrame of trips; must contain a "User Type" column,
             while "Gender" and "Birth Year" are optional
    """

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    # The heading and the table are printed by separate calls: passing both
    # to a single print() inserts a separator space after the newline, which
    # shifts the first row of the table out of alignment.
    print("counts of user types:")
    print(df["User Type"].value_counts())

    # Display counts of gender
    if "Gender" in df:
        print("counts of gender:")
        print(df["Gender"].value_counts())

    # Display earliest, most recent, and most common year of birth
    if "Birth Year" in df:
        birth_years = df["Birth Year"].dropna()
        if birth_years.empty:
            # mode() of an all-missing column is empty, so indexing it with
            # [0] would raise; report the gap instead.
            print("no birth year data available")
        else:
            # assumes birth years are whole numbers that are only stored as
            # floats because the column contains missing values
            print("most recent year of birth:", int(birth_years.max()))
            print("earliest year of birth:", int(birth_years.min()))
            print("most common year of birth:", int(birth_years.mode()[0]))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def main():
    """Runs the interactive bikeshare explorer until the user declines to restart.

    Each round asks for the filters, loads the matching trips and prints the
    time, station, trip duration and user reports. A selection that matches
    no trips is reported instead of being passed to the reports.
    """
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        if df.empty:
            # The reports index into .mode() results with [0], which raises
            # when there are no rows, so skip them for an empty selection.
            print("\nNo trips match the selected filters.")
        else:
            time_stats(df)
            station_stats(df)
            trip_duration_stats(df)
            user_stats(df)

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        # Ignore stray whitespace around the answer so " yes " still restarts.
        if restart.strip().lower() != 'yes':
            break


# Start the interactive session only when the file is executed directly,
# not when it is imported.
if __name__ == "__main__":
    main()


Hello! Let's explore some US bikeshare data!


Enter city (chicago, new york city, washington):  washington
Enter month (all, january, february, ... , june):  all
Enter day of week (all, monday, tuesday, ... sunday):  all


----------------------------------------

Calculating The Most Frequent Times of Travel...

the most common month: June
the most common day: Wednesday
the most common start hour: 8

This took 0.15698933601379395 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

the most common start station: Columbus Circle / Union Station
the most common end station: Columbus Circle / Union Station
the most frequent combination of stations: ('Jefferson Dr & 14th St SW', 'Jefferson Dr & 14th St SW')

This took 0.09302139282226562 seconds.
----------------------------------------

Calculating Trip Duration...

total travel time: 371183985.484
mean travel time: 1237.2799516133446

This took 0.0019969940185546875 seconds.
----------------------------------------

Calculating User Stats...

counts of user types:
 Subscriber    220786
Customer       79214
Name: User Type, dtype: int64

This took 0.01201629638671875 seconds.
----------------------------------------


Would you like to restart? Enter yes or no.
 yes


Hello! Let's explore some US bikeshare data!


Enter city (chicago, new york city, washington):  chicago
Enter month (all, january, february, ... , june):  all
Enter day of week (all, monday, tuesday, ... sunday):  all


----------------------------------------

Calculating The Most Frequent Times of Travel...

the most common month: June
the most common day: Tuesday
the most common start hour: 17

This took 0.1580820083618164 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

the most common start station: Streeter Dr & Grand Ave
the most common end station: Streeter Dr & Grand Ave
the most frequent combination of stations: ('Lake Shore Dr & Monroe St', 'Streeter Dr & Grand Ave')

This took 0.10058450698852539 seconds.
----------------------------------------

Calculating Trip Duration...

total travel time: 280871787
mean travel time: 936.23929

This took 0.0020117759704589844 seconds.
----------------------------------------

Calculating User Stats...

counts of user types:
 Subscriber    238889
Customer       61110
Dependent          1
Name: User Type, dtype: int64
counts of gender:
 Male      181190
Female     57758
Name: Gender, dtype: int64


NameError: name 'printt' is not defined