In [3]:
import time
import pandas as pd
import numpy as np
import datetime as dt


In [4]:
# Maps each supported city name (lower-case, as typed by the user) to the
# CSV file that holds that city's trip records.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}



In [5]:
def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Every answer is stripped of surrounding whitespace and lower-cased, and
    each prompt is repeated until one of the accepted values is entered.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    # Accepted answers. The city names must stay in sync with the keys of CITY_DATA.
    valid_cities = ["chicago", "new york city", "washington"]
    valid_months = ["all", 'january', 'february', 'march', 'april', 'may', 'june']
    valid_days = ["all", 'monday', 'tuesday', 'wednesday', 'thursday',
                  'friday', 'saturday', 'sunday']

    print('Hello! Let\'s explore some US bikeshare data!')

    # get user input for city (chicago, new york city, washington)
    city = input("Enter name of the city to analyze:").strip().lower()
    while city not in valid_cities:
        city = input("""Invalid city, please choose name of the city from ('Chicago','New York City','Washington'):""").strip().lower()

    # get user input for month (all, january, february, ... , june)
    month = input("""Enter name of the month to filter by, or "all" to apply no month filter:""").strip().lower()
    while month not in valid_months:
        # The retry must be stored in `month`; storing it anywhere else would
        # leave the loop condition unchanged and the loop would never end.
        month = input("""Invalid month, please choose name of the month from ('January', 'February', 'March', 'April', 'May', 'June') or 'all':""").strip().lower()

    # get user input for day of week (all, monday, tuesday, ... sunday)
    day = input("""Enter name of the day of week to filter by, or "all" to apply no day filter:""").strip().lower()
    while day not in valid_days:
        day = input("""Invalid day, please choose name of the day from ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday') or 'all':""").strip().lower()

    print('-'*40)
    return city, month, day



In [6]:

def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    The CSV file is looked up in CITY_DATA and must contain a 'Start Time'
    column. Three helper columns are derived from it and added to the result:
    'month' (1-12), 'day_of_week' (e.g. 'Monday') and 'hour' (0-23).

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    Raises:
        KeyError - if city is not a key of CITY_DATA
        ValueError - if month is neither "all" nor one of january..june
    """
    df = pd.read_csv(CITY_DATA[city])

    # convert the Start Time column to datetime
    df['Start Time'] = pd.to_datetime(df['Start Time'])

    # extract month, day of week and hour from Start Time to create new columns
    df['month'] = df['Start Time'].dt.month
    df['day_of_week'] = df['Start Time'].dt.day_name()
    df['hour'] = df['Start Time'].dt.hour

    if month != "all":
        # translate the month name into the 1-based number stored in 'month'
        months = ['january', 'february', 'march', 'april', 'may', 'june']
        month_number = months.index(month.lower()) + 1
        df = df[df["month"] == month_number]

    if day != "all":
        # day_name() yields title-cased names ("Monday"), so the filter value
        # must be title-cased too. title must be *called*: comparing against
        # the bare method object matches nothing and empties the frame.
        df = df[df["day_of_week"] == day.title()]

    return df



In [7]:

def time_stats(df):
    """Print the most frequent month, weekday and start hour found in df.

    Reads the 'month', 'day_of_week' and 'hour' columns, reports the first
    modal value of each, then prints how long the calculation took.
    """
    print('\nCalculating The Most Frequent Times of Travel...\n')
    began = time.time()

    # (label, column) pairs, reported in this order
    reports = (
        ("Most common month: ", "month"),
        ("Most common day of the week: ", "day_of_week"),
        ("Most common hour: ", "hour"),
    )
    for label, column in reports:
        top_value = df[column].mode()[0]
        print(label + str(top_value))

    elapsed = time.time() - began
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)


In [8]:

def station_stats(df):
    """Print the busiest start station, end station and start/end pair in df.

    Reads the 'Start Station' and 'End Station' columns. The most frequent
    pair is printed as a (start, end) tuple, followed by the elapsed time.
    """
    print('\nCalculating The Most Popular Stations and Trip...\n')
    began = time.time()

    # most commonly used start station
    top_start = df["Start Station"].mode()[0]
    print(f"Most commonly used start station: {top_start!s}")

    # most commonly used end station
    top_end = df["End Station"].mode()[0]
    print(f"Most commonly used end station: {top_end!s}")

    # most frequent (start, end) combination: count rows per pair, take the largest
    pair_counts = df.groupby(["Start Station", "End Station"]).size()
    top_pair = pair_counts.idxmax()
    print(f"Most frequent combination of start station and end station trip: {top_pair!s}")

    elapsed = time.time() - began
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)



In [9]:


def trip_duration_stats(df):
    """Print the sum and the mean of the 'Trip Duration' column of df.

    The values are printed as stored in the column, followed by the time
    the calculation took.
    """
    print('\nCalculating Trip Duration...\n')
    began = time.time()

    durations = df["Trip Duration"]

    # total travel time
    total_duration = durations.sum()
    print(f"Total travel time: {total_duration!s}")

    # mean travel time
    mean_duration = durations.mean()
    print(f"Mean travel time: {mean_duration!s}")

    elapsed = time.time() - began
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)



In [10]:

def user_stats(df):
    """Displays statistics on bikeshare users.

    Prints the number of trips per 'User Type'. Gender counts and birth-year
    statistics are printed only when the corresponding column exists in df;
    otherwise a short explanatory message is printed instead.
    """

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    print("Counts of user type:")
    print(df["User Type"].value_counts())

    # Display counts of gender (the column is not guaranteed to be present)
    if 'Gender' in df.columns:
        print("\nCounts of gender:")
        print(df["Gender"].value_counts())
    else:
        print('Gender stats cannot be calculated because Gender does not appear in the dataframe')

    # Display earliest, most recent, and most common year of birth
    # NOTE(review): assumes the column is named 'Birth Year' -- confirm against the CSV headers.
    if 'Birth Year' in df.columns:
        # Coerce to numbers and drop blanks so missing entries cannot skew the result.
        birth_years = pd.to_numeric(df['Birth Year'], errors='coerce').dropna()
        if birth_years.empty:
            print('Birth year stats cannot be calculated because Birth Year has no values')
        else:
            print("\nEarliest year of birth: " + str(int(birth_years.min())))
            print("Most recent year of birth: " + str(int(birth_years.max())))
            print("Most common year of birth: " + str(int(birth_years.mode()[0])))
    else:
        print('Birth year stats cannot be calculated because Birth Year does not appear in the dataframe')

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [11]:
def display_raw_data(df):
    """Shows the raw trip rows five at a time for as long as the user answers yes.

    Each "yes" prints the next five rows of df (fewer if less than five
    remain). Paging stops when the user answers anything else or when all
    rows have been shown.

    Args:
        df - Pandas DataFrame whose rows are displayed
    """
    rows_per_page = 5
    total_rows = len(df)

    # Answers are normalised so "Yes", " YES " etc. are accepted as well.
    view_data = input("Would you like to view 5 rows of individual trip data? Enter yes or no?").strip().lower()
    start_loc = 0
    while view_data == "yes":
        if start_loc >= total_rows:
            print("No more rows to display.")
            break

        # Slice from the current offset so every page shows new rows.
        print(df.iloc[start_loc:start_loc + rows_per_page, :])
        start_loc += rows_per_page
        view_data = input("Do you wish to continue? Enter yes or no? ").strip().lower()

In [15]:

def main():
    """Run the interactive analysis until the user declines to restart.

    One round asks for the filters, loads the matching data, prints every
    statistics report, offers the raw rows, and finally asks whether to
    start over. Any answer other than "yes" (case-insensitive) ends the loop.
    """
    keep_running = True
    while keep_running:
        city, month, day = get_filters()
        trips = load_data(city, month, day)

        # reports, in the order they are shown to the user
        time_stats(trips)
        station_stats(trips)
        trip_duration_stats(trips)
        user_stats(trips)
        display_raw_data(trips)

        answer = input('\nWould you like to restart? Enter yes or no.\n')
        keep_running = answer.lower() == 'yes'


# Start the interactive session only when this code is executed as the main
# program; importing it as a module defines the functions without running them.
if __name__ == "__main__":
    main()



Hello! Let's explore some US bikeshare data!
Enter name of the city to analyze:chicago
Enter name of the month to filter by, or "all" to apply no month filter:all
Enter name of the day of week to filter by, or "all" to apply no day filter:all
----------------------------------------

Calculating The Most Frequent Times of Travel...

Most common month: 6
Most common day of the week: Tuesday
Most common hour: 17

This took 0.0229947566986084 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

Most commonly used start station: Streeter Dr & Grand Ave
Most commonly used end station: Streeter Dr & Grand Ave
Most frequent combination of start station and end station trip: ('Lake Shore Dr & Monroe St', 'Streeter Dr & Grand Ave')

This took 0.09722304344177246 seconds.
----------------------------------------

Calculating Trip Duration...

Total travel time: 280871787
Mean travel time: 936.23929

This took 0.0008137226104736328 seconds.
-------