In [1]:
import time
import pandas as pd
import numpy as np

In [2]:
# Maps the normalised city name (as produced by get_filters: spaces replaced
# with underscores, then title-cased) to the CSV file holding that city's trips.
# The New York key must be 'New_York' because that is the value get_filters
# returns and load_data uses for the lookup.
CITY_DATA = { 'Chicago': 'chicago.csv',
              'New_York': 'new_york_city.csv',
              'Washington': 'washington.csv' }

# Valid city answers; derived from CITY_DATA so the two can never drift apart.
cities = list(CITY_DATA)
# Valid month answers (title-cased); 'All' means "no month filter".
months = ['January', 'February', 'March', 'April', 'May', 'June', 'All']
# Valid day answers (title-cased); 'All' means "no day filter".
days = ['All', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

In [3]:
def _prompt_for_choice(prompt, valid_choices, label):
    """Ask `prompt` until the title-cased answer is in `valid_choices`.

    Args:
        (str) prompt - text passed to input()
        (list) valid_choices - accepted title-cased answers (may include 'All')
        (str) label - word used in the confirmation message ('month' or 'day')
    Returns:
        (str) the accepted title-cased answer, or lowercase 'all' when the
              user chose 'All' (the sentinel load_data compares against)
    """
    while True:
        answer = input(prompt).strip().title()
        if answer in valid_choices:
            print('You have entered {} as the {} you would like to filter by'.format(answer, label))
            # Normalise the "no filter" answer to the lowercase sentinel.
            return 'all' if answer == 'All' else answer
        print('Oops!, a valid input has not been entered')


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Every answer is validated; an invalid answer repeats only the question
    that was answered incorrectly.

    Returns:
        (str) city - name of the city to analyze, normalised to match `cities`
                     (spaces replaced by underscores, title-cased)
        (str) month - title-cased name of the month to filter by, or "all" to apply no month filter
        (str) day - title-cased name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')

    # City: "new york" -> "New_York" so the answer can be matched against `cities`.
    while True:
        print('Would you like to see data for Chicago, New York or Washington?')
        city = input('Which city would like to explore?:\n').strip().replace(' ', '_').title()
        if city in cities:
            print('You have entered {} as the city you would like to explore'.format(city))
            break
        print('Oops!, you have entered an incorrect city')

    # Which filters to apply; both default to "no filter".
    month, day = 'all', 'all'
    while True:
        print('Would you like to filter the data by month, day, both or none?')
        filter_by = input('Select filter:\n').strip().lower()
        if filter_by in ('month', 'day', 'both', 'none'):
            break
        print('Oops!, a valid input has not been entered')

    if filter_by in ('month', 'both'):
        print('Enter the month you would like to filter by:')
        month = _prompt_for_choice('Enter Month:\n', months, 'month')

    if filter_by in ('day', 'both'):
        day = _prompt_for_choice('Enter the day you would like to filter by:\n', days, 'day')

    print('-'*40)
    return city, month, day

In [4]:
def load_data(city=None, month='all', day='all'):
    """
    Loads data for the specified city and filters by month and day if applicable.

    When called without a city (the original calling convention), the user is
    prompted for all three values through get_filters().

    Args:
        (str) city - key of CITY_DATA naming the city to analyze; None to prompt the user
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day,
             with the derived columns 'Start Month', 'End Month', 'Start Day',
             'End Day', 'Hour', 'Popular Time' and 'Station Combination' added
    Raises:
        KeyError - if city is not a key of CITY_DATA
    """
    if city is None:
        city, month, day = get_filters()

    df = pd.read_csv(CITY_DATA[city])

    # Parse each timestamp column once and reuse the result for every
    # derived column instead of re-parsing the strings per column.
    start_times = pd.to_datetime(df['Start Time'])
    end_times = pd.to_datetime(df['End Time'])

    df['Start Month'] = start_times.dt.month_name()
    df['End Month'] = end_times.dt.month_name()
    df['Start Day'] = start_times.dt.day_name()
    df['End Day'] = end_times.dt.day_name()
    df['Hour'] = start_times.dt.hour
    df['Popular Time'] = start_times.dt.time
    df['Station Combination'] = df['Start Station'] + ' to ' + df['End Station']

    # Compare case-insensitively: 'All' and 'all' both mean "no filter", and
    # month/day names are title-cased to match month_name()/day_name() output.
    if month.lower() != 'all':
        df = df[df['Start Month'] == month.title()]

    if day.lower() != 'all':
        df = df[df['Start Day'] == day.title()]

    return df

In [5]:
def time_stats(df):
    """Print the most frequent start month, start day and start hour in df.

    Reads the 'Start Month', 'Start Day' and 'Hour' columns and reports the
    first mode of each, followed by the elapsed wall-clock time.
    """
    print('\nCalculating The Most Frequent Times of Travel...\n')
    began_at = time.time()

    # (message template, column) pairs, reported in this order.
    reports = (
        ('The most common month is:\n {}', 'Start Month'),
        ('The most common day is:\n {}', 'Start Day'),
        ('The most common start hour is:\n {}', 'Hour'),
    )
    for template, column in reports:
        top_value = df[column].mode()[0]
        print(template.format(top_value))

    elapsed = time.time() - began_at
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)

In [6]:
def station_stats(df):
    """Print the most common start station, end station and station pair in df.

    Reads the 'Start Station', 'End Station' and 'Station Combination' columns
    and reports the first mode of each, followed by the elapsed wall-clock time.
    """
    print('\nCalculating The Most Popular Stations and Trip...\n')
    began_at = time.time()

    # (message template, column) pairs, reported in this order.
    reports = (
        ('The most commonly used start station is:\n {}', 'Start Station'),
        ('The most commonly used end station is:\n {}', 'End Station'),
        ('The most frequent combination of start station and end station trip is:\n {}',
         'Station Combination'),
    )
    for template, column in reports:
        top_value = df[column].mode()[0]
        print(template.format(top_value))

    elapsed = time.time() - began_at
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)

In [7]:
def trip_duration_stats(df):
    """Print the sum and the mean of the 'Trip Duration' column of df.

    The values are printed exactly as pandas returns them (no unit conversion),
    followed by the elapsed wall-clock time.
    """
    print('\nCalculating Trip Duration...\n')
    began_at = time.time()

    durations = df['Trip Duration']

    # Total first, then the average, each printed as soon as it is computed.
    combined = durations.sum()
    print('The total travel time is:\n {}'.format(combined))

    typical = durations.mean()
    print('The average travel time is:\n {}'.format(typical))

    elapsed = time.time() - began_at
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)

In [8]:
def user_stats(df):
    """Print user-type counts and, when available, gender and birth-year stats.

    'User Type' is always read. 'Gender' and 'Birth Year' are optional columns
    (the original author notes they are absent from the washington data); when
    one is missing, a line of dots is printed in place of that section.
    """
    print('\nCalculating User Stats...\n')
    began_at = time.time()

    # Counts per user type, rendered as a one-column frame.
    type_table = df['User Type'].value_counts().to_frame()
    print('The categories of users are:\n {}'.format(type_table))

    # Gender section, or a dotted placeholder when the column is absent.
    if 'Gender' not in df:
        print('...'*21)
    else:
        gender_table = df['Gender'].value_counts().to_frame()
        print('The gender count is:\n {}'.format(gender_table))

    # Birth-year section, or a dotted placeholder when the column is absent.
    if 'Birth Year' not in df:
        print('...'*21)
    else:
        birth_years = df['Birth Year']
        oldest = birth_years.min()
        youngest = birth_years.max()
        most_frequent = birth_years.mode()[0]

        print('The earliest birth year is:\n {}'.format(oldest))
        print('The most recent birth year is:\n {}'.format(youngest))
        print('The most common birth year is:\n {}'.format(most_frequent))

    elapsed = time.time() - began_at
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)


In [10]:
def main():
    """Run the interactive bikeshare session until the user declines to restart.

    Each round loads (and filters) a city's data, prints the four statistics
    reports, then lets the user step through individual trips one row per
    "yes" answer.
    """
    while True:

        df = load_data()

        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)

        # Positional cursor into df. Position (iloc) is used rather than label
        # (loc) because filtering leaves gaps in the index labels, so label
        # 0, 1, 2, ... may not exist.
        next_row = 0
        while True:
            view_data = input('\n Would you like to view individual trip data? Type yes or no \n').strip().lower()

            if view_data == 'no':
                break
            if view_data != 'yes':
                print('This is not a valid input')
                continue
            if next_row >= len(df):
                # Nothing left (or the filters matched no trips at all).
                print('There is no more trip data to display')
                break

            print(df.iloc[next_row].to_frame())
            next_row += 1

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.lower() != 'yes':
            break


if __name__ == "__main__":
    main()

Hello! Let's explore some US bikeshare data!
Would you like to see data for Chicago, New York or Washington?
