In [2]:
import time
import pandas as pd
import numpy as np

In [3]:
CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }
MONTH_DATA = ['all','january', 'february', 'march', 'april', 'may', 'june']
DAY_DATA = [ 'all','monday', 'tuesday', 'wednesday', 'friday', 'saturday', 'sunday']


In [4]:
LINE_LEN = 90

# print long string with repeating char, used to separate sections of output
print_line = lambda char: print(char[0] * LINE_LEN)
def get_filter_city():
    # build and display the list of cities for which we have datasets
    cities_list = []
    num_cities = 0

    for a_city in CITY_DATA:
        cities_list.append(a_city)
        num_cities += 1
        print('        {0:20}. {1}'.format(num_cities, a_city.title()))

    # ask user to input a number for a city from the list; easier for user than string input
    while True:
        try:
            city_num = int(input("\n    Enter a number for the city (1 - {}):  ".format(len(cities_list))))
        except:
            continue

        if city_num in range(1, len(cities_list)+1):
            break

    # get the city's name in string format from the list
    city = cities_list[city_num - 1]
    return city


def get_filter_month():
    while True:
        try:
            month = input("    Enter the month with January=1, June=6 or 'a' for all:  ")
        except:
            print("        ---->>  Valid input:  1 - 6, a")
            continue

        if month == 'a':
            month = 'all'
            break
        elif month in {'1', '2', '3', '4', '5', '6'}:
            # reassign the string name for the month
            month = MONTH_DATA[int(month) - 1]
            break
        else:
            continue
    
    return month


def get_filter_day():

    while True:
        try:
            day = input("    Enter the day with Monday=1, Sunday=7 or 'a' for all:  ")
        except:
            print("        ---->>  Valid input:  1 - 7, a")
            continue

        if day == 'a':
            day = 'all'
            break
        elif day in {'1', '2', '3', '4', '5', '6', '7'}:
           
            day = DAY_DATA [int(day) - 1]    
            break
        else:
            continue

    return day


def get_filters():

    print('\n  Hello! Let\'s explore some US bikeshare data!\n')
    # get user input for city (chicago, new york city, washington).

    city = get_filter_city()

    # get user input for month (all, january, february, ... , june)
    month = get_filter_month()

    # get user input for day of week (all, monday, tuesday, ... sunday)
    day = get_filter_day()

    return city, month, day





def load_data(city, month, day):
    start_time = time.time()

    df = pd.read_csv(CITY_DATA[city])


    df['Start Time'] = pd.to_datetime(df['Start Time'], errors='coerce')

    
    df['month'] = df['Start Time'].dt.month                 
    df['day_of_week'] = df['Start Time'].dt.dayofweek      
    df['hour'] = df['Start Time'].dt.hour                   

    init_total_rides = len(df)
    filtered_rides = init_total_rides  

   
    if month != 'all':
      
        month_i = MONTHS.index(month) + 1     
    
        df = df[df.month == month_i]
        month = month.title()

    # filter by day of week if applicable
    if day != 'all':
        # use the index of the WEEKDAYS list to get the corresponding int
        day_i = WEEKDAYS.index(day)         # index() returns 0-based, matches df

        # filter by day of week to create the new dataframe
        df = df[df.day_of_week == day_i]
        day = day.title()



    return df



def time_stats(df):
    
    print('  Most Frequent Times of Travel...')
    start_time = time.time()

    month = MONTH_DATA[df['month'].mode()[0] - 1].title()
    print('    Month:               ', month)

    # display the most common day of week
    common_day = df['day_of_week'].mode()[0]        
    common_day = DAY_DATA[common_day].title()
    print('    Day of the week:     ', common_day)

def station_stats(df):
   

    print('  Most Popular Stations and Trip...')
    start_time = time.time()

    filtered_rides = len(df)

    # display most commonly used start station
    start_station = df['Start Station'].mode()[0]
    start_station_trips = df['Start Station'].value_counts()[start_station]

    print('    Start station:       ', start_station)
    print('{0:30}{1}/{2} trips'.format(' ', start_station_trips, filtered_rides))

    # display most commonly used end station
    end_station = df['End Station'].mode()[0]
    end_station_trips = df['End Station'].value_counts()[end_station]

    print('    End station:         ', end_station)
    print('{0:30}{1}/{2} trips'.format(' ', end_station_trips, filtered_rides))

    # display most frequent combination of start station and end station trip
    # group the results by start station and end station
    df_start_end_combination_gd = df.groupby(['Start Station', 'End Station'])
    most_freq_trip_count = df_start_end_combination_gd['Trip Duration'].count().max()
    most_freq_trip = df_start_end_combination_gd['Trip Duration'].count().idxmax()

    print('    Frequent trip:        {}, {}'.format(most_freq_trip[0], most_freq_trip[1]))
    print('{0:30}{1} trips'.format(' ', most_freq_trip_count))




def user_stats(df):
    """Displays statistics on bikeshare users."""

    print('  User Stats...')
    start_time = time.time()

    # Display counts of user types
    user_types = df['User Type'].value_counts()
    for idx in range(len(user_types)):
        val = user_types[idx]
        user_type = user_types.index[idx]
        print('    {0:21}'.format((user_type + ':')), val)



    if 'Gender' in df.columns:
        # Display counts of gender
        genders = df['Gender'].value_counts()
        for idx in range(len(genders)):
            val = genders[idx]
            gender = genders.index[idx]
            print('    {0:21}'.format((gender + ':')), val)

    if 'Birth Year' in df.columns:
        # Display earliest, most recent, and most common year of birth
        print('    Year of Birth...')
        print('        Earliest:        ', int(df['Birth Year'].min()))
        print('        Most recent:     ', int(df['Birth Year'].max()))
        print('        Most common:     ', int(df['Birth Year'].mode()))
    
def display_data(df):
    index=0
    user_input=input('would you like to display 5 rows of raw data? ').lower()
    while user_input in ['yes','y','yep','yea'] and index+5 < df.shape[0]:
        print(df.iloc[index:index+5])
        index += 5
        user_input = input('would you like to display more 5 rows of raw data? ').lower()



In [5]:
def main():
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats(df)
        station_stats(df)
        user_stats(df)
        display_data(df)
     

        restart = input('\n    Would you like to restart? (y or n):  ')
        if restart.lower() != 'y':
            break


if __name__ == "__main__":
    main()


  Hello! Let's explore some US bikeshare data!

                           1. Chicago
                           2. New York City
                           3. Washington

    Enter a number for the city (1 - 3):  1
    Enter the month with January=1, June=6 or 'a' for all:  1
    Enter the day with Monday=1, Sunday=7 or 'a' for all:  1
  Most Frequent Times of Travel...
    Month:                May
    Day of the week:      Monday
  Most Popular Stations and Trip...
    Start station:        Streeter Dr & Grand Ave
                              6911/300000 trips
    End station:          Streeter Dr & Grand Ave
                              7512/300000 trips
    Frequent trip:        Lake Shore Dr & Monroe St, Streeter Dr & Grand Ave
                              854 trips
  User Stats...
    Subscriber:           238889
    Customer:             61110
    Dependent:            1
    Male:                 181190
    Female:               57758
    Year of Birth...
        Earliest: 