In [8]:
import time
import pandas as pd
import numpy as np

# Lookup table: lowercase city name as typed by the user -> CSV file with
# that city's trip records.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

def _prompt_until_valid(prompt, valid_choices, retry_message, normalize):
    """Keep asking until the normalized answer is one of ``valid_choices``.

    Args:
        prompt (str): text shown to the user on every attempt.
        valid_choices: container of accepted (already normalized) answers.
        retry_message (str): text printed after an invalid answer.
        normalize (callable): str -> str, applied to the cleaned answer
            before it is compared (e.g. ``str.lower`` or ``str.title``).

    Returns:
        str: the first normalized answer found in ``valid_choices``.
    """
    while True:
        # Collapse leading/trailing/repeated whitespace so that an answer
        # such as " new  york city " is not rejected for spacing alone.
        answer = normalize(' '.join(input(prompt).split()))
        if answer in valid_choices:
            return answer
        print(retry_message)


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Each question is repeated until a valid answer is given. Answers are
    case insensitive and surrounding whitespace is ignored.

    Returns:
        (str) city - lowercase name of the city to analyze (a key of CITY_DATA)
        (str) month - lowercase name of the month to filter by, or "all" to apply no month filter
        (str) day - title-cased name of the day of week to filter by, or "All" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')

    months = ['january', 'february', 'march', 'april', 'may', 'june', 'all']
    days = ['Saturday', 'Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'All']

    # City names are stored lowercase in CITY_DATA, so compare in lowercase.
    city_given = _prompt_until_valid(
        '\nWould you like to see data for Chicago, New York City, or Washington?\n',
        CITY_DATA,
        'Sorry, enter valid city name(chicago, new york city, or washington).',
        str.lower,
    )

    # The prompts advertise the "all" option, which was accepted but never
    # mentioned to the user before.
    month_given = _prompt_until_valid(
        '\nWhich month? January, February, March, April, May, June, or "all" for no month filter?\n',
        months,
        'Please enter month name as mentioned in previous message.',
        str.lower,
    )

    # Day names are kept title-cased because that is the form returned here
    # and matched against the `days` list.
    day_given = _prompt_until_valid(
        '\nWhich day? Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday, or "all" for no day filter?\n',
        days,
        'Please enter name of day as mentioned in previous message.',
        str.title,
    )

    print('\nThis is Bike Share Data Story for {} in {} month and day of {}\n'.format(city_given, month_given, day_given))
    print('-'*40)
    return city_given, month_given, day_given


def load_data(city_given, month_given, day_given):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Args:
        (str) city_given - name of the city to analyze; must be a key of CITY_DATA
        (str) month_given - name of the month to filter by, or "all" (any case) to apply no month filter
        (str) day_given - name of the day of week to filter by, or "all" (any case) to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day,
             with 'Start Time' parsed as datetime and two added columns:
             'month' (month name) and 'day_of_week' (weekday name)
    """
    # Load the city's CSV file into a DataFrame.
    df = pd.read_csv(CITY_DATA[city_given])

    # Parse the start time so the .dt accessor can be used below.
    df['Start Time'] = pd.to_datetime(df['Start Time'])

    # Derive the month name and weekday name of every trip.
    df['month'] = df['Start Time'].dt.month_name()
    df['day_of_week'] = df['Start Time'].dt.day_name()

    # The "all" sentinel is compared case-insensitively for BOTH filters.
    # Previously the day filter only recognised 'All', so a caller passing
    # 'all' (as the docstring promised) silently got an empty frame.
    if month_given.lower() != 'all':
        # Names are title-cased by month_name(); prefix matching is kept so
        # callers that relied on it keep working.
        df = df[df['month'].str.startswith(month_given.title())]

    if day_given.lower() != 'all':
        df = df[df['day_of_week'].str.startswith(day_given.title())]

    return df


def time_stats(df):
    """Displays statistics on the most frequent times of travel.

    Prints the most common month name, weekday name and start hour found in
    the 'Start Time' column, followed by the time the calculation took.

    Args:
        df - Pandas DataFrame with a datetime 'Start Time' column. It is
             only read; no columns are added to it.
    """

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    # Work on a local Series instead of writing helper columns into `df`:
    # the frame may be a filtered slice (assignment would trigger pandas'
    # SettingWithCopyWarning) and extra columns would leak to the caller.
    start_times = df['Start Time'].dropna()

    if start_times.empty:
        # mode() of an empty Series is empty, so indexing [0] would raise.
        print('No trips match the selected filters.')
    else:
        most_common_month = start_times.dt.month_name().mode()[0]
        most_common_day = start_times.dt.day_name().mode()[0]
        most_common_hour = start_times.dt.hour.mode()[0]

        print('Rush Month is: {} \nRush Day is: {} \nRush Hour is: {} \n'.format(
            most_common_month, most_common_day, most_common_hour))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Prints the most common start station, the most common end station and
    the most common start/end pair, followed by the time the calculation
    took.

    Args:
        df - Pandas DataFrame with string columns 'Start Station' and
             'End Station'. It is only read; no columns are added to it.
    """

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty Series is empty, so indexing [0] would raise.
        print('No trips match the selected filters.')
    else:
        best_start = df['Start Station'].mode()[0]
        best_destination = df['End Station'].mode()[0]

        # Join with ' to ' (spaces included) so the two station names stay
        # readable; the bare 'to' used before produced e.g. "A SttoB Ave".
        # The combined Series stays local rather than being stored in `df`.
        trips = df['Start Station'] + ' to ' + df['End Station']
        best_trip = trips.mode()[0]

        print('\nBest Station people like riding from is: {} \nBest Destination for people is: {} \nBest Trip for people is: {}\n'.format(best_start, best_destination, best_trip))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration.

    Prints the sum and the mean of the 'Trip Duration' column, then the
    time the calculation took.

    Args:
        df - Pandas DataFrame with a numeric 'Trip Duration' column.
    """

    print('\nCalculating Trip Duration...\n')
    started_at = time.time()

    # Pull the column out once and derive both figures from it.
    durations = df['Trip Duration']
    total, average = durations.sum(), durations.mean()

    summary = 'Total Time people rides bikes: {} sec\nAverage Time of a bike trip is: {} sec\n'
    print(summary.format(total, average))

    elapsed = time.time() - started_at
    print("\nThis took %s seconds." % elapsed)
    print('-' * 40)


def user_stats(df):
    """Displays statistics on bikeshare users.

    Prints the count of each user type. When the optional 'Gender' and
    'Birth Year' columns are present, also prints the gender counts and the
    earliest, most recent and most common birth year. Each optional column
    is checked on its own, so a missing one is reported accurately and does
    not hide the other.

    Args:
        df - Pandas DataFrame with a 'User Type' column and, optionally,
             'Gender' and numeric 'Birth Year' columns.
    """

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Counts of user types.
    users = df['User Type'].value_counts().to_frame()
    print('Types of Bikes Users: ', users)

    # Explicit column checks replace the former bare `except:`, which hid
    # every error (including real bugs) behind the "gender missing" message.
    if 'Gender' in df.columns:
        print('Let`s discover men or women use bikes more?: ', df['Gender'].value_counts())
    else:
        print('Gender Data is missed for washington!')

    if 'Birth Year' in df.columns:
        birth_years = df['Birth Year'].dropna()
        if birth_years.empty:
            # idxmax() on empty counts would raise; report it instead.
            print('No birth year values are available for the selected trips.')
        else:
            # Cast to int so years print as 1989 rather than 1989.0
            # (the column is read as float when it contains missing values).
            print('Oldest person used bike share was born in year: ', int(birth_years.min()))
            print('Youngest person used bike share was born in year: ', int(birth_years.max()))
            print('Most people used bike share were born in year:', int(birth_years.value_counts().idxmax()))
    else:
        print('Birth Year Data is missed for this city!')

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

def raw_data_display(df):
    """Shows the raw trip data five rows at a time for as long as the user asks.

    The user is asked before every page. Paging stops when the answer is
    anything other than "yes" (case insensitive, surrounding whitespace
    ignored) or when all rows have been shown.

    Args:
        df - Pandas DataFrame whose rows are displayed.
    Returns:
        df - the same DataFrame, unchanged.
    """
    total_rows = len(df)
    start_indx = 0

    ask_display = input('Do you like display 5 rows of raw data? yes or no\n').strip().lower()
    while ask_display == 'yes':
        # Stop once the data is exhausted; without this check the loop kept
        # prompting forever and printed empty frames past the last row.
        if start_indx >= total_rows:
            print('No more raw data to display.')
            break

        print(df.iloc[start_indx : start_indx+5])
        start_indx += 5

        # Only ask again when there is another page left to show.
        if start_indx < total_rows:
            ask_display = input('Do you like display more 5 rows? yes or no\n').strip().lower()

    return df

def main():
    """Run the interactive session: ask for filters, report, offer a restart.

    Every round collects the filters, loads the matching data, runs each
    report on it in order and then asks whether to start over. The session
    ends on any answer other than "yes" (case insensitive).
    """
    # Reports are run in this fixed order on the same DataFrame.
    reports = (
        time_stats,
        station_stats,
        trip_duration_stats,
        user_stats,
        raw_data_display,
    )

    keep_going = True
    while keep_going:
        filters = get_filters()
        df = load_data(*filters)

        for report in reports:
            report(df)

        answer = input('\nWould you like to restart? Enter yes or no.\n')
        keep_going = answer.lower() == 'yes'


# Start the interactive session only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Hello! Let's explore some US bikeshare data!

This is Bike Share Data Story for chicago in all month and day of All

----------------------------------------

Calculating The Most Frequent Times of Travel...

Rush Month is: June 
Rush Day is: Tuesday 
Rush Hour is: 17 


This took 0.44222378730773926 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...


Best Station people like riding from is: Streeter Dr & Grand Ave 
Best Destination for people is: Streeter Dr & Grand Ave 
Best Trip for people is: Lake Shore Dr & Monroe SttoStreeter Dr & Grand Ave


This took 0.2854576110839844 seconds.
----------------------------------------

Calculating Trip Duration...

Total Time people rides bikes: 280871787 sec
Average Time of a bike trip is: 936.23929 sec


This took 0.0 seconds.
----------------------------------------

Calculating User Stats...

Types of Bikes Users:              User Type
Subscriber     238889
Customer        61110
Dependent 