In [16]:
import time
import pandas as pd
import numpy as np

# Supported cities (lower-case, as compared against user input) mapped to
# the CSV file that holds each city's trip records.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

# Accepted month filters in calendar order, so list position + 1 is the
# month number; the trailing 'all' entry means "no month filter".
MONTHS = [
    'january',
    'february',
    'march',
    'april',
    'may',
    'june',
    'all',
]

# Accepted day filters, title-cased to match pandas' day_name() output;
# the trailing 'All' entry means "no day filter".
DAYS = [
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
    'Sunday',
    'All',
]

def _prompt_choice(prompt, valid_choices, error_message, normalize):
    """
    Repeats a prompt until the user's answer is one of the valid choices.

    Args:
        (str) prompt - text shown to the user on every attempt
        (container) valid_choices - collection the normalized answer must be in
        (str) error_message - text printed after each rejected answer
        (callable) normalize - maps the stripped answer to the casing used
                               by valid_choices (e.g. str.lower, str.title)
    Returns:
        (str) the first normalized answer found in valid_choices
    """
    while True:
        # Strip first so stray leading/trailing spaces don't reject an
        # otherwise valid answer.
        answer = normalize(input(prompt).strip())
        if answer in valid_choices:
            return answer
        print(error_message)


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Each question is repeated until the answer matches one of the entries in
    CITY_DATA, MONTHS or DAYS respectively; matching is case-insensitive.

    Returns:
        (str) city - lower-case name of the city to analyze
        (str) month - lower-case name of the month to filter by, or "all" to apply no month filter
        (str) day - title-case name of the day of week to filter by, or "All" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')

    # city names are the lower-case keys of CITY_DATA.
    city = _prompt_choice(
        "Choose one City >>> (Chicago, New York City or Washington): ",
        CITY_DATA,
        "Please Enter Valid City Name!",
        str.lower,
    )

    # month names are stored lower-case in MONTHS.
    month = _prompt_choice(
        "Choose Month to Filter >>> (January, February, March, April, May, June or All): ",
        MONTHS,
        "Please Enter Valid Month!",
        str.lower,
    )

    # day names are stored title-case in DAYS.
    day = _prompt_choice(
        "Choose Day to Filter >>> (Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday or All): ",
        DAYS,
        "Please Enter Valid Day!",
        str.title,
    )

    print('-'*40)
    return city, month, day


def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Missing values are forward-filled before filtering, and two helper
    columns are added: 'Month' (month number of 'Start Time') and 'Day'
    (weekday name of 'Start Time').

    Args:
        (str) city - name of the city to analyze (a key of CITY_DATA)
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - title-case name of the day of week to filter by, or "All" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    # read 'cityname.csv' file and load data into 'df' dataframe.
    df = pd.read_csv(CITY_DATA[city])

    # drop the 'Unnamed: 0' column when present; errors='ignore' avoids a
    # KeyError for a CSV that does not carry it.
    df = df.drop(columns='Unnamed: 0', errors='ignore')

    # fill missing values using forward fill; DataFrame.ffill() replaces the
    # deprecated fillna(method='ffill') spelling.
    df = df.ffill()

    # convert the time columns to datetime.
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    df['End Time'] = pd.to_datetime(df['End Time'])

    # derive month number and weekday name from 'Start Time'.
    df['Month'] = df['Start Time'].dt.month
    df['Day'] = df['Start Time'].dt.day_name()

    # filter data by month; MONTHS is in calendar order, so position + 1
    # is the month number.
    if month != 'all':
        month_index = MONTHS.index(month) + 1
        df = df[df['Month'] == month_index]

    # filter data by day.
    if day != 'All':
        df = df[df['Day'] == day]

    # Hand back an independent frame: callers add columns to the result, and
    # doing that on a boolean-indexed selection raises SettingWithCopyWarning.
    return df.copy()


def time_stats(df, month, day):
    """
    Prints the most frequent month, day of week, start hour and end hour.

    When a month or day filter is active, the filter value itself is
    reported; otherwise the mode of the 'Month' / 'Day' column is used.
    As a side effect, 'Start Hour' and 'End Hour' columns are added to df.

    Args:
        df - DataFrame with 'Start Time', 'End Time', 'Month' and 'Day' columns
        (str) month - lower-case month filter, or "all" for no month filter
        (str) day - title-case day filter, or "All" for no day filter
    """
    print('\nCalculating The Most Frequent Times of Travel...\n')
    began = time.time()

    # month: the filter value when filtered, else the most common month
    # number translated back to its name through MONTHS.
    if month != 'all':
        month_label = month.title()
    else:
        month_label = MONTHS[df['Month'].mode()[0] - 1].title()
    print('The Most Common Month is: ', month_label)

    # day: the filter value when filtered, else the most common weekday.
    if day != 'All':
        day_label = day
    else:
        day_label = df['Day'].mode()[0]
    print('The Most Common Day is: ', day_label)

    # (datetime column, derived hour column, report label) for each report.
    hour_reports = (
        ('Start Time', 'Start Hour', 'The Most Common Start Hour is: '),
        ('End Time', 'End Hour', 'The Most Common End Hour is: '),
    )
    for source, target, label in hour_reports:
        # store the hour of day on df, then report its most common value.
        df[target] = df[source].dt.hour
        print(label, df[target].mode()[0])

    print("\nThis took %s seconds." % (time.time() - began))
    print('-'*40)

    
def station_stats(df):
    """
    Prints the most popular start station, end station and trip.

    A trip is the text "<start station> to <end station>"; as a side effect
    it is stored on df in a new 'Trip' column.

    Args:
        df - DataFrame with 'Start Station' and 'End Station' columns
    """
    print('\nCalculating The Most Popular Stations and Trip...\n')
    began = time.time()

    # report the most common value of each station column in turn.
    station_reports = (
        ('The Most Commonly used Start Station is: ', 'Start Station'),
        ('The Most Commonly used End Station is: ', 'End Station'),
    )
    for label, column in station_reports:
        print(label, df[column].mode()[0])

    # combine both stations into one 'Trip' value per row.
    df['Trip'] = df['Start Station'] + " to " + df['End Station']

    # report the most frequent start/end combination.
    top_trip = df['Trip'].mode()[0]
    print('The Most Frequent Trip from: ', top_trip)

    print("\nThis took %s seconds." % (time.time() - began))
    print('-'*40)


def trip_duration_stats(df):
    """
    Prints the total, mean, shortest and longest trip duration.

    The mean is truncated to an integer before printing; every value is
    followed by the "'s" suffix.

    Args:
        df - DataFrame with a 'Trip Duration' column
    """
    print('\nCalculating Trip Duration...\n')
    began = time.time()

    durations = df['Trip Duration']

    # (label, aggregate) pairs; each aggregate is evaluated only when its
    # line is printed, so the lines appear one by one in this order.
    reports = (
        ('The Total Travel Time is: ', durations.sum),
        ('The Mean Travel Time is: ', lambda: int(durations.mean())),
        ('The Shortest Travel Time is: ', durations.min),
        ('The Longest Travel Time is: ', durations.max),
    )
    for label, compute in reports:
        print(label, compute(), "'s")

    print("\nThis took %s seconds." % (time.time() - began))
    print('-'*40)


def user_stats(df):
    """
    Prints counts per user type and, when available, gender and birth years.

    The 'Gender' and 'Birth Year' sections are skipped for a DataFrame that
    lacks the corresponding column.

    Args:
        df - DataFrame with a 'User Type' column and optionally 'Gender'
             and 'Birth Year' columns
    """
    print('\nCalculating User Stats...\n')
    began = time.time()

    # counts of each user type.
    print(df['User Type'].value_counts(), '\n')

    present = df.columns

    # counts of each gender, only when the column exists.
    if 'Gender' in present:
        print(df['Gender'].value_counts(), '\n')

    # oldest, most recent and most common birth year, only when the column
    # exists; every value is converted to int before printing.
    if 'Birth Year' in present:
        birth_years = df['Birth Year']
        reports = (
            ('The Oldest Birth Year is: ', birth_years.min),
            ('The Most Recent Birth Year is: ', birth_years.max),
            ('The Most Common Birth Year is: ', lambda: birth_years.mode()[0]),
        )
        for label, pick in reports:
            print(label, int(pick()))

    print("\nThis took %s seconds." % (time.time() - began))
    print('-'*40)

def get_raw_data(df):
    """
    Displays raw data 5 rows at a time for as long as the user agrees.

    The user is asked before every chunk; any answer other than 'y' or
    'yes' (case-insensitive, surrounding spaces ignored) stops the display.
    The display also stops, with a notice, once the last rows of df have
    been shown.

    Args:
        df - DataFrame whose rows are displayed
    """
    total_rows = df.shape[0]
    index = 0
    prompt = '\nDo you want to see 5 lines of raw data? Enter yes to continue: '

    # check user agreement before every chunk.
    while input(prompt).strip().lower() in ['y', 'yes']:
        # iloc slicing stops at the end of the frame, so the final chunk
        # may simply be shorter than 5 rows.
        print(df.iloc[index:index + 5])
        index += 5

        # nothing left to page through: tell the user and stop asking.
        if index >= total_rows:
            print('\nNo more raw data to display.')
            break
       
    


def main():
    """
    Runs the interactive bikeshare exploration until the user declines to restart.

    Each round asks for the filters, loads the matching data, prints the
    time, station, trip-duration and user statistics, and offers the raw
    data. Another round starts only when the restart answer is 'yes' or 'y'
    (case-insensitive).
    """
    keep_going = True
    while keep_going:
        # collect the filters and load the matching trips.
        city, month, day = get_filters()
        df = load_data(city, month, day)

        # print each statistics section, then offer the raw rows.
        time_stats(df, month, day)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)
        get_raw_data(df)

        answer = input('\nWould you like to restart? Enter yes to continue: ')
        keep_going = answer.lower() in ['yes', 'y']


# Start the interactive session only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Hello! Let's explore some US bikeshare data!
Choose one City >>> (Chicago, New York City or Washington): washington
Choose Month to Filter >>> (January, February, March, April, May, June or All): june
Choose Day to Filter >>> (Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday or All): friday
----------------------------------------

Calculating The Most Frequent Times of Travel...

The Most Common Month is:  June
The Most Common Day is:  Friday
The Most Common Start Hour is:  8
The Most Common End Hour is:  8

This took 0.029999971389770508 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

The Most Commonly used Start Station is:  Jefferson Dr & 14th St SW
The Most Commonly used End Station is:  Columbus Circle / Union Station
The Most Frequent Trip from:  Jefferson Dr & 14th St SW to Jefferson Dr & 14th St SW

This took 0.03000020980834961 seconds.
----------------------------------------

Calculating Trip Duration...

T


Do you want to see 5 lines of row data? Enter yes to continue: n

Would you like to restart? Enter yes to continue: y
Hello! Let's explore some US bikeshare data!
Choose one City >>> (Chicago, New York City or Washington): chicago
Choose Month to Filter >>> (January, February, March, April, May, June or All): all
Choose Day to Filter >>> (Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday or All): sunday
----------------------------------------

Calculating The Most Frequent Times of Travel...

The Most Common Month is:  June
The Most Common Day is:  Sunday
The Most Common Start Hour is:  15
The Most Common End Hour is:  15

This took 0.07300066947937012 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

The Most Commonly used Start Station is:  Streeter Dr & Grand Ave
The Most Commonly used End Station is:  Streeter Dr & Grand Ave
The Most Frequent Trip from:  Lake Shore Dr & Monroe St to Streeter Dr & Grand Ave

This too