In [1]:
import time
import pandas as pd
import numpy as np
import os


# Maps each supported city name (lower-case) to the CSV file holding its trips.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

def _prompt_for_choice(prompt, choices):
    """Ask ``prompt`` repeatedly until the answer is one of ``choices``.

    The answer is stripped of surrounding whitespace and lower-cased before it
    is compared, so "  Chicago " is accepted as "chicago".

    Returns:
        (str) the normalized (stripped, lower-case) answer
    """
    while True:
        answer = input(prompt).strip().lower()
        if answer in choices:
            return answer
        print("Sorry, wrong input, do try again.")


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Each question is repeated until a valid answer is given; answers are
    matched case-insensitively and ignoring surrounding whitespace.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    cities = ('chicago', 'new york city', 'washington')
    months = ('january', 'february', 'march', 'april', 'may', 'june', 'all')
    days = ('sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
            'friday', 'saturday', 'all')

    print('Hello! Let\'s explore some US bikeshare data!')

    city = _prompt_for_choice(
        "\nWhich of these cities would you like to filter: chicago, new york city or washington?\n",
        cities)

    month = _prompt_for_choice(
        "\nWould you like to filter by month? If yes, type january, february, march, april, may, june or type 'all' if you want to view all the month at once.\n",
        months)

    day = _prompt_for_choice(
        "\nAre you specific about a particular day you would like to check the data? If yes, do type sunday, monday, tuesday, wednesday, thursday, friday, saturday or type 'all' if you want all the days of the week.\n",
        days)

    print('-'*40)
    return city, month, day


def load_data(city, month, day):
    """
    Reads one city's trip file and narrows it to the requested month and weekday.

    Args:
        (str) city - key into CITY_DATA selecting the CSV file to read
        (str) month - lower-case month name (january..june), or "all" to keep every month
        (str) day - name of the day of week to keep, or "all" to keep every day
    Returns:
        df - Pandas DataFrame of the city's trips, with added 'month' and
             'day_of_week' columns, restricted to the requested month and day
    """
    month_names = ('january', 'february', 'march', 'april', 'may', 'june')

    trips = pd.read_csv(CITY_DATA[city])

    # parse the timestamps, then derive the two columns used for filtering
    trips['Start Time'] = pd.to_datetime(trips['Start Time'])
    started = trips['Start Time']
    trips['month'] = started.dt.month
    trips['day_of_week'] = started.dt.day_name()

    if month != 'all':
        # position in the list (1-based) is the calendar month number
        month_number = month_names.index(month) + 1
        in_month = trips['month'] == month_number
        trips = trips[in_month]

    if day != 'all':
        # day_name() yields capitalized names, hence title()
        on_day = trips['day_of_week'] == day.title()
        trips = trips[on_day]

    return trips


def time_stats(df):
    """Displays statistics on the most frequent times of travel.

    Prints the most common month (as its number), day of week and start hour.
    Reads the 'month', 'day_of_week' and 'Start Time' columns; 'Start Time'
    must already be a datetime column. The DataFrame passed in is not modified.
    If it has no rows, a notice is printed instead of the statistics.

    Args:
        df - Pandas DataFrame of trips
    """

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column has no first element to report
        print('No trip data available for the selected filters.')
    else:
        popular_month = df['month'].mode()[0]
        print('The Most Common Month is', popular_month)

        popular_day = df['day_of_week'].mode()[0]
        print('The Most Common day of the Week is', popular_day)

        # derive the hours in a temporary Series rather than writing a new
        # column into the caller's (possibly filtered) DataFrame
        start_hours = df['Start Time'].dt.hour
        popular_hour = start_hours.mode()[0]
        print('The Most Common Start Hour is', popular_hour)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Prints the most used start station, the most used end station, and the
    most frequent (start station, end station) pair. The pair is counted as a
    pair: it is not simply the top start station combined with the top end
    station. Reads the 'Start Station' and 'End Station' columns. If the
    DataFrame has no rows, a notice is printed instead of the statistics.

    Args:
        df - Pandas DataFrame of trips
    """

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    if df.empty:
        # idxmax() raises on an empty count table
        print('No trip data available for the selected filters.')
    else:
        start_station = df['Start Station'].value_counts().idxmax()
        print('\nThe most Commonly used start station is', start_station)

        end_station = df['End Station'].value_counts().idxmax()
        print('\nThe most Commonly used end station is', end_station)

        # count rows per (start, end) pair; the index of the largest count is
        # the (start, end) tuple of the most frequent trip
        trip_counts = df.groupby(['Start Station', 'End Station']).size()
        trip_start, trip_end = trip_counts.idxmax()
        print('\nThe most frequently used combination of start station and end station trip are', trip_start, " & ", trip_end, 'respectively')

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration.

    Reads the 'Trip Duration' column, divides the total by 86400 to print it
    in days and the mean by 60 to print it in minutes (i.e. the column is
    treated as seconds). Missing durations are skipped by both figures.

    Args:
        df - Pandas DataFrame of trips
    """

    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    durations = df['Trip Duration']

    # Series.sum() is vectorized and, like mean(), skips NaN; the builtin
    # sum() would turn the whole total into NaN on a single missing value
    total_travel_time = durations.sum()
    print('The total travel time is', total_travel_time/86400, " Days")

    mean_travel_time = durations.mean()
    print('The mean travel time is', mean_travel_time/60, " Minutes")

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def user_stats(df):
    """Displays statistics on bikeshare users.

    Prints the count of each 'User Type'. If the DataFrame has a 'Gender'
    column its value counts are printed; if it has a 'Birth Year' column the
    earliest, most recent and most common year are printed as whole numbers.
    When a column is absent, or 'Birth Year' holds no values, a
    "No data available" notice is printed in place of the statistic.

    Args:
        df - Pandas DataFrame of trips
    """

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    user_types = df['User Type'].value_counts()
    print('The user types are\n', user_types)

    # A missing column is a property of the loaded dataset, not of the
    # month/day filter, so the notice refers to the city rather than the month.
    if 'Gender' in df.columns:
        gender_types = df['Gender'].value_counts()
        print('\nGender Types:\n', gender_types)
    else:
        print("\nGender Types:\nNo data available for this city.")

    birth_years = df['Birth Year'].dropna() if 'Birth Year' in df.columns else None

    if birth_years is not None and not birth_years.empty:
        # the column is read as float when it contains gaps; show plain years
        print('\nThe earliest Year is', int(birth_years.min()))
        print('\nThe most Recent Year is', int(birth_years.max()))
        print('\nThe most Common Year is', int(birth_years.value_counts().idxmax()))
    else:
        # column absent -> dataset has no birth years at all;
        # column present but empty -> the current selection has none
        scope = 'this city' if birth_years is None else 'the selected filters'
        for label in ('Earliest Year', 'Most Recent Year', 'Most Common Year'):
            print("\n%s:\nNo data available for %s." % (label, scope))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

    
def display_data(df):
    """Display the sample of the data frame as requested by the user

    Asks whether to show the first five rows; after each batch of five rows
    asks whether to show the next five. Answers are matched case-insensitively
    and ignoring surrounding whitespace; anything other than yes/no repeats
    the question. Returns when the user answers no or when every row has
    been shown.

    Args:
        df - Pandas DataFrame of trips
    """
    page_size = 5
    start_loc = 0
    prompt = 'Would you like to view the first five rows of individual trip data? Enter yes or no\n'

    while True:
        answer = input(prompt).strip().lower()
        if answer not in ('yes', 'no'):
            print('Please try again - Enter yes or no\n')
            continue
        if answer == 'no':
            return

        print(df.iloc[start_loc:start_loc + page_size])
        start_loc += page_size

        # stop instead of offering further (empty) pages past the last row
        if start_loc >= len(df):
            print('\nNo more rows to display.')
            return

        prompt = '\nDo you wish to continue viewing the rows? Enter yes or no\n'
    

def main():
    """Run the interactive session: pick filters, load the data, show the
    optional raw rows and every statistics report, then offer to start over.

    The session repeats for as long as the user answers "yes" (in any letter
    case) to the restart question.
    """
    reports = (time_stats, station_stats, trip_duration_stats, user_stats)

    keep_going = True
    while keep_going:
        chosen_filters = get_filters()
        df = load_data(*chosen_filters)

        display_data(df)
        for report in reports:
            report(df)

        answer = input('\nWould you like to restart? Enter yes or no.\n')
        keep_going = answer.lower() == 'yes'


# Only start the interactive session when executed as a script.
if __name__ == "__main__":
    main()


Hello! Let's explore some US bikeshare data!

Which of these cities would you like to filter: chicago, new york city or washington?
Chicago

Would you like to filter by month? If yes, type january, february, march, april, may, june or type 'all' if you want to view all the month at once.
MAy

Are you specific about a particular day you would like to check the data? If yes, do type sunday, monday, tuesday, wednesday, thursday, friday, saturday or type 'all' if you want all the days of the week.
FridaY
----------------------------------------
Would you like to view the first five rows of individual trip data? Enter yes or no
no

Calculating The Most Frequent Times of Travel...

The Most Common Month is 5
The Most Common day of the Week is Friday
The Most Common Start Hour is 8

This took 0.003985881805419922 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...


The most Commonly used start station is Clinton St & Washington Blvd

The most 