In [1]:
import pandas as pd
import numpy as np
import time
# note: install tabulate (pip install tabulate)
import tabulate
from tabulate import tabulate


CITY_DATA = { 'chicago': 'chicago.csv',
              'new york': 'new_york_city.csv',
              'washington': 'washington.csv' }

def get_filters():
     
    #Ask user to specify a city (chicago, new york city, washington)
    cities=["chicago","new york","washington"]
    city=input("Would you like to see data for Chicago, New York or Washington?").lower()
    while city not in cities:
        city=input("Invalid input. Please try again.\nWould you like to see data for Chicago, New York or Washington?").lower()

    #Ask user to specify a month ("january", "february", "march" , "april", "may" , "june")
    months=["january", "february", "march" , "april", "may" , "june", "all"]
    month=input("Which month? January, February, March , April, May , June or All?").lower()
    while month not in months:
        month=input("Invalid input! Please try again.\nWhich month? January, February, March , April, May , June or All?").lower()


    #Ask user to specify a day (1:7)
    day=int(input("Which day? (1:7)."))
     
    print('\n\n\n')
    return city, month, day




def load_data(city, month, day): 
    # load data file  
    df = pd.read_csv(CITY_DATA[city])

    # convert the Start Time column to datetime
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    

    # create new columns fo month, day of week and hour
    df['month'] = df['Start Time'].dt.month
    df['day'] = df['Start Time'].dt.weekday
    df['hour'] = df['Start Time'].dt.hour
        
   
    # filter by month if applicable
    if month != 'all':
        # use the index of the months list to get the corresponding int
        months = ['january', 'february', 'march', 'april', 'may', 'june']
        month = months.index(month) + 1

        # filter by month to create the new dataframe
        df = df[df['month'] == month]

    # filter by day of week if applicable
    if day < 1 or day > 7:
        day=int(input("Invalid day! Please enter a day number from 1 to 7. "))
        
    if day != 'all':
        # filter by day of week to create the new dataframe
        df = df[df['day'] == int(day)]


    return df



def display_raw_data(df): 

    print('\nRaw data is available to check... \n')

    # counter for the rows 
    start_loc = 0

    # user input
    view_data = input('Would you like to view 5 rows of individual trip data? Enter Yes or No\n').lower()
    
    
    # Validating user input 
    while view_data not in ['yes', 'no']:
        view_data = input('Invalid input! Please enter Yes or No\n').lower()
        
    # if yes
    while view_data.lower() == 'yes': 
        print(tabulate(df.iloc[np.arange(0,5)], headers ="keys"))
        start_loc+=5
        view_data = input('Do you wish to view another 5 rows of individual trip data? Enter Yes or No\n').lower()
        
    # if no
    if view_data.lower() == 'no':
        print('\nThank you')

        
    while view_data not in ['yes', 'no']:
        view_data = input('Invalid input! Please enter Yes or No\n').lower()
        # if yes
        while view_data.lower() == 'yes':
            print(df.iloc[start_loc:start_loc+5])
            start_loc+=5
            view_data = input('Do you wish to view another 5 rows of individual trip data? Enter Yes or No\n').lower()

        # if no
        if view_data.lower() == 'no':
            print('\nThank you')
        

def time_stats(df): 

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    #  display the most common month
    popular_month = df['month'].mode()[0]
    print('\nMost Frequent Month:', popular_month)


    #   display the most common day of week
    popular_day = df['day'].mode()[0]
    print('\nMost Frequent Day:', popular_day)


    #  display the most common start hour
    popular_hour = df['hour'].mode()[0]
    print('\nMost Frequent Start Hour:', popular_hour)


    print("\nThis took %s seconds." % (time.time() - start_time))
    
    print('\n\n\n')


def station_stats(df): 

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    # display most commonly used start station
    popular_st_station = df['Start Station'].mode()[0]
    print('\nMost commonly used start station:', popular_st_station)


    #  display most commonly used end station
    popular_end_station = df['End Station'].mode()[0]
    print('\nMost commonly used end station:', popular_end_station)


    # display most frequent combination of start station and end station trip
    df["route"] = df["Start Station"] + " - " + df["End Station"]
    most_frequent_route = df['route'].mode()[0]
    # then user the print statements to print it
    print('\nMost frequent combination of start station and end station trip:',most_frequent_route)


    print("\nThis took %s seconds." % (time.time() - start_time))
    
    print('\n\n\n')


def trip_duration_stats(df): 

    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    # TO DO: display total travel time
    total_travel_time = df["Trip Duration"].sum()
    print("The total travel time is:",total_travel_time)


    # TO DO: display mean travel time
    mean_travel_time = df["Trip Duration"].mean()
    print("The mean travel time is:",mean_travel_time)


    print("\nThis took %s seconds." % (time.time() - start_time))
    
    print('\n\n\n')


def user_stats(df,city): 

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # TO DO: Display counts of user types
    user_types = df['User Type'].value_counts()
    print("\nThe counts of user types is:\n",user_types)
    
    if city != "washington":
        
        # TO DO: Display counts of gender
        gender = df['Gender'].value_counts()
        print("\nThe counts of gender is:\n",gender)


        # TO DO: Display earliest, most recent, and most common year of birth
        earliest_year = int(np.min(df['Birth Year']))
        print("\nThe earliest year of birth is:",earliest_year)

        most_recent_year = int(np.max(df['Birth Year']))
        print("\nThe most recent year of birth is:",most_recent_year)

        comn_birth_year = int(df['Birth Year'].mode()[0])
        print("\nThe most common year of birth is:",comn_birth_year)


        print("\nThis took %s seconds." % (time.time() - start_time))
        
        print('\n\n\n') 
    else: 
        print("\nGender & Birth year data not available for Washington city\n")
        print('\n\n\n') 


def main():
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)
        display_raw_data(df)
        print('\n\n\n') 

    
        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df,city)

        restart = input('\nWould you like to restart? Enter yes or no.\n').lower()
        
        if restart.lower() == 'no':
            print("\nThank you!")
            break
        elif restart.lower() != 'yes':
            restart = input('\nInvalid input! Please enter yes or no.\n').lower()


if __name__ == "__main__":
	main()


Would you like to see data for Chicago, New York or Washington?Washington
Which month? January, February, March , April, May , June or All?may
Which day? (1:7).2





Raw data is available to check... 

Would you like to view 5 rows of individual trip data? Enter Yes or No
yes
       Unnamed: 0  Start Time           End Time               Trip Duration  Start Station                             End Station                       User Type      month    day    hour
---  ------------  -------------------  -------------------  ---------------  ----------------------------------------  --------------------------------  -----------  -------  -----  ------
 89       1347499  2017-05-31 08:00:30  2017-05-31 08:12:36          726.087  Jefferson Dr & 14th St SW                 Hains Point/Buckeye & Ohio Dr SW  Customer           5      2       8
115       1340812  2017-05-31 01:56:37  2017-05-31 02:04:56          498.997  Lincoln Park / 13th & East Capitol St NE  2nd St & Massachusetts Ave NE   