In [1]:
import time
import pandas as pd
import numpy as np

# Lookup table: lowercase city name -> CSV file read for that city.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

# Greeting printed once, as soon as the module body runs.
print("Hello! Let's explore some US bikeshare data!\n")


def _prompt_for_choice(prompt, choices, error_message):
    """
    Keeps asking the user a question until the answer is one of the allowed choices.

    Args:
        (str) prompt - text shown to the user on every attempt
        (container of str) choices - accepted answers, written in lowercase
        (str) error_message - text printed after each rejected answer
    Returns:
        (str) the accepted answer, lowercased and without surrounding whitespace
    """
    while True:
        # Normalise before validating so 'Chicago ' and 'chicago' are the same answer.
        answer = input(prompt).strip().lower()
        if answer in choices:
            return answer
        print(error_message)


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Every question is repeated until a valid answer is typed. Answers are
    matched case-insensitively and surrounding whitespace is ignored.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    # city must be one of the keys of CITY_DATA (chicago, new york city, washington)
    city = _prompt_for_choice(
        'Choose city to explore from Chicago, New York City, or Washington. ',
        CITY_DATA,
        'Make sure that you typed the city correctly',
    )

    # month: all, january, february, ... , june
    months = ['all', 'january', 'february', 'march', 'april', 'may', 'june']
    month = _prompt_for_choice(
        'Type a month to filter by ,or type "all" for no filtration. ',
        months,
        'Make sure that you typed the month correctly',
    )

    # day of week: all, monday, tuesday, ... sunday
    days = ['all', 'saturday', 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday']
    day = _prompt_for_choice(
        'Type a day to filter by ,or type "all" for no filtration. ',
        days,
        'Make sure that you typed the day correctly',
    )

    print('-'*40)
    return city, month, day


def load_data(city, month, day):
    """
    Reads the CSV file registered for a city and narrows it down to the requested month and day.

    Two helper columns are appended to the data: 'month' (month number taken
    from 'Start Time') and 'day_of_week' (day name taken from 'Start Time').

    Args:
        (str) city - name of the city to analyze; must be a key of CITY_DATA
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    Raises:
        KeyError - if city is not a key of CITY_DATA
        ValueError - if month is neither "all" nor one of january..june
    """
    trips = pd.read_csv(CITY_DATA[city])

    # Parse the timestamps once and derive both helper columns from them.
    started_at = pd.to_datetime(trips['Start Time'])
    trips['Start Time'] = started_at
    trips['month'] = started_at.dt.month
    trips['day_of_week'] = started_at.dt.day_name()

    if month != 'all':
        # Position in this list (1-based) is the calendar month number.
        month_names = ['january', 'february', 'march', 'april', 'may', 'june']
        month_number = month_names.index(month) + 1
        trips = trips.loc[trips['month'] == month_number]

    if day != 'all':
        # day_name() yields capitalised names such as 'Monday'.
        wanted_day = day.title()
        trips = trips.loc[trips['day_of_week'] == wanted_day]

    return trips


def time_stats(df):
    """
    Displays statistics on the most frequent times of travel.

    Prints the most common start hour, day of week and month, all derived
    from the 'Start Time' column. The DataFrame passed in is not modified.

    Args:
        (DataFrame) df - trip data with a 'Start Time' column (datetimes or
                         values that pd.to_datetime can parse)
    """

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column has no first element; report instead of crashing.
        print('No trips match the selected filters.')
    else:
        # Convert before using the .dt accessor, and work on a separate Series
        # so no helper columns leak into the caller's DataFrame.
        start_times = pd.to_datetime(df['Start Time'])

        # mode() returns the most frequent values sorted; [0] picks the first.
        popular_hour = start_times.dt.hour.mode()[0]
        popular_day = start_times.dt.day_name().mode()[0]
        popular_month = start_times.dt.month.mode()[0]

        print('Most Popular Hour:', popular_hour)
        print('Most Popular Day:', popular_day)
        print('Most Popular Month:', popular_month)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)



def station_stats(df):
    """
    Displays statistics on the most popular stations and trip.

    Prints the most common start station, the most common end station and
    the most common (start, end) pair. The DataFrame passed in is not modified.

    Args:
        (DataFrame) df - trip data with 'Start Station' and 'End Station' columns
    """

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column has no first element; report instead of crashing.
        print('No trips match the selected filters.')
    else:
        # most commonly used start station
        popular_start_station = df['Start Station'].mode()[0]
        print(f'Most commonly used start station: ({popular_start_station})')
        print()

        # most commonly used end station
        popular_end_station = df['End Station'].mode()[0]
        print(f'Most commonly used end station: ({popular_end_station})')
        print()

        # Build one label per trip in a separate Series so that no extra
        # column is added to the caller's DataFrame.
        combinations = '(' + df['Start Station'] + ')' + '   AND   ' + '(' + df['End Station'] + ')'
        popular_combination = combinations.mode()[0]
        print(f'Most frequent combination of start station and end \nstation trip: {popular_combination}')

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)



def trip_duration_stats(df):
    """
    Displays statistics on the total and average trip duration.

    Args:
        (DataFrame) df - trip data with a 'Trip Duration' column
    """

    print('\nCalculating Trip Duration...\n')
    began = time.time()

    durations = df['Trip Duration']

    # sum of all trip durations
    print(f'Total travel time: {durations.sum()}')
    # arithmetic mean of the trip durations
    print(f'Mean travel time: {durations.mean()}')

    elapsed = time.time() - began
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)



def user_stats(df):
    """
    Displays statistics on bikeshare users.

    Prints the count of each user type, the count of each gender (when a
    'Gender' column exists) and the earliest, most recent and most common
    year of birth (when a 'Birth Year' column exists and has values).

    Args:
        (DataFrame) df - trip data with a 'User Type' column and, optionally,
                         'Gender' and 'Birth Year' columns
    """

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    print(df['User Type'].value_counts())
    print()

    # Display counts of gender
    if 'Gender' in df.columns:
        print(df['Gender'].value_counts())
    else:
        print('There is no data about gender in this city!')
    print()

    # Display earliest, most recent, and most common year of birth
    if 'Birth Year' in df.columns:
        # Missing values are ignored by min/max/mode anyway; dropping them up
        # front lets us detect the "no usable values" case, where mode()
        # would be empty and indexing it with [0] would raise.
        birth_years = df['Birth Year'].dropna()
        if birth_years.empty:
            print('There are no birth years recorded for the selected trips!')
        else:
            print('Earliest year of birth: ', birth_years.min())
            print('Most recent year of birth: ', birth_years.max())
            print('Most common year of birth: ', birth_years.mode()[0])
    else:
        print('There is no data about birth year in this city!')
    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)



def raw_data(df):
    """
    Shows the rows of the DataFrame five at a time, for as long as the user asks for more.

    The user is asked before every page; 'yes' prints the next five rows,
    'no' stops, anything else repeats the question. Paging also stops, with
    a short notice, once every row has been shown, instead of printing
    empty frames forever.

    Args:
        (DataFrame) df - data to display
    """
    page_size = 5
    total_rows = len(df)
    first_row = 0

    while first_row < total_rows:
        answer = input('Do you want to see 5 lines of raw data?').strip().lower()
        if answer == 'yes':
            print(df.iloc[first_row:first_row + page_size, :])
            first_row += page_size
        elif answer == 'no':
            return
        else:
            print('Unknown input!')

    # Reached only when the rows ran out (or there were none to begin with).
    print('No more raw data to display.')



def main():
    """
    Runs the interactive session: ask for filters, load the data, print every
    report, then offer to start over. The session ends as soon as the answer
    to the restart question is anything other than 'yes' (case-insensitive).
    """
    keep_going = True
    while keep_going:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        # Reports are shown in this fixed order, each on the same DataFrame.
        for show_report in (time_stats, station_stats, trip_duration_stats, user_stats, raw_data):
            show_report(df)

        answer = input('\nWould you like to restart? Enter yes or no.\n')
        keep_going = answer.lower() == 'yes'


# Start the interactive session only when this file is executed directly.
if __name__ == "__main__":
    main()

Hello! Let's explore some US bikeshare data!

Choose city to explore from Chicago, New York City, or Washington. chicago
Type a month to filter by ,or type "all" for no filtration. all
Type a day to filter by ,or type "all" for no filtration. all
----------------------------------------

Calculating The Most Frequent Times of Travel...

Most Popular Hour: 17
Most Popular Day: Tuesday
Most Popular Month: 6

This took 0.11968541145324707 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

Most commonly used start station: (Streeter Dr & Grand Ave)

Most commonly used start station: (Streeter Dr & Grand Ave)

Most frequent combination of start station and end 
station trip: (Lake Shore Dr & Monroe St)   AND   (Streeter Dr & Grand Ave)

This took 0.380979061126709 seconds.
----------------------------------------

Calculating Trip Duration...

Total travel time: 280871787
Mean travel time: 936.23929

This took 0.0019931793212890625 seconds.