In [1]:
import time
import pandas as pd
import numpy as np

# Maps each supported city name (lowercase, as matched against user input)
# to the CSV file that holds its trip data.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york': 'new_york_city.csv',
    'washington': 'washington.csv',
}


In [2]:

def _ask_choice(prompt, choices, error_message):
    """Prompts repeatedly until the answer is one of `choices`, then returns it."""
    while True:
        # Lowercase and collapse whitespace so answers such as " New  York "
        # are accepted instead of being rejected for stray spaces.
        answer = ' '.join(input(prompt).lower().split())
        if answer in choices:
            return answer
        print(error_message)


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    The user first picks a city (one of the keys of CITY_DATA), then chooses
    whether to filter by month, day, both or none. A month and/or day is only
    asked for when the chosen filter requires it. Every question is repeated
    until a valid answer is given; answers are case-insensitive and
    surrounding whitespace is ignored.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    months = ['january', 'february', 'march', 'april', 'may', 'june']
    days = ['saturday', 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday']
    retry_message = "Choose one of the options above!"

    print('Hello! Let\'s explore some US bikeshare data!')
    print()

    # get user input for city (any key of CITY_DATA)
    city = _ask_choice(
        "Enter the city you want to explore its data; Chicago, New york or Washington: ",
        CITY_DATA,
        "Invalid city name",
    )

    # Named filter_choice (not `filter`) so the builtin is not shadowed.
    filter_choice = _ask_choice(
        "Do you want to filter the data to a specific month, day, both or none?: ",
        ['month', 'day', 'both', 'none'],
        retry_message,
    )

    # get user input for month (january ... june) only when filtering by month
    if filter_choice in ('month', 'both'):
        month = _ask_choice(
            "Which month do you want to present its data? january, february... june: ",
            months,
            retry_message,
        )
    else:
        month = 'all'

    # get user input for day of week only when filtering by day
    if filter_choice in ('day', 'both'):
        day = _ask_choice(
            "Which day do you want to present its data? saturday, sunday... friday: ",
            days,
            retry_message,
        )
    else:
        day = 'all'

    print('-'*40)
    return city, month, day



In [3]:

def load_data(city, month, day):
    """
    Reads the CSV registered for `city` in CITY_DATA and narrows it down to
    the requested month and weekday.

    Two helper columns are derived from 'Start Time' before filtering:
    'month' (month number) and 'day_of_week' (weekday name).

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    trips = pd.read_csv(CITY_DATA[city])

    # Parse the start timestamps once and derive both helper columns from them.
    trips['Start Time'] = pd.to_datetime(trips['Start Time'])
    started = trips['Start Time'].dt
    trips['month'] = started.month
    trips['day_of_week'] = started.day_name()

    if month != 'all':
        month_names = ['january', 'february', 'march', 'april', 'may', 'june']
        # Position in the list (1-based) is the calendar month number.
        month_number = month_names.index(month) + 1
        trips = trips[trips['month'] == month_number]

    if day != 'all':
        # day_name() yields capitalised names, so capitalise the filter to match.
        wanted_day = day.title()
        trips = trips[trips['day_of_week'] == wanted_day]

    return trips


In [4]:

def time_stats(df):
    """
    Displays statistics on the most frequent times of travel.

    Prints the most common month, day of week and start hour. The frame is
    only read: no helper column is added to it, so it is safe to pass a
    filtered slice of a larger frame.

    Args:
        df - Pandas DataFrame with a datetime 'Start Time' column, a 'month'
             column holding 1-based month numbers and a 'day_of_week' column
    """
    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column has no first element; report instead of crashing.
        print('No trips match the selected filters.')
    else:
        # All twelve names so a month number above 6 cannot raise IndexError.
        months = ['january', 'february', 'march', 'april', 'may', 'june',
                  'july', 'august', 'september', 'october', 'november', 'december']

        # display the most common month
        month = int(df['month'].mode()[0])
        print('the most common month is: ', months[month-1])

        # display the most common day of week
        day = df['day_of_week'].mode()[0]
        print('the most common day of week is: ', day)

        # display the most common start hour (computed on the fly, df is not modified)
        most_common_hour = df['Start Time'].dt.hour.mode()[0]
        print('the most common hour is: ', most_common_hour)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)



In [5]:
def station_stats(df):
    """
    Displays statistics on the most popular stations and trip.

    Prints the most frequent value of 'Start Station', of 'End Station', and
    of the start/end pair (joined as "<start> to <end>").
    """
    print('\nCalculating The Most Popular Stations and Trip...\n')
    began_at = time.time()

    # most commonly used start station
    departures = df['Start Station']
    print('the most common start station is: ', departures.mode()[0])

    # most commonly used end station
    arrivals = df['End Station']
    print('the most common end station is: ', arrivals.mode()[0])

    # most frequent start/end combination, compared as a single label per trip
    routes = departures +' to '+ arrivals
    top_route = routes.mode()[0]
    print('the most common trip is: ', top_route)

    elapsed = time.time() - began_at
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)


In [6]:

def _format_duration(duration):
    """Renders a pandas Timedelta as 'D days H hours M minutes S seconds'."""
    # Timedelta.seconds is the sub-day remainder, so split it into h/m/s.
    hours, remainder = divmod(duration.seconds, 60 * 60)
    minutes, seconds = divmod(remainder, 60)
    return '{} days {} hours {} minutes {} seconds'.format(
        duration.days, hours, minutes, seconds)


def trip_duration_stats(df):
    """
    Displays statistics on the total and average trip duration.

    The duration of each trip is taken as 'End Time' minus 'Start Time'; both
    columns are parsed with pd.to_datetime, so they may hold strings or
    datetimes.

    Args:
        df - Pandas DataFrame with 'Start Time' and 'End Time' columns
    """
    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    if df.empty:
        # The mean of no durations is NaT, which would print as "nan days ...".
        print('No trips match the selected filters.')
    else:
        # Parse and subtract once; both statistics reuse the same series.
        trip_durations = pd.to_datetime(df['End Time']) - pd.to_datetime(df['Start Time'])

        # display total travel time
        print('The total travel duration is: ' + _format_duration(trip_durations.sum()))

        # display mean travel time
        print('The average travel duration is: ' + _format_duration(trip_durations.mean()))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


In [7]:
def user_stats(df):
    """
    Displays statistics on bikeshare users.

    Always prints the counts per 'User Type'. Gender counts and birth-year
    statistics are printed only when the 'Gender' / 'Birth Year' columns are
    present in the frame. The frame itself is not modified.

    Args:
        df - Pandas DataFrame with a 'User Type' column and, optionally,
             'Gender' and 'Birth Year' columns
    """
    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    print (df['User Type'].value_counts())

    # Display counts of gender
    if 'Gender' in (df.columns):
        print (df['Gender'].value_counts())

    # Display earliest, most recent, and most common year of birth
    if 'Birth Year' in (df.columns):
        # Work on a local series: assigning back into df would modify the
        # caller's (possibly filtered) frame. Missing years are dropped so the
        # values can be shown as whole numbers instead of floats like 1980.0.
        birth_year = pd.to_numeric(df['Birth Year'], errors='raise').dropna()
        if birth_year.empty:
            print('No birth year data available.')
        else:
            earliest = int(birth_year.min())
            most_recent = int(birth_year.max())
            most_common = int(birth_year.mode().iloc[0])
            print('The earliest birth year is: {}, the most recent is:{} and the most common is: {}'.format(earliest, most_recent, most_common))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


In [8]:
def display_data(df):
    """
    Shows the raw trip rows five at a time for as long as the user answers "yes".

    Paging stops when the user gives any answer other than "yes" or when the
    last row of the frame has been shown, so no empty pages are printed.

    Args:
        df - Pandas DataFrame whose rows are displayed
    """
    total_rows = len(df)
    start_loc = 0

    view_data = input('\nWould you like to view 5 rows of individual trip data? Enter yes or no\n').strip().lower()
    while view_data == 'yes':
        if start_loc >= total_rows:
            # Nothing left: stop rather than printing empty frames forever.
            print('\nNo more rows to display.')
            break
        print(df.iloc[start_loc:start_loc+5])
        start_loc += 5
        # Only ask again when another page actually exists.
        if start_loc < total_rows:
            view_data = input("View next 5 rows? Enter yes or no\n: ").strip().lower()

In [9]:
def main():
    """
    Runs the interactive session: ask for filters, load the matching data,
    print every report, offer the raw rows, then ask whether to start over.

    The session repeats for as long as the user answers "yes"
    (case-insensitive) to the restart question.
    """
    # Each report takes the loaded frame as its only argument; order is the display order.
    reports = (time_stats, station_stats, trip_duration_stats, user_stats, display_data)

    keep_going = True
    while keep_going:
        city, month, day = get_filters()
        trips = load_data(city, month, day)

        for report in reports:
            report(trips)

        answer = input('\nWould you like to restart? Enter yes or no.\n')
        keep_going = answer.lower() == 'yes'


# Start the interactive session only when this code is run directly
# (not when it is imported as a module).
if __name__ == '__main__':
    main()

Hello! Let's explore some US bikeshare data!

Enter the city you want to explore its data; Chicago, New york or Washington: washington
Do you want to filter the data to a specific month, day, both or none?: none
----------------------------------------

Calculating The Most Frequent Times of Travel...

the most common month is:  june
the most common day of week is:  Wednesday
the most common hour is:  8

This took 0.04686427116394043 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

the most common start station is:  Columbus Circle / Union Station
the most common end station is:  Columbus Circle / Union Station
the most common trip is:  Jefferson Dr & 14th St SW to Jefferson Dr & 14th St SW

This took 0.10935401916503906 seconds.
----------------------------------------

Calculating Trip Duration...

The total travel duration is: 2587 days 3 hours 36 minutes 47 seconds
The average travel duration is: 0 days 0 hours 12 minutes 25 seco