In [22]:
import numpy as np
import pandas as pd
import time

In [23]:
# Lookup table from lower-case city name to the CSV file that load_data reads
# for that city.
CITY_DATA = dict([
    ('chicago', 'chicago.csv'),
    ('new york city', 'new_york_city.csv'),
    ('washington', 'washington.csv'),
])

In [24]:
def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Every answer is lower-cased and stripped of surrounding whitespace before
    it is checked. The city, filter-type, month and day questions are repeated
    until a valid answer is given.

    Returns:
        (str) city - key of the city to analyze: 'chicago', 'new york city' or
            'washington' ('new york' is accepted and mapped to 'new york city',
            which is the key used in CITY_DATA)
        (str) month - name of the month to filter by ('january' .. 'june'),
            or "all" to apply no month filter
        (int or str) day - day of week to filter by as an integer where
            0 = Sunday .. 6 = Saturday, or "all" to apply no day filter
    """

    print('Hello! Let\'s explore some US bikeshare data!')

    # Accepted spellings -> canonical city key. The prompt advertises
    # "new york", so that spelling must resolve to the 'new york city' key.
    city_aliases = {
        'chicago': 'chicago',
        'washington': 'washington',
        'new york': 'new york city',
        'new york city': 'new york city',
    }
    while True:
        print('Would you like to see data for chicago, new york or washington')
        answer = input().lower().strip()
        if answer in city_aliases:
            city = city_aliases[answer]
            break
        # This message has to live inside the loop body: an `else` attached to
        # `while True` never runs, because the loop only ends through `break`.
        print('This city is not part of the options available')

    # What would the data be filtered by
    filter_options = ('month', 'day', 'both', 'none')
    while True:
        print('Would you like to filter the data by month, day, both or not at all? Type "none" for no time filter')
        time_filter = input().lower().strip()
        if time_filter in filter_options:
            break
        print('This filter is not part of the options available')

    # get user input for month (all, january, february, ... , june)
    month = 'all'
    if time_filter in ('month', 'both'):
        print('Do you want to analyse all months or a specific month')
        month_decision = input('Please enter yes for all and no for a specific month: ').lower().strip()
        if month_decision != 'yes':
            month_list = ['january', 'february', 'march', 'april', 'may', 'june']
            while True:
                print('What month would you like to analyse? January, February, March, April, May, or June?...')
                month = input().lower().strip()
                if month in month_list:
                    break
                print('This month is not part of the options available')

    # get user input for day of week (all, monday, tuesday, ... sunday)
    day = 'all'
    if time_filter in ('day', 'both'):
        print('Do you want to analyse all days of the week or a specific day')
        day_decision = input('Enter yes for all : ').lower().strip()
        if day_decision != 'yes':
            while True:
                print('What day would you like to analyse? Please type your response as an integer e.g(0 = Sunday)')
                try:
                    candidate = int(input())
                except ValueError:
                    # Only a non-numeric answer is retried; Ctrl-C and a
                    # closed stdin are allowed to propagate.
                    print('This is an invalid day')
                    continue
                if 0 <= candidate <= 6:
                    day = candidate
                    break
                print("This is out of range")

    print('-'*40)
    return city, month, day

In [25]:
# Run the interactive prompts once and keep the answers for the cells below.
city, month, day = get_filters()

Hello! Let's explore some US bikeshare data!
Would you like to see data for chicago, new york or washington
chicago
Would you like to filter the data by month, day, both or not at all? Type "none" for no time filter
both
Do you want to analyse all months or a specific month
Please enter yes for all and no for a specific month: yes
Do you want to analyse all days of the week or a specific day
Enter yes for all : yes
----------------------------------------


In [26]:
# Show the city value returned by get_filters().
city

'chicago'

In [27]:
# Show the month value returned by get_filters().
month

'all'

In [28]:
# Show the day value returned by get_filters().
day

'all'

In [29]:
def load_data(city, month, day):
    """
    Loads the trip data for one city and applies the month and day filters.

    Args:
        (str) city - key into CITY_DATA selecting the CSV file to read
        (str) month - 'january' .. 'june', or "all" to apply no month filter
        (int or str) day - 0 = Sunday .. 6 = Saturday, a day name such as
            'monday' (any letter case), or "all" to apply no day filter

    Returns:
        df - pandas DataFrame of the matching trips, with 'Start Time' parsed
            as datetimes and 'month', 'day_of_week' and 'hour' columns added

    Raises:
        KeyError - if city, month or an integer day is not a known key
    """
    df = pd.read_csv(CITY_DATA[city])
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    df['month'] = df['Start Time'].dt.month
    # `.dt.weekday_name` was removed in pandas 1.0; `.dt.day_name()` is the
    # replacement and yields the same English day names.
    df['day_of_week'] = df['Start Time'].dt.day_name()
    df['hour'] = df['Start Time'].dt.hour

    month_numbers = {'january': 1, 'february': 2, 'march': 3, 'april': 4, 'may': 5, 'june': 6}
    if month != 'all':
        df = df.loc[df['month'] == month_numbers[month]]

    # The day filter is applied independently of the month filter, so choosing
    # "all" months with a specific day still narrows the frame to that day.
    day_index = {0: 'Sunday', 1: 'Monday', 2: 'Tuesday', 3: 'Wednesday',
                 4: 'Thursday', 5: 'Friday', 6: 'Saturday'}
    if day != 'all':
        wanted_day = day.title() if isinstance(day, str) else day_index[day]
        df = df.loc[df['day_of_week'] == wanted_day]

    return df

In [30]:
# Read the chosen city's CSV and apply the selected filters.
df = load_data(city,month,day)

In [31]:
# Preview the first rows of the loaded frame, including the derived columns.
df.head()

Unnamed: 0.1,Unnamed: 0,Start Time,End Time,Trip Duration,Start Station,End Station,User Type,Gender,Birth Year,month,day_of_week,hour
0,1423854,2017-06-23 15:09:32,2017-06-23 15:14:53,321,Wood St & Hubbard St,Damen Ave & Chicago Ave,Subscriber,Male,1992.0,6,Friday,15
1,955915,2017-05-25 18:19:03,2017-05-25 18:45:53,1610,Theater on the Lake,Sheffield Ave & Waveland Ave,Subscriber,Female,1992.0,5,Thursday,18
2,9031,2017-01-04 08:27:49,2017-01-04 08:34:45,416,May St & Taylor St,Wood St & Taylor St,Subscriber,Male,1981.0,1,Wednesday,8
3,304487,2017-03-06 13:49:38,2017-03-06 13:55:28,350,Christiana Ave & Lawrence Ave,St. Louis Ave & Balmoral Ave,Subscriber,Male,1986.0,3,Monday,13
4,45207,2017-01-17 14:53:07,2017-01-17 15:02:01,534,Clark St & Randolph St,Desplaines St & Jackson Blvd,Subscriber,Male,1975.0,1,Tuesday,14


In [32]:
def time_stats(df):
    """Print the modal month, weekday and start hour of the trips in df.

    Reads the 'month', 'day_of_week' and 'hour' columns, prints one line per
    statistic followed by the elapsed wall-clock time, and returns nothing.
    """

    print('\nCalculating The Most Frequent Times of Travel...\n')
    clock_start = time.time()

    # (message template, column whose most frequent value is reported)
    reports = (
        ("the most common month is {}", 'month'),
        ("the most common day of week is {}", 'day_of_week'),
        ("the most common start hour {}", 'hour'),
    )
    for template, column in reports:
        # mode() returns a Series; its first entry is the value reported.
        print(template.format(df[column].mode()[0]))

    elapsed = time.time() - clock_start
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)


In [33]:
# Print the most common month, weekday and start hour for the loaded trips.
time_stats(df)


Calculating The Most Frequent Times of Travel...

the most common month is 6
the most common day of week is Tuesday
the most common start hour 17

This took 0.2318108081817627 seconds.
----------------------------------------


In [34]:
def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Reads the 'Start Station' and 'End Station' columns and prints the most
    common start station, the most common end station and the most common
    "start-end" pairing, followed by the elapsed time. The frame is only
    read: no column is added to it. Returns nothing.
    """

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    # display most commonly used start station
    most_used_start_station = df['Start Station'].mode()[0]
    print("the most commonly used start station is {}".format(most_used_start_station))

    # display most commonly used end station
    most_used_end_station = df['End Station'].mode()[0]
    print("the most commonly used end station is {}".format(most_used_end_station))

    # display most frequent combination of start station and end station trip
    # The pairing is kept in a local Series rather than written into df, so the
    # caller's frame does not gain a synthetic column (which would show up when
    # the raw data is displayed later and can trigger SettingWithCopyWarning on
    # a filtered frame).
    trip_pairs = df['Start Station'] + "-" + df['End Station']
    most_frequent_combo = trip_pairs.mode()[0]
    print("the most frequent combo is {}".format(most_frequent_combo))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


In [35]:
# Print the most used start station, end station and start-end pairing.
station_stats(df)


Calculating The Most Popular Stations and Trip...

the most commonly used start station is Streeter Dr & Grand Ave
the most commonly used end station is Streeter Dr & Grand Ave
the most frequent combo is Lake Shore Dr & Monroe St-Streeter Dr & Grand Ave

This took 1.0026895999908447 seconds.
----------------------------------------


In [36]:
def trip_duration_stats(df):
    """Print the sum and the mean of the 'Trip Duration' column of df.

    Also prints how long the calculation took. Returns nothing.
    """

    print('\nCalculating Trip Duration...\n')
    clock_start = time.time()

    durations = df['Trip Duration']

    # total first, then the average, each printed as soon as it is known
    print("the total travel time is {}".format(durations.sum()))
    print("the mean travel time is {}".format(durations.mean()))

    elapsed = time.time() - clock_start
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)

In [37]:
# Print the total and mean of the 'Trip Duration' column.
trip_duration_stats(df)


Calculating Trip Duration...

the total travel time is 280871787
the mean travel time is 936.23929

This took 0.022156476974487305 seconds.
----------------------------------------


In [38]:
def user_stats(df):
    """Displays statistics on bikeshare users.

    Prints the value counts of 'User Type', the value counts of 'Gender' when
    that column exists, and the earliest, most recent and most common
    'Birth Year' when that column exists and holds at least one value.
    A short "No data" message is printed for whatever is unavailable.
    Returns nothing.
    """

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    user_types_count = df['User Type'].value_counts()
    print(user_types_count)

    # Display counts of gender
    if 'Gender' in df.columns:
        gender_count = df['Gender'].value_counts()
        print(gender_count)
    else:
        print('No data for gender')

    # Display earliest, most recent, and most common year of birth
    # Missing years are dropped up front so that a column holding only NaN is
    # reported as "no data" instead of failing on the empty mode() result.
    birth_years = df['Birth Year'].dropna() if 'Birth Year' in df.columns else None
    if birth_years is None or birth_years.empty:
        print('No data for birth year')
    else:
        earliest_yob = birth_years.min()
        print("the earliest year of birth is {}".format(earliest_yob))

        most_recent_yob = birth_years.max()
        print("the most recent year of birth is {}".format(most_recent_yob))

        # mode() returns a Series; take its first entry so the year itself is
        # printed rather than the Series repr.
        most_common_yob = birth_years.mode()[0]
        print("the most common year of birth is {}".format(most_common_yob))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


In [39]:
# Print user type counts, plus gender and birth year figures when present.
user_stats(df)


Calculating User Stats...

Subscriber    238889
Customer       61110
Dependent          1
Name: User Type, dtype: int64
Male      181190
Female     57758
Name: Gender, dtype: int64
the earliest year of birth is 1899.0
the most recent year of birth is 2016.0
the most common year of birth is 0    1989.0
dtype: float64

This took 0.75527024269104 seconds.
----------------------------------------


In [47]:
def display_data(df):
    """Interactively prints the rows of df five at a time.

    The first question is repeated until the answer is 'yes' or 'no'
    (case-insensitive). After each printed chunk the user is asked whether to
    continue; any answer other than 'yes' stops the display. Every row can be
    reached, including a final chunk shorter than five rows, and no further
    question is asked once the last row has been printed. Returns nothing.
    """

    print('\nRaw data is available to check....\n')

    chunk_size = 5

    user_input = input('Would you like to display 5 rows of raw data?, Please type yes or no ').lower().strip()
    # Keep asking until the answer is usable; a single re-prompt would let a
    # second invalid (or even a valid) answer fall through unhandled.
    while user_input not in ('yes', 'no'):
        print('That is an invalid choice, Kindly type yes or no')
        user_input = input('Would you like to display 5 rows of raw data?, Please type yes or no ').lower().strip()

    if user_input == 'no':
        print('Thank you')
        return

    total_rows = df.shape[0]
    start = 0
    # Loop while any row is left, so the trailing partial chunk is shown too.
    while start < total_rows:
        print(df.iloc[start:start + chunk_size])
        start += chunk_size
        if start < total_rows:
            user_input = input('Would you like to display more 5 rows of raw data?, Please type yes or no ').lower().strip()
            if user_input != 'yes':
                print('Thank You')
                return

    print('There is no more raw data to display')

In [48]:

def main():
    """Run the interactive analysis, repeating it for as long as the user asks.

    Each pass collects the filters, loads the matching data, prints the four
    statistics reports and offers the raw rows; it then asks whether to start
    over and stops on any answer other than 'yes' (case-insensitive).
    """
    wants_another_run = True
    while wants_another_run:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        # reports, in the order they are shown to the user
        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)
        display_data(df)

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        wants_another_run = restart.lower() == 'yes'


# Start the program when this code is executed directly.
if __name__ == "__main__":
    main()


Hello! Let's explore some US bikeshare data!
Would you like to see data for chicago, new york or washington
chicago
Would you like to filter the data by month, day, both or not at all? Type "none" for no time filter
both
Do you want to analyse all months or a specific month
Please enter yes for all and no for a specific month: no
What month would you like to analyse? January, February, March, April, May, or June?...
may
Do you want to analyse all days of the week or a specific day
Enter yes for all : no
What day would you like to analyse? Please type your response as an integer e.g(0 = Sunday)
5
----------------------------------------

Calculating The Most Frequent Times of Travel...

the most common month is 5
the most common day of week is Friday
the most common start hour 8

This took 0.02022576332092285 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

the most commonly used start station is Clinton St & Washington Blvd
the most 