In [1]:
import time
import pandas as pd
import numpy as np
import os

In [2]:
# Lookup table: lower-case city name -> CSV file holding that city's trip records.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

In [3]:
def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Every answer is stripped of surrounding whitespace and lower-cased before
    it is validated, and the same question is asked again until the answer is
    one of the accepted values.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    # Accepted (lower-case) answers for each kind of question.
    valid_choices = {
        'city': ['chicago', 'new york city', 'washington'],
        'month': ['all', 'january', 'february', 'march', 'april', 'may', 'june'],
        'day': ['all', 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday'],
    }

    def check_input(userinp, type_inp):
        """Prompt with `userinp` until the answer is a valid `type_inp` ('city', 'month' or 'day')."""
        accepted = valid_choices[type_inp]
        while True:
            # strip() so that stray spaces around an otherwise valid answer are tolerated
            s = str(input(userinp)).strip().lower()
            if s in accepted:
                return s
            print("Sorry, please enter correct %s name!" % type_inp)

    print('Hello! Let\'s explore some US bikeshare data!')
    # get user input for city (chicago, new york city, washington)
    city = check_input("Enter city name would you like to see the data(chicago, new york city, washington): ", 'city')
    # get user input for month (all, january, february, ... , june)
    month = check_input("Any Month you want(all, january, ... june): ", 'month')
    # get user input for day of week (all, monday, tuesday, ... sunday)
    day = check_input("Any day you want(all, monday, tuesday, ... sunday): ", 'day')
    print('-'*40)
    return city, month, day

In [4]:
def load_data(city, month, day):
    """
    Reads the trip file of one city and narrows it down to the requested month and weekday.

    Three helper columns are derived from 'Start Time' and added to the frame:
    'month' (number), 'day_of_week' (English day name) and 'hour'.

    Args:
        (str) city - name of the city to analyze (a key of CITY_DATA)
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    trips = pd.read_csv(CITY_DATA[city])

    # Parse the timestamps once and derive every helper column from the parsed series.
    started = pd.to_datetime(trips['Start Time'])
    trips['Start Time'] = started
    trips['month'] = started.dt.month
    trips['day_of_week'] = started.dt.day_name()
    trips['hour'] = started.dt.hour

    if month != 'all':
        # The position in this list (1-based) is the calendar month number.
        month_names = ['january', 'february', 'march', 'april', 'may', 'june']
        month = month_names.index(month) + 1
        trips = trips.loc[trips['month'] == month]

    if day != 'all':
        # day_name() yields capitalised names, hence title().
        wanted_day = day.title()
        trips = trips.loc[trips['day_of_week'] == wanted_day]

    return trips

In [5]:
def time_stats(df):
    """Displays statistics on the most frequent times of travel.

    Args:
        df - Pandas DataFrame with 'month', 'day_of_week' and 'hour' columns

    Prints the most common month, day of week and start hour, followed by the
    elapsed time. When df has no rows a notice is printed instead of the
    statistics, because the mode of an empty column does not exist.
    """

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    if df.empty:
        # mode() returns an empty Series here, so mode()[0] would raise
        print('No trip data available for the selected filters.')
    else:
        # display the most common month
        popular_month = df['month'].mode()[0]
        print('Most Popular Month:', popular_month)

        # display the most common day of week
        popular_day_of_week = df['day_of_week'].mode()[0]
        print('Most Day Of Week:', popular_day_of_week)

        # display the most common start hour
        popular_common_start_hour = df['hour'].mode()[0]
        print('Most Common Start Hour:', popular_common_start_hour)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


In [6]:
def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Args:
        df - Pandas DataFrame with 'Start Station' and 'End Station' columns

    Prints the most common start station, the most common end station and the
    most frequent (start, end) pair with its trip count, followed by the
    elapsed time. When df has no rows a notice is printed instead of the
    statistics, because the mode of an empty column does not exist.
    """

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    if df.empty:
        # mode() returns an empty Series here, so mode()[0] would raise
        print('No trip data available for the selected filters.')
    else:
        # display most commonly used start station
        popular_start_station = df['Start Station'].mode()[0]
        print('Most Start Station:', popular_start_station)

        # display most commonly used end station
        popular_end_station = df['End Station'].mode()[0]
        print('Most End Station:', popular_end_station)

        # display most frequent combination of start station and end station trip:
        # count trips per (start, end) pair and keep the pair with the largest count
        group_field = df.groupby(['Start Station', 'End Station'])
        popular_combination_station = group_field.size().sort_values(ascending=False).head(1)
        print('Most frequent combination of Start Station and End Station trip:\n', popular_combination_station)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


In [7]:
def trip_duration_stats(df):
    """Prints the summed and the average value of the 'Trip Duration' column, plus the time taken."""

    print('\nCalculating Trip Duration...\n')
    began = time.time()

    durations = df['Trip Duration']

    # overall travel time across all trips in the frame
    print('Total Travel Time:', durations.sum())

    # average travel time per trip
    print('Mean Travel Time:', durations.mean())

    elapsed = time.time() - began
    print("\nThis took %s seconds." % elapsed)
    print('-' * 40)

In [8]:
def user_stats(df, city):
    """Displays statistics on bikeshare users.

    Args:
        df - Pandas DataFrame with a 'User Type' column and, optionally,
             'Gender' and 'Birth Year' columns
        (str) city - name of the analyzed city; kept so existing callers keep
             working. Whether gender and birth year statistics are shown is
             decided from the columns actually present in df, not from the
             city name.

    Prints the user type counts, then the gender counts and the most common,
    most recent and earliest birth year when those columns are available,
    followed by the elapsed time.
    """

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    print('User Type Stats:')
    print(df['User Type'].value_counts())

    # Not every city file carries demographic columns, so test for them
    # instead of hard-coding which city lacks them.
    if 'Gender' in df.columns:
        # Display counts of gender
        print('Gender Stats:')
        print(df['Gender'].value_counts())

    if 'Birth Year' in df.columns:
        # Display earliest, most recent, and most common year of birth
        print('Birth Year Stats:')
        # mode(), max() and min() ignore missing values anyway; dropping them
        # first lets us detect the "no usable value" case before mode()[0] raises
        birth_years = df['Birth Year'].dropna()
        if birth_years.empty:
            print('No birth year data available.')
        else:
            most_common_year = birth_years.mode()[0]
            print('Most Common Year:', most_common_year)
            most_recent_year = birth_years.max()
            print('Most Recent Year:', most_recent_year)
            earliest_year = birth_years.min()
            print('Earliest Year:', earliest_year)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [9]:
def display_data(df):
    """Shows the rows of df five at a time for as long as the user answers yes.

    Args:
        df - Pandas DataFrame whose rows are printed in consecutive slices

    The user's answers are stripped and lower-cased before being compared with
    'yes'. Paging stops as soon as every row has been shown, so the user is
    never asked to continue into an empty slice.
    """
    total_rows = len(df)
    view_data = input("Would you like to view 5 rows of individual trip data? Enter yes or no?").strip().lower()
    start_loc = 0
    while view_data == 'yes':
        if start_loc >= total_rows:
            # nothing (left) to show: end here instead of printing empty slices
            print('No more trip data to display.')
            break
        print(df.iloc[start_loc:start_loc+5])
        start_loc += 5
        # only ask again when there is another page to show
        if start_loc < total_rows:
            view_data = input("Do you wish to continue?: ").strip().lower()
    

In [10]:
def main():
    """Runs the interactive analysis: ask for filters, load the trips, print every report, repeat on request."""
    keep_running = True
    while keep_running:
        city, month, day = get_filters()
        trips = load_data(city, month, day)

        # reports are printed in a fixed order; only user_stats needs the city
        time_stats(trips)
        station_stats(trips)
        trip_duration_stats(trips)
        user_stats(trips, city)
        display_data(trips)

        # anything other than "yes" (case-insensitive) ends the session
        answer = input('\nWould you like to restart? Enter yes or no.\n')
        keep_running = answer.lower() == 'yes'


# Start the interactive session only when this code is executed directly.
if __name__ == "__main__":
    main()


Hello! Let's explore some US bikeshare data!
Enter city name would you like to see the data(chicago, new york city, washington): washingtoN
Any Month you want(all, january, ... june): JUNE
Any day you want(all, monday, tuesday, ... sunday): SunDay
----------------------------------------

Calculating The Most Frequent Times of Travel...

Most Popular Month: 6
Most Day Of Week: Sunday
Most Common Start Hour: 12

This took 0.003997802734375 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

Most Start Station: Jefferson Dr & 14th St SW
Most End Station: Jefferson Dr & 14th St SW
Most frequent combination of Start Station and End Station trip:
 Start Station              End Station              
Jefferson Dr & 14th St SW  Jefferson Dr & 14th St SW    40
dtype: int64

This took 0.008993864059448242 seconds.
----------------------------------------

Calculating Trip Duration...

Total Travel Time: 14087980.618
Mean Travel Time: 1688.194202