In [1]:
import time
import pandas as pd
import numpy as np

# Supported cities, keyed by lowercase city name, mapped to the CSV file
# holding that city's trip records.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

In [2]:
#df = pd.read_csv('washington.csv')
#df.head()

In [3]:
def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Each answer is lower-cased and has surrounding/repeated whitespace
    normalised before validation, and the question is repeated until a
    valid value is entered.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')

    # Valid cities are exactly the keys of CITY_DATA.
    city = _prompt_choice(
        "Enter the city you want to analyze from('chicago, new york city, washington'): ",
        CITY_DATA,
    )

    # Only january..june (or "all") are accepted as a month filter.
    month = _prompt_choice(
        "Enter the month you want to analyze from('all, january, february, ... , june'): ",
        ('all', 'january', 'february', 'march', 'april', 'may', 'june'),
    )

    day = _prompt_choice(
        "Enter the day you want to analyze from('all, monday, tuesday, ... sunday'): ",
        ('all', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'),
    )

    print('-'*40)

    return city, month, day


def _prompt_choice(prompt, valid_choices):
    """Prompt repeatedly until the normalised answer is one of ``valid_choices``.

    Args:
        (str) prompt - text shown to the user
        (container of str) valid_choices - accepted lowercase answers
    Returns:
        (str) the accepted answer, lower-cased with whitespace collapsed
    """
    while True:
        # split()/join drops leading/trailing blanks and collapses inner runs,
        # so "  New  York City " is accepted as "new york city".
        answer = ' '.join(input(prompt).lower().split())
        if answer in valid_choices:
            return answer
        print('you enter invalid input')

In [4]:
def load_data(city, month, day):
    """
    Reads the CSV registered for ``city`` and narrows it to the requested
    month and weekday.

    Three helper columns derived from 'Start Time' are added to the frame:
    'month' (month number), 'day_of_week' (day name) and 'hour'.

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    trips = pd.read_csv(CITY_DATA[city])

    # Parse the timestamps once and derive every helper column from them.
    started = pd.to_datetime(trips['Start Time'])
    trips['Start Time'] = started
    trips['month'] = started.dt.month
    trips['day_of_week'] = started.dt.day_name()
    trips['hour'] = started.dt.hour

    if month != 'all':
        # Position in this list (1-based) is the calendar month number.
        month_names = ['january', 'february', 'march', 'april', 'may', 'june']
        month_number = month_names.index(month) + 1
        trips = trips[trips['month'] == month_number]

    if day != 'all':
        # day_name() yields capitalised names such as "Monday".
        wanted_day = day.title()
        trips = trips[trips['day_of_week'] == wanted_day]

    return trips


In [5]:
def time_stats(df):
    """Displays statistics on the most frequent times of travel.

    Prints the most common value of the 'month', 'day_of_week' and 'hour'
    columns, exactly as stored in those columns. If a column has no
    non-missing values (for example when the filters left no rows), a
    "no data available" notice is printed instead of raising.

    Args:
        df - Pandas DataFrame with 'month', 'day_of_week' and 'hour' columns
    """

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    reports = (
        ('the most common month:\n', 'month'),
        ('the most common day:\n', 'day_of_week'),
        ('the most common start hour:\n', 'hour'),
    )
    for label, column in reports:
        # mode() returns an empty Series when there is nothing to count,
        # so indexing it blindly would raise on an empty selection.
        modes = df[column].mode()
        if modes.empty:
            print(label, 'no data available')
        else:
            print(label, modes.iloc[0])

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [12]:
def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Prints the most common start station, the most common end station and
    the most common start/end pair (joined as "<start>-<end>"). When there
    are no values to count, a "no data available" notice is printed instead
    of raising.

    Args:
        df - Pandas DataFrame with 'Start Station' and 'End Station' columns
    """

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    # A trip is identified by its start and end station joined with '-'.
    trips = df['Start Station'] + '-' + df['End Station']

    reports = (
        ('the most common start station:\n', df['Start Station']),
        ('the most common end station:\n', df['End Station']),
        ('comb freq:\n', trips),
    )
    for label, values in reports:
        # mode() is empty when the selection has no rows; guard before indexing.
        modes = values.mode()
        if modes.empty:
            print(label, 'no data available')
        else:
            print(label, modes.iloc[0])

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [13]:
def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration.

    Both figures are the 'Trip Duration' values divided by 3600 before
    printing (the original author's note: seconds converted to hours).

    Args:
        df - Pandas DataFrame with a 'Trip Duration' column
    """
    seconds_per_hour = 3600

    print('\nCalculating Trip Duration...\n')
    started_at = time.time()

    durations = df['Trip Duration']

    # Sum of all trip durations, scaled by 3600.
    print('total travel time:\n', durations.sum() / seconds_per_hour)

    # Mean trip duration, scaled the same way.
    print('mean travel time:\n', durations.mean() / seconds_per_hour)

    elapsed = time.time() - started_at
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)

In [14]:
def user_stats(df):
    """Displays statistics on bikeshare users.

    Prints the count of each user type, the count of each gender, and the
    most recent, earliest and most common year of birth. The 'Gender' and
    'Birth Year' columns are optional: when a column is absent (or
    'Birth Year' holds no values) a notice is printed instead.

    Args:
        df - Pandas DataFrame with a 'User Type' column and, optionally,
             'Gender' and 'Birth Year' columns
    """

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    print('counts of user types:\n', df['User Type'].value_counts())

    # Display counts of gender.
    # Check for the column explicitly rather than catching every exception,
    # so unrelated errors (and Ctrl-C) are not silently swallowed.
    if 'Gender' in df.columns:
        print('counts of gender:\n', df['Gender'].value_counts())
    else:
        print('no gender in this dataset')

    # Display earliest, most recent, and most common year of birth
    birth_years = df['Birth Year'].dropna() if 'Birth Year' in df.columns else None
    if birth_years is None or birth_years.empty:
        # Covers both a missing column and a column with only missing values.
        print('no birth year in this dataset')
    else:
        print('most recent year of birth:\n', birth_years.max())
        print('most earliest year of birth:\n', birth_years.min())
        print('most common year of birth:\n', birth_years.mode()[0])

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [15]:
def display(df):
    """Shows the data five rows at a time for as long as the user asks.

    The user is prompted before each page; any answer other than "yes"
    (case and surrounding whitespace ignored) stops the paging. Paging
    also stops once every row has been shown, instead of printing empty
    frames forever.

    NOTE(review): in a notebook this name shadows IPython's built-in
    ``display``; it is kept because ``main`` calls it by this name.

    Args:
        df - Pandas DataFrame to page through
    """
    page_size = 5
    start = 0
    total_rows = len(df)

    while start < total_rows:
        answer = input('do you want do see 5 rows of the data "yes or no": ')
        if answer.strip().lower() != 'yes':
            break
        # iloc makes the positional intent explicit regardless of index type.
        print(df.iloc[start:start + page_size])
        start += page_size
    else:
        # Reached only when the loop ended without the user declining.
        print('no more rows to display')

In [16]:
def main():
    """Runs the interactive analysis loop.

    Each pass asks for the filters, loads the matching data, prints every
    statistics report, offers the raw rows, and then asks whether to start
    over. Any answer other than "yes" (case-insensitive) ends the loop.
    """
    # Reports are run in this fixed order against the same filtered frame.
    reports = (time_stats, station_stats, trip_duration_stats, user_stats, display)

    keep_going = True
    while keep_going:
        chosen_filters = get_filters()
        df = load_data(*chosen_filters)

        for report in reports:
            report(df)

        answer = input('\nWould you like to restart? Enter yes or no.\n')
        keep_going = answer.lower() == 'yes'

In [17]:
# Start the interactive session only when this code is executed directly
# (as a script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

Hello! Let's explore some US bikeshare data!
Enter the city you want to analyze from('chicago, new york city, washington'): chicago
Enter the month you want to analyze from('all, january, february, ... , june'): all
Enter the day you want to analyze from('all, monday, tuesday, ... sunday'): all
----------------------------------------

Calculating The Most Frequent Times of Travel...

the most common month:
 6
the most common day:
 Tuesday
the most common start hour:
 17

This took 0.03125643730163574 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

the most common start station:
 Streeter Dr & Grand Ave
the most common start End Station:
 Streeter Dr & Grand Ave
comb freq:
 Lake Shore Dr & Monroe St-Streeter Dr & Grand Ave

This took 0.18749237060546875 seconds.
----------------------------------------

Calculating Trip Duration...

total travel time:
 78019.94083333333
mean travel time:
 0.26006646944444445

This took 0.0 seconds.
