In [8]:
import time
import pandas as pd
import numpy as np

CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }

In [9]:
def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')
    # get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs
    while True:
        valid_cities = ['chicago','new york city', 'washington']
        city = input("Which of these cities would you like to analyse? chicago, new york city or washington")
        if city in valid_cities:
            break
        else:
            print("You entered an invalid city.")


    # get user input for month (all, january, february, ... , june)
    month_spec = False
    decision_month = input("Would you want to analyse a specific month or all? Please enter \"all\" for all or enter anythingelse for a specific month")
    if decision_month != 'all':
        month_spec = True
        while True:
            valid_months = ['jan', 'feb', 'mar', 'apr','may','jun','jul','aug','sep','oct','nov','dec']
            month = input("Which moth of the year would you like to analyse? Enter one [jan, feb,mar,apr,may,jun,jul,aug,sep, oct, nov, dec]")
            if month in valid_months:
                break
            else:
                print("You have entered an invalid month")
    else:
        month_spec = False
        month = 'all'

    # get user input for day of week (all, monday, tuesday, ... sunday)
    day_spec = False
    decision_day = input("Would you want to analyse a specific day or all? Please enter \"all\" for all days or enter anythingelse for a specific day")
    if decision_day != 'all':
        day_spec = True
        while True:
            day = input("Please enter a number to specify day 1-sun, 2-mon, 3-tue, 4-wed, 5-thur, 6-fri, 7-sat")
            try:
                if 1<= int(day) <= 7:
                    break
                else:
                    print("Please enter an appropriate number corresponding to selected day")
            except ValueError:
                print("Please use numbers to represent day ")
    else:
        day_spec = False
        day = "all"

    print('-'*40)
    return city, month, day, month_spec, day_spec


In [10]:
def load_data(city, month, day, month_spec, day_spec):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    #read  csv file
    filename = CITY_DATA[city]
    df = pd.read_csv(filename)
    
    #create three new columns for month, day of the week and hour
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    df['Month'] = df['Start Time'].dt.month
    df['Week Day'] = df['Start Time'].dt.weekday
    df['Hour'] = df['Start Time'].dt.hour
    
    #check if user want to filter by month or day and filter
    month_map = {'jan':1, 'feb':2, 'mar':3, 'apr':4,'may':5,'jun':6,'jul':7,'aug':8,'sep':9,'oct':10,'nov':11,'dec':12}
    if month_spec:
        df = df.loc[df['Month'] == month_map[month]]
    if day_spec:    
        df = df.loc[df['Week Day'] == int(day)]

    return df


In [11]:
def time_stats(df):
    """Displays statistics on the most frequent times of travel."""

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    # display the most common month
    most_month = df['Month'].mode()
    print(most_month)
    # display the most common day of week
    most_day = df['Week Day'].mode()
    print(most_day)

    # display the most common start hour
    most_hour = df['Hour'].mode()
    print(most_hour)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


In [12]:
def station_stats(df):
    """Displays statistics on the most popular stations and trip."""

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    # display most commonly used start station
    print(df['Start Station'].mode())
    

    # display most commonly used end station
    print(df['End Station'].mode())
    
    # display most frequent combination of start station and end station trip
    df['Start-Stop'] = df['Start Station'] + " -- "+ df["End Station"]
    print(df['Start-Stop'].mode())


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [13]:
def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration."""

    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    # display total travel time
    total_time = df['Trip Duration'].sum()
    print(total_time)

    # display mean travel time
    mean_time = df['Trip Duration'].mean()
    print(mean_time)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [14]:
def user_stats(df):
    """Displays statistics on bikeshare users."""

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    user_types = df['User Type'].value_counts()
    print(user_types)

    # Display counts of gender
    if 'Gender' in df.columns:
        gender_count = df['Gender'].value_counts()
        print(gender_count)

    # Display earliest, most recent, and most common year of birth
    if 'Birth Year' in df.columns:
        
        print(df['Birth Year'].nlargest(n = 1))
        print(df['Birth Year'].nsmallest(n = 1))

        print(df['Birth Year'].mode())

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


In [None]:
while True:
    city, month, day, month_spec, day_spec = get_filters()
    df = load_data(city, month, day, month_spec, day_spec)

    time_stats(df)
    station_stats(df)
    trip_duration_stats(df)
    user_stats(df)

    restart = input('\nWould you like to restart? Enter yes or no.\n')
    if restart.lower() != 'yes':
        break

Hello! Let's explore some US bikeshare data!
Which of these cities would you like to analyse? chicago, new york city or washingtonwashington
Would you want to analyse a specific month or all? Please enter "all" for all or enter anythingelse for a specific monthall
Would you want to analyse a specific day or all? Please enter "all" for all days or enter anythingelse for a specific dayall
----------------------------------------

Calculating The Most Frequent Times of Travel...

0    6
dtype: int64
0    2
dtype: int64
0    8
dtype: int64

This took 0.015630006790161133 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

0    Columbus Circle / Union Station
dtype: object
0    Columbus Circle / Union Station
dtype: object
0    Jefferson Dr & 14th St SW -- Jefferson Dr & 14...
dtype: object

This took 0.14055204391479492 seconds.
----------------------------------------

Calculating Trip Duration...

371183985.484
1237.2799516133446

This to

In [20]:
df['Gender']

KeyError: 'Gender'