In [4]:
import time
import pandas as pd
import numpy as np

CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }

In [5]:
def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')
    # get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs
    cities = ['chicago','new york city','washington']
    city = str(input("please enter your requested city :")).lower().strip()

    while(city not in cities):
        city = str(input("please enter 'chicago','new york city' or'washington' :")).lower().strip()

    # get user input for month (all, january, february, ... , june)
    months = ["all", "january", "february", "march","april", "june"]
    month = str(input("please enter desired month or all")).lower().strip()

    while(month not in months):
        month = str(input("please enter a month from january to june :")).strip().lower()

    # get user input for day of week (all, monday, tuesday, ... sunday)
    days = ["all", "monday", "tuesday","Wednesday","Thursday","friday","saturday" , "sunday"]
    day = str(input("please enter desired day or all")).lower().strip().capitalize()

    while(day not in days):
        day = str(input("please enter any weekday :")).lower().strip().capitalize()

    print('-'*40)
    return city, month, day


In [6]:
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    df = pd.read_csv(CITY_DATA[city])
    df["Start Time"] = pd.to_datetime(df["Start Time"])
    df["Month"] = df["Start Time"].dt.month
    df["day_of_week"] = df["Start Time"].dt.day_name()


    if month != 'all':
        # use the index of the months list to get the corresponding int
        months = ['january', 'february', 'march', 'april', 'may', 'june']
        month = months.index(month)+1
    
        # filter by month to create the new dataframe
        df = df[df['Month']==month]

    # filter by day of week if applicable
    if day != 'all':
        # filter by day of week to create the new dataframe
        df = df[df['day_of_week']==day.capitalize()]

    return df

In [7]:
df = load_data("chicago","april","friday")
#df = pd.read_csv("chicago.csv")
# df.columns
# df.head()

In [22]:
def time_stats(df):
    """Displays statistics on the most frequent times of travel."""

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    # display the most common month
    print("the most common month is {}".format(df["Month"].mode()[0]))

    # display the most common day of week
    print("the most common day of week is {}".format(df["day_of_week"].mode()[0]))


    # display the most common start hour
    print("the most common start hour is {}".format(df["Start Time"].dt.hour.mode()[0]))


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


In [27]:
time_stats(df)


Calculating The Most Frequent Times of Travel...

the most common month is 4
the most common day of week is Friday
the most common start hour is 17

This took 0.0029997825622558594 seconds.
----------------------------------------


In [40]:
def station_stats(df):
    """Displays statistics on the most popular stations and trip."""

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    # display most commonly used start station
    print("the most commonly used start station is {}".format(df["Start Station"].mode()[0]))


    # display most commonly used end station
    print("the most commonly used end station is {}".format(df["End Station"].mode()[0]))


    # display most frequent combination of start station and end station trip
    print("the most frequent combination of start station and end station trip".format((df["Start Station"]+" "+df["End Station"]).mode()[0]))


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [48]:
station_stats(df)


Calculating The Most Popular Stations and Trip...

the most commonly used start station is Clinton St & Washington Blvd
the most commonly used end station is Streeter Dr & Grand Ave
the most frequent combination of start station and end station trip

This took 0.00799870491027832 seconds.
----------------------------------------


In [94]:
def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration."""

    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    # display total travel time
    print("total trip duration is {} seconds".format(df["Trip Duration"].sum()))

    # display mean travel time
    print("the mean trip duration time is {}",(df["Trip Duration"].mean()))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [95]:
trip_duration_stats(df)


Calculating Trip Duration...

total trip duration is 5545293 seconds
the mean trip duration time is {} 804.9489040499346

This took 0.0010004043579101562 seconds.
----------------------------------------


In [104]:
def user_stats(df):
    """Displays statistics on bikeshare users."""

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    print(df["User Type"].value_counts())

    # Display counts of gender
    print(df["Gender"].value_counts())

    # Display earliest, most recent, and most common year of birth
    print("min is {} max is {} common is {}".format(df["Birth Year"].min(),df["Birth Year"].max(),df["Birth Year"].mode()[0]))


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [105]:
user_stats(df)


Calculating User Stats...

Subscriber    5909
Customer       980
Name: User Type, dtype: int64
Male      4620
Female    1293
Name: Gender, dtype: int64
min is 1899.0 max is 2016.0 common is 1989.0

This took 0.004000663757324219 seconds.
----------------------------------------


In [16]:
def main():
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.lower() != 'yes':
            break

In [17]:
if __name__ == "__main__":
	main()
