# US Bikeshare Data Project

In [13]:
import time
import pandas as pd
import numpy as np

CITY_DATA = { 'chicago': 'chicago.csv',
             'new york city': 'new_york_city.csv',
             'washington': 'washington.csv' } 

CITIES = ['chicago', 'new york city', 'washington']

MONTHS = ['all','january', 'february', 'march', 'april', 'may', 'june']

DAYS = ['all','sunday', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday']

def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')
    # TO DO: get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs
    while True:
        city = input('which city do you want to explore chicago, new york city, washington? \n>').lower()
        if city in CITIES:
            break

            # TO DO: get user input for month (all, january, february, ... , june)
    while True:
        month = input('which month do you want to explore? all, january, february, march, april, may, june\n>')
        if month.lower() in MONTHS:
            break
        else:
            print("month not available please input either 'all' or january, february, march, april, may, june")        



                    # TO DO: get user input for day of week (all, monday, tuesday, ... sunday)
    while True:
        day = input('which day of the week would you love to take a look at? all, sunday, monday, tuesday, wednesday, thursday, friday, saturday\n')
        if day.lower() in DAYS:
            break
        else:    
            print("day of weeek not available please input either 'all' or sunday,monday,tuesday,wednesday,thursday,friday,saturday")

    print('-'*40)
    return city, month, day


def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    # load data file into dataframe
    df = pd.read_csv(CITY_DATA[city])
    # convert the Start Time column to datetime
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    # extract month and day of the week from Start Time to create new columns
    df['month'] = df['Start Time'].dt.month
    df['day_of_week'] = df['Start Time'].dt.day_name()
    df['hour'] = df['Start Time'].dt.hour
    # filter by month if applicable
    if month !='all':
        month = MONTHS.index(month)
        df = df.loc[df['month'] == month]                                          

    # filter by day of week to create the new dataframe
    if day !='all':
        df = df.loc[df['day_of_week'] == day.title()]                                                
    return df

def time_stats(df):
    """Displays statistics on the most frequent times of travel."""

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    # TO DO: display the most common month
    most_common_month = df['month'].mode()[0]     
    print('The most common month is :'+str(most_common_month))                                                 


    # TO DO: display the most common day of week
    most_common_day_of_week = df['day_of_week'].mode()[0]
    print('The most common day of the week is :'+str(most_common_day_of_week))                           

    # TO DO: display the most common start hour
    most_common_start_hour = df['Start Time'].mode()[0]
    print('The most common start hour is :'+str(most_common_start_hour))       

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def station_stats(df):
    """Displays statistics on the most popular stations and trip."""

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    # TO DO: display most commonly used start station
    most_commonly_used_start_station = df['Start Station'].mode()[0]
    print('The most commonly used start station is :'+str(most_commonly_used_start_station))                              


    # TO DO: display most commonly used end station
    most_commonly_used_end_station = df['End Station'].mode()[0]
    print('Most commonly used end station is :'+str(most_commonly_used_end_station))                              


    # TO DO: display most frequent combination of start station and end station trip
    frequent_combination = (df['Start Station'] + "||" + df['End Station']).mode()[0]
    print("The most frequent combination of start station and end station trip is : " + str(frequent_combination.split("||")))                              

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration."""

    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    # TO DO: display total travel time
    total_travel_time = df['Trip Duration'].sum()
    print("Total travel time :" +str(total_travel_time))
    # TO DO: display mean travel time
    mean_travel_time = df["Trip Duration"].mean()
    print('Mean travel time :' +str(mean_travel_time))          


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def user_stats(df):
    """Displays statistics on bikeshare users."""

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # TO DO: Display counts of user types
    user_types = df['User Type'].value_counts()
    print('User Types:\n{}'.format(user_types))
    

    # TO DO: Display counts of gender
    if "Gender" in df.columns:
        gender = df['Gender'].value_counts()
        print('Gender:\n{}'.format(gender))
        
    if "Birth Year" in df.columns:
        # TO DO: Display earliest, most recent, and most common year of birth
        earliest_birth = df['Birth Year'].min()
        most_recent_birth = df['Birth Year'].max()
        most_common_birth = df['Birth Year'].mode()[0]
        print('Earliest Birth Year:{}'.format(earliest_birth))
        print('Most Recent Birth Year:{}'.format(most_recent_birth))
        print('Most Common Birth Year:{}'.format(most_common_birth))      


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

def display_raw_data(df):
    """Displays raw data on user request."""
    print(df.head())
    next = 0
    while True:
        view_raw_data = input('\nWould you like to display 5 rows of raw data? Enter yes or no.\n')
        if view_raw_data.lower() != 'yes':
            return
        next = next + 5
        print(df.iloc[next:next+5])


def main():
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)
        while True:
            view_raw_data = input('\nWould you like to display 5 rows of raw data? Enter yes or no.\n')
            if view_raw_data.lower() !='yes':
                break
            display_raw_data(df)
            break
                

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.lower() != 'yes':
            break


if __name__ == "__main__":
    main()

Hello! Let's explore some US bikeshare data!
which city do you want to explore chicago, new york city, washington? 
>chicago
which month do you want to explore? all, january, february, march, april, may, june
>february
which day of the week would you love to take a look at? all, sunday, monday, tuesday, wednesday, thursday, friday, saturday
sunday
----------------------------------------

Calculating The Most Frequent Times of Travel...

The most common month is :2
The most common day of the week is :Sunday
The most common start hour is :2017-02-19 14:59:00

This took 0.008004903793334961 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

The most commonly used start station is :Streeter Dr & Grand Ave
Most commonly used end station is :Streeter Dr & Grand Ave
The most frequent combination of start station and end station trip is : ['Streeter Dr & Grand Ave', 'Streeter Dr & Grand Ave']

This took 0.011003255844116211 seconds.
---------