# BikeShare Data

In [1]:
import time
import pandas as pd
import numpy as np

CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }

def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')
    # TO DO: get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs
    cities = ['chicago','ew york city','washington']
    city = input('please enter the city\t').lower()
    while(city not in cities):
        print(f'\n{city} not defined please try again\n')
        city = input('please enter a valid city\t').lower()

    # TO DO: get user input for month (all, january, february, ... , june)
    months = ['january', 'february', 'march', 'april', 'may', 'june','all']
    month = input('please enter the month\t').lower()
    while(month not in months):
        print(f'\n{month} not defined please try again\n')
        month = input('please enter the correct month\t').lower()

    # TO DO: get user input for day of week (all, monday, tuesday, ... sunday)
    days = ['all','monday','tuesday','Wednesday','thursday','friday','saturday','sunday']
    day = input('please enter the day\t').lower()
    while(day not in days):
        print(f'\n{day} not defined please try again\n')
        day = input('please enter the correct day\t').lower()


    print('-'*40)
    return city, month, day


def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    df = pd.read_csv(CITY_DATA[city])
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    df['month'] = df['Start Time'].dt.month
    df['day_of_week'] = df['Start Time'].dt.day_name()
    
    if month != 'all':
        months = ['january', 'february', 'march', 'april', 'may', 'june']
        month = months.index(month) + 1
        df = df[df['month'] == month]

    if day != 'all':
        df = df[df['day_of_week'] == day.title()]

    return df


def time_stats(df):
    """Displays statistics on the most frequent times of travel."""

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    # display the most common month
    df['month'] = df['Start Time'].dt.month
    popular_month = df['month'].mode()[0]
    print(f'The most common month : {popular_month}\n')

    # display the most common day of week
    df['day_of_week'] = df['Start Time'].dt.day_name()
    popular_day = df['day_of_week'].mode()[0]
    print(f'The most common day of week : {popular_day}\n')

    # display the most common start hour
    df['hour'] = df['Start Time'].dt.hour
    popular_hour = df['hour'].mode()[0]
    print(f'The most common start hour : {popular_month}\n')

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def station_stats(df):
    """Displays statistics on the most popular stations and trip."""

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    common_start_station = df['Start Station'].mode()[0]
    print(f'The most common used start hour : {common_start_station}\n')
    # TO DO: display most commonly used end station
    common_end_station = df['End Station'].mode()[0]
    print(f'The most common used end station : {common_end_station}\n')
    # TO DO: display most frequent combination of start station and end station trip
    freq_combination = (df['Start Station'] +' $ '+  df['End Station']).mode()[0]
    print(f'The most frequent combination of start station and end station trip : {freq_combination}\n')


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration."""

    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    # display total travel time
    df['End Time'] = pd.to_datetime(df['End Time'])
    df['total_travel_time'] = df['End Time'].dt.hour  - df['Start Time'].dt.hour
    print(df['total_travel_time'],'\n')

    # display mean travel time
    print('mean travel time : '+str(df['total_travel_time'].mean())+'\n')

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def user_stats(df):
    """Displays statistics on bikeshare users."""

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    print('counts of user types : \n' + str(dict(df['User Type'].value_counts())) + '\n')

    # Display counts of gender
    if 'Gender' in df.columns:
        print('counts of gender : \n',str(dict(df['Gender'].value_counts())) + '\n')

    # Display earliest, most recent, and most common year of birth
    if 'Birth Year' in df.columns:
        print('earliest of birth :',str(df['Birth Year'].min()) + '\n')
        print('recent of birth :',str(df['Birth Year'].max()) + '\n')
        print('common year of birth : ',str(df['Birth Year'].mode()[0]))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
    
def display_data(df):
    index=0
    user_input=input('would you like to display 5 rows of raw data? ').lower()
    while user_input in ['yes','y','yep','yea'] and index+5 < df.shape[0]:
        print(df.iloc[index:index+5])
        index += 5
        user_input = input('would you like to display more 5 rows of raw data? ').lower()


def main():
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)
        display_data(df)
        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.lower() != 'yes':
            break
        

if __name__ == "__main__":
	main()


Hello! Let's explore some US bikeshare data!
please enter the city	Washington
please enter the month	all
please enter the day	all
----------------------------------------

Calculating The Most Frequent Times of Travel...

The most common month : 6

The most common day of week : Wednesday

The most common start hour : 6


This took 0.1845083236694336 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

The most common used start hour : Columbus Circle / Union Station

The most common used end station : Columbus Circle / Union Station

The most frequent combination of start station and end station trip : Jefferson Dr & 14th St SW $ Jefferson Dr & 14th St SW


This took 0.17556023597717285 seconds.
----------------------------------------

Calculating Trip Duration...

0         0
1         0
2         0
3         1
4         1
         ..
299995    0
299996    1
299997    0
299998    0
299999    0
Name: total_travel_time, Length: 300000, d