In [2]:




import time
import pandas as pd
import numpy as np

# Maps each supported (lower-case) city name to the CSV file holding its data.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

def _ask_choice(first_prompt, retry_prompt, valid_choices, input_fn):
    """Prompt until the answer (stripped and lower-cased) is one of valid_choices."""
    answer = input_fn(first_prompt).strip().lower()
    while answer not in valid_choices:
        answer = input_fn(retry_prompt).strip().lower()
    return answer


def get_filters(input_fn=input):
    """
    Asks user to specify a city, month, and day to analyze.

    Each question is repeated until a valid answer is given. Answers are
    compared case-insensitively and surrounding whitespace is ignored.

    Args:
        (callable) input_fn - takes a prompt string and returns the user's answer;
                              defaults to the built-in input()

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')

    cities = ('new york city', 'chicago', 'washington')
    city = _ask_choice(
        'enter the city you would like to explore: ',
        "please enter a valid city name(new york city, chicago or washington): ",
        cities,
        input_fn,
    )

    # Must list every month load_data() can filter on (january..june), including 'may'.
    months = ('january', 'february', 'march', 'april', 'may', 'june', 'all')
    month = _ask_choice(
        'enter a month name or enter all: ',
        'enter a valid month or choose all: ',
        months,
        input_fn,
    )

    days = ('sunday', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'all')
    day = _ask_choice(
        'enter a day of week or enter all: ',
        'enter a day of week or enter all: ',
        days,
        input_fn,
    )

    print('-'*40)
    return (city, month, day)


def load_data(city, month, day):
    """
    Reads the CSV for one city and narrows it to the requested month and weekday.

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    month_names = ['january', 'february', 'march', 'april', 'may', 'june']

    df = pd.read_csv(CITY_DATA[city])

    # Parse the timestamps once, then derive the helper columns from them.
    start_times = pd.to_datetime(df['Start Time'])
    df['Start Time'] = start_times
    df['Month'] = start_times.dt.month
    df['day_of_week'] = start_times.dt.day_name()

    if month != 'all':
        # Position in the list (1-based) is the calendar month number.
        month_number = month_names.index(month) + 1
        in_month = df['Month'] == month_number
        df = df[in_month]

    if day != 'all':
        # day_name() yields capitalised names such as "Monday".
        on_day = df['day_of_week'] == day.title()
        df = df[on_day]

    return df


def time_stats(df):
    """
    Displays statistics on the most frequent times of travel.

    Prints the most common month, day of week and start hour. The DataFrame
    passed in is only read, never modified.

    Args:
        df - Pandas DataFrame with 'Month', 'day_of_week' and a datetime
             'Start Time' column
    """

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column is empty, so there is no [0] to report.
        print('no data was found')
    else:
        common_month = df['Month'].mode()[0]
        print('the common month is: ', common_month)

        common_day = df['day_of_week'].mode()[0]
        print('the common day of week is: ', common_day)

        # Keep the hours in a local Series rather than writing an 'hour'
        # column into the caller's (possibly filtered) DataFrame.
        start_hours = df['Start Time'].dt.hour
        common_hour = start_hours.mode()[0]
        print('the common hour is: ', common_hour)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)



def station_stats(df):
    """
    Displays statistics on the most popular stations and trip.

    Prints the most common start station, end station and start/end
    combination. The DataFrame passed in is only read, never modified.

    Args:
        df - Pandas DataFrame with string 'Start Station' and 'End Station' columns
    """

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column is empty, so there is no [0] to report.
        print('no data was found')
    else:
        common_start_station = df['Start Station'].mode()[0]
        print('the common start station is: ', common_start_station)

        common_end_station = df['End Station'].mode()[0]
        print('the common end station is: ', common_end_station)

        # Build the trip labels in a local Series rather than writing a
        # 'common_trip' column into the caller's (possibly filtered) DataFrame.
        trips = 'From "' + df['Start Station'] + '" To "' + df['End Station'] + '"'
        common_trip = trips.mode()[0]
        print('the common trip is :', common_trip)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def trip_duration_stats(df):
    """
    Prints the summed and the average value of the 'Trip Duration' column.

    Args:
        df - Pandas DataFrame with a 'Trip Duration' column
    """
    print('\nCalculating Trip Duration...\n')
    started = time.time()

    durations = df['Trip Duration']
    # Each statistic is computed right before it is printed, in this order.
    for label, statistic in (
        ('total travel time is: ', durations.sum),
        ('the mean travel time is: ', durations.mean),
    ):
        print(label, statistic())

    elapsed = time.time() - started
    print("\nThis took {} seconds.".format(elapsed))
    print('-' * 40)


def user_stats(df):
    """
    Displays statistics on bikeshare users.

    Prints counts per user type, counts per gender (when a 'Gender' column
    exists) and the earliest, most recent and most common birth year (when
    a 'Birth Year' column with at least one numeric value exists).

    Args:
        df - Pandas DataFrame with a 'User Type' column and, optionally,
             'Gender' and 'Birth Year' columns
    """

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    user_types = df['User Type'].value_counts().to_frame()
    print('the count of user types is: ', user_types)

    if 'Gender' in df:
        gender_counts = df['Gender'].value_counts().to_frame()
        print('counts of gender: ', gender_counts)
    else:
        print("no data for gender")

    if 'Birth Year' in df:
        # Coerce unparsable entries to NaN and drop them with the missing ones,
        # so the int() conversions below cannot fail.
        birth_years = pd.to_numeric(df['Birth Year'], errors='coerce').dropna()
    else:
        birth_years = None

    if birth_years is None or birth_years.empty:
        print('no data was found')
    else:
        earliest = int(birth_years.min())
        most_recent = int(birth_years.max())
        # mode() returns every tied value (sorted); report the first one
        # instead of failing when several years are equally common.
        most_common = int(birth_years.mode()[0])
        print('the oldest user born in :', earliest)
        print('the yongest user born in : ', most_recent)
        print('Most Common Birth Year: ', most_common)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def display_raw_data(city):
    """
    Optionally page through the city's raw CSV five rows at a time.

    Asks once whether raw data should be shown. On "yes" (any letter case)
    it prints the file in 5-row chunks, asking after each chunk whether to
    continue. It stops on any other answer, at the end of the file, or when
    the user interrupts with Ctrl-C while paging.

    Args:
        (str) city - name of the city whose CSV file is read via CITY_DATA
    """
    answer = input("\n do you want to show 5 raw of data? enter Yes .\n").lower()
    if answer != 'yes':
        return

    try:
        for chunk in pd.read_csv(CITY_DATA[city], chunksize=5):
            print(chunk)
            again = input("Do you want to see another 5 raw data? enter Yes: ").lower()
            if again != 'yes':
                return
    except KeyboardInterrupt:
        # An interrupt while paging simply ends the raw-data display.
        return

            
def main():
    """Run the prompt / load / report cycle until the user declines to restart."""
    answer = 'yes'
    while answer == 'yes':
        city, month, day = get_filters()
        df = load_data(city, month, day)

        # Print every statistics section for the filtered data, in order.
        for report in (time_stats, station_stats, trip_duration_stats, user_stats):
            report(df)
        display_raw_data(city)

        answer = input('\nWould you like to restart? Enter yes or no.\n').lower()

    print('Good Luck')


# Start the interactive session only when this module is the entry point.
if __name__ == "__main__":
    main()


Hello! Let's explore some bikeshare data!
enter the city name: washington
enter a month or enter all: all
enter a day of week or enter all: all
----------------------------------------

Calculating The Most Frequent Times of Travel...

the common month is:  6
the common day of week is:  Wednesday
the common hour is:  8

This took 0.07375979423522949 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

the common start station is:  Columbus Circle / Union Station
the common end station is:  Columbus Circle / Union Station
the common trip is : From "Jefferson Dr & 14th St SW" To "Jefferson Dr & 14th St SW"

This took 0.27002692222595215 seconds.
----------------------------------------

Calculating Trip Duration...

total travel time is:  371183985.484
the mean travel time is:  1237.2799516133334

This took 0.0064165592193603516 seconds.
----------------------------------------

Calculating User Stats...

the count of user types is:       