In [1]:
import time
import pandas as pd
import numpy as np


In [2]:
CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }

In [3]:
def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')
    # TO DO: get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs
    city= input("which city do you want to explore?\n chicago , new work or washinton").lower()
    while city not in CITY_DATA.keys():
        print("it's not a valid city , please choose a valid one to continue ")
        city=input("which city do you want to explore ?\n chicago , new work or washinton")

    # TO DO: get user input for month (all, january, february, ... , june)
    months=['january' , 'febrauary' , 'march', 'april' , 'may' , 'june' , 'all']
    while True:
        month = input("in what month?\n january , febrauary , march , april , may , june  or all ").lower()
        if month in months:
            break
        else:
            print("it's not a valid month , please choose a valid one to continue")


    # TO DO: get user input for day of week (all, monday, tuesday, ... sunday)
    days=['sunday' , 'monday', 'tuesday' , 'wednesday' , 'thursday', 'friday' , 'all']
    while True:
        day=input('on what day ?\nsunday , monday, tuesday , wednesday , thursday, friday , all').lower()
        if day in days:
            break
        else:
            print("it's not a valid day , please choose a valid one to continue")

    print('-'*40)
    return city, month, day

In [4]:
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - pandas DataFrame containing city data filtered by month and day
    """
    
    # load data file into a dataframe
    data = pd.read_csv(CITY_DATA[city])

    # convert the Start Time column to datetime
    data['Start Time'] = pd.to_datetime(data['Start Time'])

    # extract month and day of week from Start Time to create new columns
    data['month'] = data['Start Time'].dt.month
    data['day_of_week'] = data['Start Time'].dt.day_name()
    data["day_of_month"] = data['Start Time'].dt.day
    data['hour'] = data["Start Time"].dt.hour

    # filter by month if applicable
    if month != 'all':
        # use the index of the months list to get the corresponding int
        months = ['january', 'february', 'march', 'april', 'may', 'june']
        month = months.index(month) + 1

        # filter by month to create the new dataframe
        data = data[data['month'] == month]

    # filter by day of week if applicable
    if day != 'all':
        # filter by day of week to create the new dataframe
        data = data[data['day_of_week'] == day.title()]

    return data

In [5]:
def time_stats(data):
    """Displays statistics on the most frequent times of travel."""

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    # TO DO: display the most common month
    print('the most common month is :{}'.format(data['month'].mode()[0]))


    # TO DO: display the most common day of week
    print('the most common day is :{}'.format(data['day_of_week'].mode()[0]))


    # TO DO: display the most common start hour
    print('the most common start hour is :{}'.format(data['hour'].mode()[0]))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [6]:
def station_stats(data):
    """Displays statistics on the most popular stations and trip."""

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    # TO DO: display most commonly used start station
    print('the most common start station is :{}'.format(data['Start Station'].mode()[0]))
    
    # TO DO: display most frequent combination of start station and end station trip
    data['ride']=data['Start Station'] +','+ data['End Station']
    print('the most common ride is : {}'.format(data['ride'].mode()[0]))


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [7]:
def trip_duration_stats(data):
    """Displays statistics on the total and average trip duration."""

    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    # TO DO: display total travel time
    print('total travel time : ',(data['Trip Duration'].sum()))


    # TO DO: display mean travel time
    print('mean travel time : ',(data['Trip Duration'].mean()))


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [8]:
def user_stats(data):
    """Displays statistics on bikeshare users."""

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # TO DO: Display counts of user types
    print(data['User Type'].value_counts())


    # Display counts of gender
    # washington dataset has no gender, so if the user entered it
    # we throw an excecption: gender is not available
    try:
        gender = data["Gender"].value_counts()
        print("counts of user gender: \n",gender)
    except:
        print('No gender available in this Stats\n')

    # Display earliest , most recent, and most common year of birth
    # washington dataset has no birth year, so if the user entered it
    # we throw an excecption: birth year is not available
    try:
        earliest_year = int(data['Birth Year'].min())
        print("earliest year of birth:",earliest_year)
        
        most_recent_year = int(data['Birth Year'].max())
        print("most recent year of birth:",most_recent_year)
        
        most_common_year = int(data['Birth Year'].mode()[0])
        print("most common year of birth:",most_common_year)
    except:
        print('No birth year available in this Stats\n')

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [9]:
def show_data(data): 
    """
    show a sample of data
    """
    i = 5
    order = ''
    
    while order != "no" and "yes":
        
        order = input("Do you want to see a {} samples of data?: yes or no\n".format(i)).lower()
        
        if order == "no":
            break
            
        elif order == 'yes':
            print(data.head(i))
            print('-'*40)
            i += 5
            
        else:
            print("please enter a valid choice")    


In [10]:
def main():
    while True:
        city, month, day = get_filters()
        data = load_data(city, month, day)

        time_stats(data)
        station_stats(data)
        trip_duration_stats(data)
        user_stats(data)
        show_data(data)
        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.lower() != 'yes':
            break

if __name__ == "__main__":
	main()


Hello! Let's explore some US bikeshare data!
which city do you want to explore?
 chicago , new work or washintonchicago
in what month?
 january , febrauary , march , april , may , june  or all may
on what day ?
sunday , monday, tuesday , wednesday , thursday, friday , allwednesday
----------------------------------------

Calculating The Most Frequent Times of Travel...

the most common month is :5
the most common day is :Wednesday
the most common start hour is :17

This took 0.0039997100830078125 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

the most common start station is :Clinton St & Washington Blvd
the most common ride is : Canal St & Madison St,Michigan Ave & Washington St

This took 0.004000186920166016 seconds.
----------------------------------------

Calculating Trip Duration...

total travel time :  8989587
mean travel time :  810.0186520093711

This took 0.0 seconds.
----------------------------------------

Calculati