In [4]:
import pandas as pd
import time

# Maps each supported city name (lower case, as typed by the user) to the
# CSV file that holds its trip records.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york': 'new_york_city.csv',
    'washington': 'washington.csv',
}
###########################################################################################################

def _ask_choice(prompt, choices, error_message='invalid input, please try again \n'):
    """Keep prompting until the user enters one of ``choices``.

    The answer is lower-cased and stripped of surrounding whitespace before
    it is compared, so ' Chicago ' matches 'chicago'.

    Args:
        (str) prompt - text shown to the user on every attempt
        (sequence of str) choices - accepted answers, in lower case
        (str) error_message - text printed after an answer that is not accepted
    Returns:
        (str) the normalised answer, guaranteed to be in ``choices``
    """
    while True:
        answer = input(prompt).lower().strip()
        if answer in choices:
            return answer
        print(error_message)


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Besides returning them, the three values are also stored in the
    module-level names ``city``, ``month`` and ``day``.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    # time_stats() and user_stats() read these module-level names, so they
    # must keep being published here.
    global city
    global month
    global day

    # accepted answers for each question
    day_list = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
    month_list = ['january', 'february', 'march', 'april', 'may', 'june']
    city_list = ['chicago', 'new york', 'washington']

    city = _ask_choice(
        'Would you like to see data for Chicago, New York, or Washington? \n',
        city_list,
        'your input isn\'t in the data, please enter a city from the given cities \n',
    )

    filter_by = _ask_choice(
        'Would you like to filter the data by month, day, both, or not at all? (type [none] for no time filter)\n',
        ('month', 'day', 'both', 'none'),
    )

    # 'all' means "no filter"; each value is overwritten only when the chosen
    # filter type covers it. For 'both' the month is asked before the day.
    month = 'all'
    day = 'all'

    if filter_by in ('month', 'both'):
        month = _ask_choice(
            'Which month - January, February, March, April, May, or June? \n',
            month_list,
        )

    if filter_by in ('day', 'both'):
        day = _ask_choice(
            'Which day - Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, or Sunday? \n',
            day_list,
        )

    print('-'*40)

    return city, month, day
#########################################################################################################################

def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    The 'Start Time' column is converted to datetime and two helper columns
    are added: 'month' (month number) and 'day_of_week' (day name).
    The result is also stored in the module-level name ``df``.

    Args:
        (str) city - name of the city to analyze; must be a key of CITY_DATA
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    Raises:
        KeyError - if city is not a key of CITY_DATA
        ValueError - if month is neither "all" nor one of january..june
    """
    # the loaded frame has always been published at module level; keep doing so
    global df

    # read the data of the chosen city
    df = pd.read_csv(CITY_DATA[city])

    # parse the start time once and derive the columns used for filtering
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    df['month'] = df['Start Time'].dt.month
    df['day_of_week'] = df['Start Time'].dt.day_name()

    if month != 'all':
        # position in the list + 1 is the calendar month number
        months = ['january', 'february', 'march', 'april', 'may', 'june']
        month_number = months.index(month) + 1
        df = df[df['month'] == month_number]

    if day != 'all':
        # day_name() yields capitalised names such as 'Monday'
        df = df[df['day_of_week'] == day.title()]

    if month != 'all' or day != 'all':
        # A filtered frame is a slice of the full data; callers add columns to
        # it, which makes pandas emit SettingWithCopyWarning. Hand back an
        # independent copy instead.
        df = df.copy()

    return df
##########################################################################################################################

def time_stats(df, month=None, day=None):
    """Displays statistics on the most frequent times of travel.

    The most popular start hour is always printed. The most popular month is
    printed only when no month filter is active, and the most popular day only
    when no day filter is active.

    Args:
        df - Pandas DataFrame with a datetime 'Start Time' column plus the
             'month' (month number) and 'day_of_week' columns
        (str) month - active month filter, or "all"; when omitted the
             module-level ``month`` is used ("all" if it does not exist)
        (str) day - active day filter, or "all"; when omitted the
             module-level ``day`` is used ("all" if it does not exist)
    """
    import calendar

    # Fall back to the module-level filter values so that existing
    # time_stats(df) calls keep working.
    if month is None:
        month = globals().get('month', 'all')
    if day is None:
        day = globals().get('day', 'all')

    print('\nCalculating The Most Frequent Times of Travel...\n')

    start_time = time.time()

    if df.empty:
        # mode()[0] would raise on an empty selection
        print('No trips match the selected filters.')
    else:
        # use a local series so the caller's DataFrame is not modified
        hours = df['Start Time'].dt.hour
        print('Most popular hour: {} ---- Count: {}. '.format(
            hours.mode()[0], hours.value_counts().max()))

        # the popular month is only meaningful when no month filter is applied
        if month == 'all':
            month_num = int(df['month'].mode()[0])
            popular_month = calendar.month_name[month_num].lower()
            print('Most popular month: {} ---- Count: {}.'.format(
                popular_month, df['month'].value_counts().max()))

        # the popular day is only meaningful when no day filter is applied
        if day == 'all':
            print('Most popular day: {} ---- Count: {}.'.format(
                df['day_of_week'].mode()[0],
                df['day_of_week'].value_counts().max()))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
#####################################################################################################################

def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Prints the most frequent start station, the most frequent end station and
    the most frequent (start station, end station) pair, each with its count.

    Args:
        df - Pandas DataFrame with 'Start Station' and 'End Station' columns
    """

    print('\nCalculating The Most Popular Stations and Trip...\n')

    start_time = time.time()

    if df.empty:
        # mode()[0] and idxmax() would raise on an empty selection
        print('No trips match the selected filters.')
    else:
        # most popular start station and its count
        start_counts = df['Start Station'].value_counts()
        popular_start_station = df['Start Station'].mode()[0]

        # most popular end station and its count
        end_counts = df['End Station'].value_counts()
        popular_end_station = df['End Station'].mode()[0]

        # Count trips per (start, end) pair. Grouping on both columns keeps the
        # two names apart; gluing them into one string made the output
        # unreadable and let different pairs collide ('A'+'BC' == 'AB'+'C').
        trip_counts = df.groupby(['Start Station', 'End Station']).size()
        trip_start, trip_end = trip_counts.idxmax()

        print('Most popular start station: {} ---- count: {} \nMost popular end station: {} ---- count: {}'.format(
            popular_start_station, start_counts.max(),
            popular_end_station, end_counts.max()))
        print('Most popular trip: {} -> {} ---- count: {}'.format(
            trip_start, trip_end, trip_counts.max()))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
##########################################################################################################################

def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration.

    Prints the sum of the 'Trip Duration' column, the number of non-missing
    durations, and their mean.

    Args:
        df - Pandas DataFrame with a numeric 'Trip Duration' column
    """

    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    durations = df['Trip Duration']

    # total travel time and the number of trips it covers
    total_travel_time = durations.sum()
    total_travel_time_count = durations.count()

    # average travel time
    mean_travel_time = durations.mean()

    # (the label previously read "Avreage" and carried a stray leading space)
    print('Total travel time: {} ---- count: {} \nAverage travel time: {}'.format(
        total_travel_time, total_travel_time_count, mean_travel_time))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
###########################################################################################################################

def user_stats(df):
    """Displays statistics on bikeshare users.

    Always prints the number of trips per user type. When the data has both a
    'Gender' and a 'Birth Year' column, it also prints the number of trips per
    gender and the most common, most recent (youngest) and earliest (oldest)
    birth years with their counts; otherwise it reports that this data is
    unavailable.

    Args:
        df - Pandas DataFrame with a 'User Type' column and, optionally,
             'Gender' and 'Birth Year' columns
    """

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # value_counts() is ordered by frequency, so every count is printed next
    # to its own label instead of assuming which category comes first.
    print('Counts by user type:')
    for user_type, total in df['User Type'].value_counts().items():
        print('{}: {}'.format(user_type, total))

    # Decide from the data itself whether demographic columns are available
    # rather than from which city was selected.
    if 'Gender' in df.columns and 'Birth Year' in df.columns:
        print('Counts by gender:')
        for gender, total in df['Gender'].value_counts().items():
            print('{}: {}'.format(gender, total))

        birth_years = df['Birth Year'].dropna()

        if birth_years.empty:
            # mode()[0] would raise when no birth year is recorded
            print('There\'s no birth year data for this selection')
        else:
            year_counts = birth_years.value_counts()

            common_birth_year = birth_years.mode()[0]
            # the latest birth year belongs to the youngest rider
            youngest_year = birth_years.max()
            oldest_year = birth_years.min()

            print('Most common year: {} ---- count: {}'.format(
                int(common_birth_year), year_counts.max()))
            print('Youngest year: {} ---- count: {}'.format(
                int(youngest_year), year_counts.loc[youngest_year]))
            print('Oldest year: {} ---- count: {}'.format(
                int(oldest_year), year_counts.loc[oldest_year]))
    else:
        print('There\'s no gender or birth year data for this city')

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
########################################################################################################################

def show_five_rows(df):
    """Shows the raw data five rows at a time for as long as the user asks.

    Each 'yes' prints the next rows (up to five), every row followed by a
    separator line. The loop ends when the user answers 'no' or when every
    row has been shown.

    Args:
        df - Pandas DataFrame whose rows are displayed
    """
    page_size = 5
    total_rows = len(df)
    counter = 0  # position of the next row to display

    while True:
        answer = input('Would you like to see the raw data? (Yes or NO)\n')
        answer = answer.lower().strip()

        if answer == 'yes':
            # never read past the last row: the final page may be shorter
            stop = min(counter + page_size, total_rows)

            for position in range(counter, stop):
                print(df.iloc[position])
                print('-'*40)

            counter = stop

            if counter >= total_rows:
                print('There is no more raw data to display')
                break

        elif answer == 'no':
            break

        else:
            print('Invalid input, please try again')
#######################################################################################################################

def main():
    """Runs the interactive analysis until the user chooses not to restart.

    Each round asks for the filters, loads the matching data, prints the
    time, station, trip-duration and user statistics, and then offers the
    raw rows.
    """
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)
        show_five_rows(df)

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        # normalise like every other prompt so that ' Yes ' still restarts
        if restart.lower().strip() != 'yes':
            break


# Start the interactive session only when run as a script, not on import.
if __name__ == "__main__":
    main()
######################################################################################################################

Would you like to see data for Chicago, New York, or Washington? 
new york
Would you like to filter the data by month, day, both, or not at all? (type [none] for no time filter)
both
Which month - January, February, March, April, May, or June? 
may
Which day - Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, or Sunday? 
 monday
----------------------------------------

Calculating The Most Frequent Times of Travel...

Most popular hour: 18 ---- Count: 1075. 

This took 0.015650510787963867 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

Most popular start station: Pershing Square North ---- count: 92 
Most popular end station: Pershing Square North ---- count: 92
Most popular trip: Central Park S & 6 Ave5 Ave & E 88 St ---- count: 8

This took 0.03127288818359375 seconds.
----------------------------------------

Calculating Trip Duration...

Total travel time: 8164174 ---- count: 9242 
 Avreage travel time: 883.3774074875568