In [2]:
'''This program analyzes bike share data for one of three cities, based on the user's choice.
It begins by interacting with the user to collect his/her choice of city and the preferred data filter(s).
The following steps are executed every time the user is asked a question:
1- The question is shown to the user and his/her answer is collected by the program.
2- The user input is evaluated by the program.
3- If the answer is an acceptable value, the program proceeds to the next question.
4- If the answer is an unacceptable value, the program prompts the user to correct the input and try again.
5- Then steps 2, 3, and 4 are repeated one more time, giving the user a third chance to enter a correct value.
6- If the third collected input is an unacceptable value, the program prints a message telling the user that it terminated
because it received several unacceptable values.
If all user inputs are accepted, then the program analyzes the data based on the collected criteria
and shows the results.'''

import pandas as pd
import time
import sys

'''The following variables hold the different messages and prompts that will be used by the (generate_message) function
to generate the appropriate response based on the user input and the program stage.
The first four variables have a zero at the end of their names. It refers to the first time the message
is printed to the user to ask about a city choice, a filter choice, a month choice or a day choice. The zero
also indicates that no error has been committed yet by the user.'''

# First-attempt prompts (suffix _0). Each one is the opening half of a question;
# the matching *_options text is appended to it by generate_message.
city_request_0 = (
    "Hello. Let's explore some Bikeshare data. Which city are you interested in exploring; \n"
    "Chicago, New York, or Washington? Type "
)
# {0} is filled in with the chosen city name by generate_message's .format() call.
filter_request_0 = (
    "So you are interested in {0}. Great, Do you like to filter {0}'s data by month, day, \n"
    "both or not at all? Please type "
)
month_request_0 = 'What month? Type '
day_request_0 = 'What day? Type '

# Retry prompts. Suffix _1 is used after the first invalid answer and suffix _2
# after the second one. The wording is the same for every request type, but a
# separate name per type is kept because generate_message looks the prompt up
# by the name '<request_type>_request_<error_no>'.
city_request_1 = 'You have entered an inappropriate value. Please type '
filter_request_1 = 'You have entered an inappropriate value. Please type '
month_request_1 = 'You have entered an inappropriate value. Please type '
day_request_1 = 'You have entered an inappropriate value. Please type '

city_request_2 = 'Kindly try again. You need to enter '
filter_request_2 = 'Kindly try again. You need to enter '
month_request_2 = 'Kindly try again. You need to enter '
day_request_2 = 'Kindly try again. You need to enter '

# Second half of every prompt: the list of answers the user may type.
# generate_message appends '<request_type>_options' to the matching
# '<request_type>_request_<n>' text, so these names must keep that pattern.
city_options = (
    "c for Chicago, n for New York or w for Washington.\n"
    "Capitalization doesn't matter.\n"
)
filter_options = 'm for month, d for day, b for both or n for no filter\n'
month_options = (
    "a value from this list: [Jan, Feb, March, April, May or June]. \n"
    "Capitalization doesn't matter\n"
)
# The closing bracket was missing, which left the list shown to the user unbalanced.
day_options = (
    "a number from this list: [1 for Monday, 2 for Tuesday, 3 for Wednesday, \n"
    "4 for Thursday, 5 for Friday, 6 for Saturday or 7 for Sunday]\n"
)

# quitting_notification: shown when the user fails three consecutive times to enter valid input.
quitting_notification = 'The program is terminated. Incorrect input received several times\n'

# Accepted answers for each question. The user's (lower-cased) input is checked
# against these with the `in` operator, so every table must be a list of whole
# answers: a plain string here would turn the check into a substring search.
city_correct_input = ['c', 'n', 'w']
filter_correct_input = ['m', 'd', 'b', 'n']
month_correct_input = ['jan', 'feb', 'march', 'april', 'may', 'june']
# Days are typed as digits, and input() yields strings, so the accepted values
# are the strings '1'..'7'. (The previous str(list(range(1, 8))) produced the
# single string "[1, 2, 3, 4, 5, 6, 7]", which also "contained" '', '[' and ', '.)
day_correct_input = [str(day_number) for day_number in range(1, 8)]

# city_names: maps the one-letter city answer to the display name that is
# inserted into the filter question.
city_names = {
    'c': 'Chicago',
    'n': 'New York',
    'w': 'Washington',
}

# CITY_FILE_NAMES: maps the one-letter city answer to the CSV file holding
# that city's data.
CITY_FILE_NAMES = {
    'c': 'chicago.csv',
    'n': 'new_york_city.csv',
    'w': 'washington.csv',
}

# months_as_numbers: maps the accepted month answers to calendar month
# numbers (jan -> 1 ... june -> 6).
months_as_numbers = {
    month_key: month_number
    for month_number, month_key in enumerate(
        ('jan', 'feb', 'march', 'april', 'may', 'june'), start=1
    )
}

# output_titles: section headings printed before each group of statistics.
output_titles = [
    'POPULAR TIMES STATISTICS',
    'STATIONS STATISTICS',
    'TRAVEL TIME STATISTICS',
    'USER INFO STATISTICS',
]

def generate_message(error_no, request_type="", city_chosen=""):
    '''Build the prompt for the current question, or announce termination.

    The prompt is assembled from two module-level strings that are looked up
    by name: '<request_type>_request_<error_no>' (opening text) and
    '<request_type>_options' (the list of valid answers).

    Args:
    error_no: int. How many invalid answers the user has given so far for this
    question (0 = first time the question is asked).
    request_type: string. One of 'city', 'filter', 'month' or 'day'.
    city_chosen: string. City name substituted into the opening text through
    str.format; texts without a placeholder are left unchanged by it.

    Returns:
    The full prompt string, or None when error_no is 3. In that case the
    quitting notification is printed instead.'''
    if error_no != 3:
        module_names = globals()
        opening = module_names[request_type + '_request_' + str(error_no)].format(city_chosen)
        valid_answers = module_names[request_type + '_options']
        return opening + valid_answers

    # Third invalid answer: tell the user the program is closing.
    print(quitting_notification)

def load_data(city, month, day):
    """
    Read one city's trip file and optionally filter it by month and weekday.

    Args:
        city - path of the CSV file to read (handed straight to pandas.read_csv)
        (int or str) month - month number to keep, or "all" to apply no month filter
        (int or str) day - weekday number to keep, where 1 means Monday and 7 means Sunday,
            or "all" to apply no day filter
    Returns:
        trips - pandas DataFrame with the added columns 'trip', 'month',
        'day_of_week' and 'hour', restricted to the requested month and day
    """
    trips = pd.read_csv(city)

    # Parse both timestamp columns so the .dt accessors below are available.
    for time_column in ('Start Time', 'End Time'):
        trips[time_column] = pd.to_datetime(trips[time_column])

    # Human-readable label for the start/end station pair.
    trips['trip'] = 'From ' + trips['Start Station'] + ', To ' + trips['End Station']

    # Derived calendar columns, all taken from the start timestamp.
    started = trips['Start Time']
    trips['month'] = started.dt.month
    # dayofweek counts Monday as 0; shift so Monday is 1 and Sunday is 7.
    trips['day_of_week'] = started.dt.dayofweek + 1
    trips['hour'] = started.dt.hour

    if month != 'all':
        trips = trips[trips['month'] == month]

    if day != 'all':
        trips = trips[trips['day_of_week'] == day]

    return trips

def correct_input(user_input, correct_choices):
    '''Tell whether the user's answer is one of the accepted answers.
    Parameters:
    user_input is the input collected from the user.
    correct_choices is the collection of accepted responses.
    Returns True when user_input is found in correct_choices, otherwise False.'''
    return user_input in correct_choices

def get_month_day(user_filter_choice):
    '''Turn the filter answer into a (month, day) pair, asking follow-up questions as needed.

    user_filter_choice is the filter answer (case-insensitive): b asks for a
    month and then a day, m asks for a month only, d asks for a day only, and
    any other value applies no filter.
    Returns a tuple (month, day) where month is a month number or 'all' and
    day is a weekday number or 'all'.'''
    selected = user_filter_choice.lower()
    month = day = 'all'  # 'all' means "do not filter on this field"

    if selected in ('b', 'm'):
        month_answer = data_request('month', month_correct_input)
        # The user types a month abbreviation; the data is filtered by number.
        month = months_as_numbers[month_answer.lower()]

    if selected in ('b', 'd'):
        day = int(data_request('day', day_correct_input))

    return (month, day)

def data_request(request_type, choices_list, city_chosen=''):
    '''Ask the user one question and return the answer once it is valid.

    request_type is a string from this list: ['city', 'filter', 'month', 'day']
    choices_list holds the accepted answers; the answer is lower-cased before it is checked
    city_chosen is the city that the user has chosen, if any (used in the prompt text)

    The user gets three attempts. The answer is returned exactly as typed
    (not lower-cased). After a third invalid answer the quitting message is
    printed and the program exits through sys.exit().'''
    for attempt in range(3):
        answer = input(generate_message(attempt, request_type, city_chosen))
        if correct_input(answer.lower(), choices_list):
            return answer

    # All three attempts were invalid: print the quitting message and stop.
    generate_message(3, request_type)
    sys.exit()

def get_popular_times(data_frame, city='', month_filter='', day_filter=''):
    '''Print the most frequent month, weekday and start hour of the trips.

    data_frame: DataFrame with 'month', 'day_of_week' and 'hour' columns.
    city: unused here; accepted so all statistics functions share one signature.
    month_filter: the month filter in effect; anything other than 'all' means
    the data is already limited to one month, so the month statistic is skipped.
    day_filter: same as month_filter, for the weekday statistic.

    The statistics are read from the data_frame argument (the previous code
    ignored it and used a module-level global instead).'''
    if month_filter != 'all':
        print('The most popular month is meaningless because you filtered the data by month.')
    else:
        popular_month = data_frame['month'].mode()[0]
        month_count = data_frame['month'].value_counts()
        # value_counts sorts by frequency, so the first entry is the top count.
        print('The most popular month: {}     Count = {}'.format(popular_month, month_count.iloc[0]))

    if day_filter != 'all':
        print('The most popular day is meaningless because you filtered the data by day')
    else:
        popular_day = data_frame['day_of_week'].mode()[0]
        day_count = data_frame['day_of_week'].value_counts()
        print('The most popular day:   {}     Count = {}'.format(popular_day, day_count.iloc[0]))

    popular_hour = data_frame['hour'].mode()[0]
    popular_hour_count = data_frame['hour'].value_counts()
    print('The most popular hour:  {}     Count = {}\n'.format(popular_hour, popular_hour_count.iloc[0]))

def get_popular_stations(data_frame, city='', month_filter='', day_filter=''):
    '''Print the most frequent start station, end station and trip, with their counts.

    data_frame: DataFrame with 'Start Station', 'End Station' and 'trip' columns.
    city, month_filter, day_filter: unused here; accepted so all statistics
    functions share one signature.

    The statistics are read from the data_frame argument (the previous code
    ignored it and used a module-level global instead).'''
    popular_start_station = data_frame['Start Station'].mode()[0]
    popular_end_station = data_frame['End Station'].mode()[0]
    popular_trip = data_frame['trip'].mode()[0]

    # value_counts is indexed by station/trip name, so the top count must be
    # taken by position (.iloc[0]); [0] would be a label lookup.
    start_station_count = data_frame['Start Station'].value_counts().iloc[0]
    end_station_count = data_frame['End Station'].value_counts().iloc[0]
    trip_count = data_frame['trip'].value_counts().iloc[0]

    print("Most popular start station: {}     Count: {}".format(popular_start_station, start_station_count))
    print("Most popular end station: {}     Count: {}".format(popular_end_station, end_station_count))
    print('Most popular trip: {}     Count: {}\n'.format(popular_trip, trip_count))

def get_travel_times(data_frame, city='', month_filter='', day_filter=''):
    '''Print the total and the average trip duration.

    data_frame: DataFrame with a 'Trip Duration' column. The printed labels
    treat the values as seconds: the total is divided by 3600 * 24 and shown
    in days, the average is shown as-is.
    city, month_filter, day_filter: unused here; accepted so all statistics
    functions share one signature.

    The statistics are read from the data_frame argument (the previous code
    ignored it and used a module-level global instead).'''
    durations = data_frame['Trip Duration']
    total_duration = durations.sum() / (3600 * 24)
    average_duration = durations.mean()
    print('Total travel time in DAYS: {} days'.format(total_duration))
    print('Average travel time in SECONDS: {} seconds\n'.format(average_duration))

def get_user_info(data_frame, city='', month_filter='', day_filter=''):
    '''Print user-type counts and, where available, gender and birth-year statistics.

    data_frame: DataFrame with a 'User Type' column; for cities other than
    Washington it must also have 'Gender' and 'Birth Year' columns.
    city: one-letter city code (case-insensitive). 'w' (Washington) skips the
    gender and birth-year statistics.
    month_filter, day_filter: unused here; accepted so all statistics functions
    share one signature.

    The statistics are read from the data_frame argument (the previous code
    ignored it and used a module-level global instead). The function no longer
    writes a 'gender.csv' file to the working directory; that was a leftover
    debugging dump and nothing in the program read it back.'''
    user_type = data_frame['User Type'].value_counts().to_frame()
    print('The count of user types:\n{}\n'.format(user_type))

    if city.lower() == 'w':
        print('There in no information about gender and birth year in Washington file\n')
        return

    gender = data_frame['Gender'].value_counts().to_frame()
    print('The count of male and female:\n{}\n'.format(gender))

    birth_column = data_frame['Birth Year']
    # value_counts sorts by frequency: first label is the most common year,
    # last label is the least common one.
    birth_counts = birth_column.value_counts()
    print('The most common bike riders with regard to age were born in {}'.format(birth_counts.index[0]))
    print('The least common bike riders with regard to age were born in {}\n'.format(birth_counts.index[-1]))

    # The latest birth year belongs to the youngest rider, the earliest to the oldest.
    print('The youngest bike rider was born in {}'.format(birth_column.max()))
    print('The oldest bike rider was born in {}\n'.format(birth_column.min()))

''' The following part is the main part of the program. Everything above is either a variable or a function.
The next part uses those variables and functions to produce the requested statistics'''
def main():
    '''Run one full session: collect the choices, load the data, print the
    four statistics sections, then optionally page through the raw trips.'''
    # The loaded frame is also published as the module-level name `b`, because
    # other code in this module may read it under that name.
    global b
    city = data_request('city', city_correct_input)  # Requesting city from the user
    # Requesting the filter type, then the month and/or day it calls for
    filters = get_month_day(data_request('filter', filter_correct_input, city_names[city.lower()]))
    # Load the chosen city's file and apply the (month, day) filters
    b = load_data(CITY_FILE_NAMES[city.lower()], *filters)

    reports = (get_popular_times, get_popular_stations, get_travel_times, get_user_info)
    # output_titles is ordered to match the report functions above.
    for title, report in zip(output_titles, reports):
        start = time.time()
        print(title)
        report(b, city, *filters)
        print('Calculation time: {}\n'.format((time.time() - start)))

    df_to_show = b[['trip', 'Trip Duration']]  # Only these columns are shown on request
    response = input('Would you like to see the first 5 rows of data from the dataset? Type y for yes, n for no. Capitalization does\'t matter\n')

    page_size = 5
    first_row = 0  # position of the first row of the next page
    while response.lower() == 'y':
        print(df_to_show[first_row:first_row + page_size], '\n')
        first_row += page_size
        # Stop once every row has been shown instead of printing empty pages.
        if first_row >= len(df_to_show):
            print('There are no more rows to show.\n')
            break
        response = input('Would you like to see more rows? Type y for yes, any other letter for no.\n')

# Default answer for the first run: starting the script means the user wants
# at least one session.
answer = 'y'

# The whole restart loop sits behind the __main__ guard so that importing this
# module neither runs a session nor blocks waiting for keyboard input.
if __name__ == '__main__':
    while answer.lower() == 'y':
        main()
        # Checking if the user would like to run the program again
        answer = input('All statistics have been calculated. Would you like to start again? type y for yes, any other letter for no.\n')

Hello. Let's explore some Bikeshare data. Which city are you interested in exploring; 
Chicago, New York, or Washington? Type c for Chicago, n for New York or w for Washington.
Capitalization doesn't matter.
c
So you are interested in Chicago. Great, Do you like to filter Chicago's data by month, day, 
both or not at all? Please type m for month, d for day, b for both or n for no filter
n
POPULAR TIMES STATISTICS
The most popular month: 6     Count = 98081
The most popular day:   2     Count = 45912
The most popular hour:  17     Count = 35992

Calculation time: 0.02094125747680664

STATIONS STATISTICS
Most popular start station: Streeter Dr & Grand Ave     Count: 6911
Most popular end station: Streeter Dr & Grand Ave     Count: 7512
Most popular trip: From Lake Shore Dr & Monroe St, To Streeter Dr & Grand Ave     Count: 854

Calculation time: 0.3111691474914551

TRAVEL TIME STATISTICS
Total travel time in DAYS: 3250.8308680555556 days
Average travel time in SECONDS: 936.23929 seconds
