In [None]:
import time
import pandas as pd
import numpy as np

In [None]:
# Lookup table: title-cased city name (the form get_filters returns) ->
# CSV file that holds that city's trip records.
CITY_DATA = {
    'Chicago': 'chicago.csv',
    'New York City': 'new_york_city.csv',
    'Washington': 'washington.csv',
}

In [None]:
def _ask_indexed_option(options, prompt, error_message):
    """Prompt until the user picks one of ``options`` by list position or by name.

    Args:
        (list of str) options - valid answers; position 0 is the "no filter" entry
        (str) prompt - text passed to input() on every attempt
        (str) error_message - text printed after every rejected answer
    Returns:
        (str) the selected element of ``options``
    """
    while True:
        answer = input(prompt).strip()
        try:
            position = int(answer)
        except ValueError:
            # Not a number: accept the option's name in any letter case.
            name = answer.title()
            if name in options:
                return name
        else:
            # range() membership rejects negatives, so "-1" cannot index from the end.
            if position in range(len(options)):
                return options[position]
        print(error_message)


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Every question is repeated until a valid answer is given. The city may be
    entered by menu number (1-3) or by name; month and day may be entered by
    menu number (0 means no filter) or by name. Names are case-insensitive.

    Returns:
        (str) city - 'Chicago', 'New York City' or 'Washington'
        (str) month - title-cased month name, or "All" to apply no month filter
        (str) day - title-cased day-of-week name, or "All" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')
    # NOTE(review): the original published its answers as module-level names as
    # well as returning them; that side effect is kept in case other cells read
    # them. Callers should rely on the return value.
    global city, month, day

    # Accept either the menu number or the (title-cased) city name.
    city_dict = {'1': 'Chicago', '2': 'New York City', '3': 'Washington',
                 'Chicago': 'Chicago', 'New York City': 'New York City', 'Washington': 'Washington'}
    while True:
        city_input = input('\nPlease specify a city from the following options:\n(1) Chicago\n(2) New York City\n(3) Washington\n').strip().title()
        if city_input in city_dict:
            city = city_dict[city_input]
            break
        print('Invalid City Choice Please Try Again!!')

    months = ['All', 'January', 'February', 'March', 'April', 'May', 'June', 'July',
              'August', 'September', 'October', 'November', 'December']
    month_menu = ', '.join(f'({i}) {name}' for i, name in enumerate(months))
    month = _ask_indexed_option(
        months,
        f"\nPlease specify a month from the following options or enter 0 for no month filter:\n{month_menu}\n",
        'Invalid Month Choice Please Try Again!!',
    )

    days = ["All", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
    day_menu = ', '.join(f'({i}) {name}' for i, name in enumerate(days))
    # Shares the month prompt's validation, so an unknown day *name* now gets an
    # error message too instead of a silent re-prompt.
    day = _ask_indexed_option(
        days,
        f"\nPlease specify a day from the following options or enter 0 for no day filter:\n{day_menu}\n",
        'Invalid day Choice Please Try Again!!',
    )

    return city, month, day

In [None]:
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    The CSV named by CITY_DATA[city] is read, its 'Start Time' and 'End Time'
    columns are parsed as datetimes, and the rows are sorted by 'Start Time'
    with a fresh 0..n-1 index before any filter is applied (so a filtered
    result keeps the row labels of the sorted, unfiltered frame).

    Args:
        (str) city - name of the city to analyze; must be a key of CITY_DATA
        (str) month - name of the month to filter by (e.g. "March"), or "All" to apply no month filter
        (str) day - name of the day of week to filter by (e.g. "Friday"), or "All" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    Raises:
        KeyError - if city is not a key of CITY_DATA
    """
    # NOTE(review): the original also published the result as the module-level
    # name `df`; that side effect is kept in case other cells read it.
    global df

    trips = pd.read_csv(CITY_DATA[city])
    # errors='ignore' makes this a real "drop if it exists": a file without the
    # unnamed index column no longer raises KeyError.
    trips = trips.drop(columns='Unnamed: 0', errors='ignore')
    trips['Start Time'] = pd.to_datetime(trips['Start Time'])
    trips['End Time'] = pd.to_datetime(trips['End Time'])
    trips = trips.sort_values('Start Time').reset_index(drop=True)

    # Each filter is applied only when requested; "All" means keep every row.
    if month != 'All':
        trips = trips[trips['Start Time'].dt.month_name() == month]
    if day != 'All':
        trips = trips[trips['Start Time'].dt.day_name() == day]

    df = trips
    return df

In [None]:
def time_stats(df):
    """Displays statistics on the most frequent times of travel.

    Prints the most common month, day of week and start hour found in the
    'Start Time' column, each with its trip count, followed by the elapsed
    time. When the selection holds no start times (for example a month/day
    combination without trips) a notice is printed instead of raising.

    Args:
        df - Pandas DataFrame with a datetime 'Start Time' column
    """
    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    starts = df['Start Time'].dropna()
    if starts.empty:
        # value_counts() would be empty and .iloc[0] would raise IndexError.
        print('No trips match the selected filters, so there are no travel times to report.')
    else:
        # value_counts() sorts by descending count, so position 0 is the mode.
        month_counts = starts.dt.month_name().value_counts()
        print('The most common month for your choosen filter is', month_counts.index[0], 'and Its counts are', month_counts.iloc[0], 'Trips')

        day_counts = starts.dt.day_name().value_counts()
        print('The most common day of week is', day_counts.index[0], 'and Its counts are', day_counts.iloc[0], 'times')

        hour_counts = starts.dt.hour.value_counts()
        print('The most common hour at', hour_counts.index[0], 'and Its counts are', hour_counts.iloc[0], 'times')

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [None]:
def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Prints the most frequent 'Start Station', the most frequent 'End Station'
    and the most frequent (start, end) pair, each with its trip count,
    followed by the elapsed time. An empty selection prints a notice instead
    of raising.

    Args:
        df - Pandas DataFrame with 'Start Station' and 'End Station' columns
    """
    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    if df.empty:
        # With no rows every value_counts() is empty and .iloc[0] would raise.
        print('No trips match the selected filters, so there are no stations to report.')
    else:
        # value_counts() sorts by descending count, so position 0 is the mode.
        start_counts = df['Start Station'].value_counts()
        print('The most commonly used as start station', start_counts.index[0], 'and Its counts are', start_counts.iloc[0], 'times')

        end_counts = df['End Station'].value_counts()
        print('The most commonly used as End Station', end_counts.index[0], 'and Its counts are', end_counts.iloc[0], 'times')

        # Counting both columns together yields a MultiIndex; its first label
        # is the (start, end) tuple of the most frequent trip.
        route_counts = df[['Start Station', 'End Station']].value_counts()
        print('The most frequent combination of start station and end station trip', route_counts.index[0], 'and Its counts are', route_counts.iloc[0], 'times')

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [None]:
def _split_minutes(total_minutes):
    """Break a minute count into (whole days, whole hours, leftover minutes)."""
    minutes_per_day = 24 * 60
    whole_days, rest = divmod(total_minutes, minutes_per_day)
    whole_hours, leftover = divmod(rest, 60)
    return int(whole_days), int(whole_hours), leftover


def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration.

    Prints the sum and the mean of the 'Trip Duration' column, each together
    with a calendar-style breakdown, followed by the elapsed time. Years,
    months, days and hours are printed as whole numbers. An empty selection
    prints a notice instead of a "nan" breakdown.

    NOTE(review): the arithmetic treats 'Trip Duration' as minutes (1440 per
    day) - confirm against the dataset; if the column is in seconds the
    constants and the printed unit need to change.

    Args:
        df - Pandas DataFrame with a numeric 'Trip Duration' column
    """
    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    # sum() and mean() skip missing values anyway; dropping them up front only
    # lets us detect the "nothing to summarise" case.
    durations = df['Trip Duration'].dropna()
    if durations.empty:
        print('No trip duration data for the selected filters.')
    else:
        # Average calendar lengths used to express a day count as years/months.
        days_per_year = 365.25
        days_per_month = 30.44

        # display total travel time
        total_travel_time = durations.sum()
        days, hours, minutes = _split_minutes(total_travel_time)
        years = int(days // days_per_year)
        days_into_year = days % days_per_year
        months = int(days_into_year // days_per_month)
        remaining_days = int(days_into_year % days_per_month)
        print(f"Total travel time is {total_travel_time} minutes and is equal to {years} years, {months} months, {remaining_days} days, {hours} hours, and {minutes} minutes.")

        # display mean travel time
        mean_travel_time = round(durations.mean(), 2)
        days, hours, minutes = _split_minutes(mean_travel_time)
        minutes = round(minutes, 2)
        print(f"Mean travel time is {mean_travel_time} minutes and is equal to {days} days, {hours} hours, and {minutes} minutes.")

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [None]:
def user_stats(df):
    """Displays statistics on bikeshare users.

    Prints, in order:
      * the number of 'Subscriber', 'Customer' and 'Dependent' users (or a
        "No ... Users" line for a type that does not occur),
      * the number of 'Male' and 'Female' users, or 'No Gender Data' when the
        'Gender' column is missing or has no values,
      * the earliest, most recent and most common 'Birth Year', or
        'No Birth Year Data' when that column is missing, has no values, or
        cannot be read as whole years,
    followed by the elapsed time.

    Args:
        df - Pandas DataFrame with a 'User Type' column and, optionally,
             'Gender' and 'Birth Year' columns
    """
    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    user_types_counts = df['User Type'].value_counts()
    for user_type in ('Subscriber', 'Customer', 'Dependent'):
        if user_type in user_types_counts.index:
            print(f"{user_type} count:", user_types_counts[user_type])
        else:
            print(f'No {user_type} Users')

    # Display counts of gender
    gender_counts = df['Gender'].value_counts() if 'Gender' in df.columns else None
    if gender_counts is None or gender_counts.empty:
        print('No Gender Data')
    else:
        # A gender that does not occur is reported as 0 rather than aborting
        # after the first line with a misleading "No Gender Data".
        print("Male count:", gender_counts.get('Male', 0))
        print("Female count:", gender_counts.get('Female', 0))

    # Display earliest, most recent, and most common year of birth
    birth_years = df['Birth Year'].dropna() if 'Birth Year' in df.columns else None
    if birth_years is None or birth_years.empty:
        print('No Birth Year Data')
    else:
        try:
            # Convert everything before printing so a bad value cannot leave a
            # half-printed report.
            year_counts = birth_years.value_counts()
            earliest_year = int(birth_years.min())
            recent_year = int(birth_years.max())
            most_year_value = int(year_counts.index[0])
            most_year_count = int(year_counts.iloc[0])
        except (TypeError, ValueError):
            # Best effort, as before: unreadable years count as "no data".
            print('No Birth Year Data')
        else:
            print('Most earliest Birth Year is', earliest_year)
            print('Most Recent Birth Year is', recent_year)
            print('most common year of birth is'.title(), most_year_value, '!', most_year_count, 'was born that year')

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [None]:
def main():
    """Run the interactive analysis loop.

    Each pass asks for the filters, loads the matching trips, prints the four
    statistics reports in order, and then asks whether to start over. Any
    answer other than "yes" (case-insensitive) ends the loop.
    """
    keep_going = True
    while keep_going:
        chosen_city, chosen_month, chosen_day = get_filters()
        trips = load_data(chosen_city, chosen_month, chosen_day)

        # Same report order as always: times, stations, durations, users.
        for report in (time_stats, station_stats, trip_duration_stats, user_stats):
            report(trips)

        answer = input('\nWould you like to restart? Enter yes or no.\n')
        keep_going = answer.lower() == 'yes'

In [None]:
# Start the interactive session only when this code runs as the top-level
# program (the guard is false when the code is imported as a module).
if __name__ == "__main__":
    main()