<a href="https://colab.research.google.com/github/RowainaMohamed/US_BikeShare_Data/blob/main/code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import time
import pandas as pd
import numpy as np

# Maps each supported city name (lower case, as typed by the user) to the
# CSV file that load_data() reads for it.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')
    # get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs  
    while True:
      city = input('Please select a city name from (chicago, washington, new york city): ').lower()
      if city == 'chicago' or city == 'new york city' or city == 'washington':
        break
      else:
         print('Invalid City Name')
      
    # get user input for month (all, january, february, ... , june)
    while True:
      my_dic = {'1': 'january', '2': 'february', '3': 'march', '4': 'april', '5': 'may', '6': 'june',  'all': 'all'}
      month = input('Please Enter a month nuumber from 1 to 6 or select all: ').lower()
      if month in my_dic:
        month = my_dic[month]
        
        break
      else:
        print('Number is Invalid or Not in Range')
       
    # get user input for day of week (all, monday, tuesday, ... sunday)
    while True:
      my_dic1 = {'1': 'monday', '2': 'tuesday', '3': 'wednesday', '4': 'thurday', '5': 'friday', '6': 'saturday', '7': 'sunday', 'all': 'all'}
      day = input('Please Enter a day number from 1 to 7 or select all: ').lower()
      if day in my_dic1:
        day = my_dic1[day]
        break
      else:
        print('Number is Invalid or Not in Range')

    print('-'*40)
    return city, month, day

def load_data(city, month, day):
    """
    Reads the CSV registered for a city and narrows it to the requested month and day.

    The 'Start Time' column is parsed to datetimes, and two helper columns are
    added from it: 'month' (month name) and 'day_of_week' (day name). A filter
    value of "all" leaves that dimension unfiltered; any other value is
    title-cased and compared against the matching helper column.

    Args:
        (str) city - key into CITY_DATA selecting the file to read
        (str) month - name of the month to keep, or "all" for every month
        (str) day - name of the day of week to keep, or "all" for every day
    Returns:
        df - Pandas DataFrame holding the rows that satisfy both filters
    """
    trips = pd.read_csv(CITY_DATA[city])

    # Parse once, then derive both helper columns from the parsed series.
    trips['Start Time'] = pd.to_datetime(trips['Start Time'])
    started = trips['Start Time']
    trips['month'] = started.dt.month_name()
    trips['day_of_week'] = started.dt.day_name()

    # Apply the month filter first, then the day filter, skipping "all".
    for column, wanted in (('month', month), ('day_of_week', day)):
        if wanted != 'all':
            trips = trips[trips[column] == wanted.title()]

    return trips

def time_stats(df):
    """Displays statistics on the most frequent times of travel.

    Prints the most common start month, day of week and hour, all derived
    from the datetime 'Start Time' column, followed by the elapsed time.
    The frame passed in is not modified. When it has no rows a short notice
    is printed instead of the statistics.

    Args:
        df - Pandas DataFrame with a datetime 'Start Time' column
    """

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty series has no element 0 to report.
        print('\nNo trips match the selected filters.')
    else:
        # Work on derived series rather than adding columns to the caller's
        # frame, which may be a filtered slice of a larger one.
        started = df['Start Time']

        # display the most common month
        most_common_month = started.dt.month_name().mode()[0]
        print('\nMost Popular Start Month:', most_common_month)

        # display the most common day of week (the day name, not the week number)
        most_common_day_of_week = started.dt.day_name().mode()[0]
        print('\nMost Popular Start day:', most_common_day_of_week)

        # display the most common start hour
        most_common_hour = started.dt.hour.mode()[0]
        print('\nMost Popular Start Hour:', most_common_hour)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def station_stats(df):
    """Prints the busiest start station, the busiest end station and the top station pair.

    Reads the 'Start Station' and 'End Station' columns of df. The station
    pair is printed as the one-row result of counting every
    (start, end) combination and keeping the largest count. The elapsed time
    is printed last.
    """

    print('\nCalculating The Most Popular Stations and Trip...\n')
    began = time.time()

    # Start station with the highest number of occurrences.
    start_counts = df['Start Station'].value_counts()
    print("\nThe most commonly used start station :", start_counts.idxmax())

    # End station with the highest number of occurrences.
    end_counts = df['End Station'].value_counts()
    print("\nThe most commonly used end station :", end_counts.idxmax())

    # Count trips per (start, end) pair and keep only the most frequent one.
    pair_sizes = df.groupby(['Start Station', 'End Station']).size()
    top_pair = pair_sizes.nlargest(1)
    print('\nThe most frequent combination of start station and end station trip is:\n', top_pair)

    elapsed = time.time() - began
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)


def trip_duration_stats(df):
    """Prints the sum and the mean of the 'Trip Duration' column, then the elapsed time."""

    print('\nCalculating Trip Duration...\n')
    began = time.time()

    durations = df['Trip Duration']

    # Each figure is computed right before it is printed: total first, then mean.
    reports = (
        ('\nThe total travel time: ', durations.sum),
        ('\nThe mean travel time: ', durations.mean),
    )
    for label, compute in reports:
        print(label, compute())

    elapsed = time.time() - began
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)


def user_stats(df, city):
    """Displays statistics on bikeshare users.

    Prints the count of each user type. Unless city is 'washington', it also
    prints the count of each gender and the earliest, most recent and most
    common birth year (presumably the Washington data lacks those columns -
    confirm against the CSV files). Rows with a missing gender or birth year
    are left out of the respective figures rather than being filled in from
    neighbouring rows, and the frame passed in is not modified.

    Args:
        df - Pandas DataFrame with a 'User Type' column and, for cities other
             than 'washington', optionally 'Gender' and 'Birth Year' columns
        (str) city - name of the city the data belongs to
    """

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    user_count = df['User Type'].value_counts()
    print(user_count)

    if city != 'washington':
        # Display counts of gender; value_counts() skips missing values.
        if 'Gender' in df.columns:
            gender_count = df['Gender'].value_counts()
            print('\nThe Gender user count is:\n', gender_count)

        # Display earliest, most recent, and most common year of birth
        if 'Birth Year' in df.columns:
            birth_years = df['Birth Year'].dropna()
            if birth_years.empty:
                # int() cannot convert the NaN that min()/max() would return here.
                print('\nNo birth year data available.')
            else:
                print('\nThe earliest year of birth count is: ', int(birth_years.min()))
                print('\nThe most recent year of birth count is: ', int(birth_years.max()))
                print('\nThe most common year of birth count is: ', int(birth_years.mode()[0]))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

def raw_data(df, input_fn=None):
    """Displays the raw rows of df five at a time for as long as the user asks.

    Each 'yes' prints the next five rows (rows already shown are not
    repeated). The loop ends on 'no' or as soon as the last row has been
    shown. Any other answer triggers a re-prompt. Answers are stripped and
    lower-cased before they are compared.

    Args:
        df - Pandas DataFrame whose rows are displayed
        input_fn - optional callable taking the prompt text and returning the
                   answer; defaults to the built-in input()
    """
    # Look the built-in up at call time instead of freezing it as a default.
    ask = input if input_fn is None else input_fn

    rows_per_page = 5
    offset = 0
    total_rows = len(df)
    request = ask("Would you like to display raw data? (yes/no) ").strip().lower()

    while request != 'no':
        if request != 'yes':
            request = ask("Please enter 'yes' or 'no'").strip().lower()
            continue

        # Positional slice: shows only the next page, whatever the index labels are.
        page = df.iloc[offset:offset + rows_per_page]
        if not page.empty:
            print(page)
        offset += rows_per_page

        if offset >= total_rows:
            print('\nNo more raw data to display.')
            break

        request = ask("Would you like to display more raw data? (yes/no) ").strip().lower()


def main():
    """Runs the interactive session: ask for filters, load the data, print every report.

    After each pass the user is asked whether to restart; any answer other
    than 'yes' (ignoring case and surrounding whitespace) ends the session.
    When the chosen filters match no rows the reports are skipped with a
    notice instead of being run on empty data.
    """
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        if df.empty:
            # The report functions index into mode()/idxmax() results, which
            # raise when there is nothing to summarise.
            print('\nNo trips match the selected filters.')
        else:
            time_stats(df)
            station_stats(df)
            trip_duration_stats(df)
            user_stats(df, city)
            raw_data(df)

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.strip().lower() != 'yes':
            break


# Start the interactive session only when this file is executed directly.
if __name__ == "__main__":
    main()
