<a href="https://colab.research.google.com/github/HendEmad/Udacity-FWD/blob/main/Explore_US_BikeShare_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Libraries
import time
import pandas as pd
import numpy as np

In [None]:
#Our Data ----> CSV file for each supported city, plus the accepted month / day filter values
CITY_DATA = {
    'chicago': '/content/chicago.csv',
    'new york city': '/content/new_york_city.csv',
    'washington': '/content/washington.csv',
}

# 'all' occupies index 0, so MONTH_DATA.index(name) gives the calendar month number (january -> 1)
MONTH_DATA = [
    'all',
    'january', 'february', 'march',
    'april', 'may', 'june',
]

DAY_DATA = [
    'all',
    'saturday', 'sunday', 'monday', 'tuesday',
    'wednesday', 'thursday', 'friday',
]

In [None]:
#Helper for get_filters ----> repeats one question until the answer is an accepted value.
def _ask_until_valid(prompt, valid_choices, retry_message):
    '''Args:
        (str) prompt - text passed to input()
        (container) valid_choices - accepted lower-case answers (membership is tested with `in`)
        (str) retry_message - printed after every rejected answer
       Returns:
        (str) the accepted answer, stripped of surrounding whitespace and lower-cased'''
    while True:
        # Normalise before validating so that "  Chicago " is accepted like "chicago".
        answer = input(prompt).strip().lower()
        if answer in valid_choices:
            return answer
        print(retry_message)


#Get filters function----> Asks user to specify a city, month, and day to analyze.
def get_filters():

    '''Returns:
        (str) city - path of the CSV file for the chosen city (the CITY_DATA value, not the
                     city name); load_data hands this value to pd.read_csv
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter'''

    print('Hello! Let\'s explore some US bikeshare data!')

    #getting user input for city (chicago, new york city, washington)
    city_name = _ask_until_valid(
        '\nWhat\'s your city? Is it chicago, new york city or washington?',
        CITY_DATA,
        'We can\'t find this city on our data, please choose one of these cities: (chicago, new york city, washington)',
    )
    city = CITY_DATA[city_name]

    #getting user input for month (all, january, february, ... , june)
    month = _ask_until_valid(
        '\nWhat\'s the month?',
        MONTH_DATA,
        'not found! input all if you don\'t want to filter data with month.',
    )

    #getting user input for day of week (all, monday, tuesday, ... sunday)
    day = _ask_until_valid(
        '\nwhat\'s the day?',
        DAY_DATA,
        'not found! input all if you don\'t want to filter data with day',
    )

    print('-'*40)
    return city, month, day

In [None]:
#Load data function ---> reads one city's CSV, adds helper columns and applies the month / day filters.
def load_data(city, month, day):

    '''Args:
        (str) city - CSV source handed directly to pd.read_csv
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
       Returns:
        df - pandas DataFrame with the extra columns 'month', 'day_of_week', 'hour' and
             'trip', restricted to the requested month and/or day'''

    trips = pd.read_csv(city)

    # parse the start timestamps once and derive every time-based column from them
    started = pd.to_datetime(trips['Start Time'])
    trips['Start Time'] = started
    trips['month'] = started.dt.month
    trips['day_of_week'] = started.dt.day_name()
    trips['hour'] = started.dt.hour

    # "<start station>:<end station>" label identifying each trip
    trips['trip'] = trips['Start Station'].astype(str) + ':' + trips['End Station'].astype(str)

    if month != 'all':
        # position in MONTH_DATA doubles as the month number ('all' is at index 0)
        month_number = MONTH_DATA.index(month)
        trips = trips.loc[trips['month'].eq(month_number)]

    if day != 'all':
        # day_name() yields capitalised names, hence .title() on the lower-case filter
        trips = trips.loc[trips['day_of_week'].eq(day.title())]

    return trips

In [None]:
def time_stats(df):
    '''Displays statistics on the most frequent times of travel.

    Prints the most common value (first entry of .mode()) of the 'month',
    'day_of_week' and 'hour' columns of df, then the time the calculation took.
    Returns nothing.'''
    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    #(label, column) pairs, reported in this order
    reports = (
        ('The most common month is: ', 'month'),
        ('The most common day of week is: ', 'day_of_week'),
        ('The most common start hour is: ', 'hour'),
    )
    for label, column in reports:
        print(label, df[column].mode()[0])

    elapsed = time.time() - start_time
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)

In [None]:
#station_stats function----> Displays statistics on the most popular stations and trip.
def station_stats(df):
    '''Prints the most common value (first entry of .mode()) of the 'Start Station',
    'End Station' and 'trip' columns of df, then the time the calculation took.
    Returns nothing.'''
    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    #(label, column) pairs, reported in this order
    reports = (
        ('The most commonly used start station is: ', 'Start Station'),
        ('The most commonly used end station is: ', 'End Station'),
        ('The most frequent combination of start station and end station trip is: ', 'trip'),
    )
    for label, column in reports:
        print(label, df[column].mode()[0])

    elapsed = time.time() - start_time
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)

In [None]:
#trip_duration_stats function----> Displays statistics on the total and average trip duration.
def trip_duration_stats(df):
    '''Prints the sum and the mean of the 'Trip Duration' column of df, then the
    time the calculation took. Returns nothing.'''
    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    durations = df['Trip Duration']

    #total first, then the average
    print('The total travel time is: ', durations.sum())
    print('The mean travel time is: ', durations.mean())

    elapsed = time.time() - start_time
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)

In [None]:
#user_stats function ----> Displays statistics on bikeshare users.
def user_stats(df):
    '''Prints user statistics for df, then the time the calculation took.

    Always prints the value counts of the 'User Type' column. The gender counts and
    the birth-year figures (earliest, most recent, most common) are printed only
    when df has a 'Gender' / 'Birth Year' column respectively. Returns nothing.'''
    print('\nCalculating User Stats...\n')
    start_time = time.time()

    #Displaying counts of user types
    user_types_counts = df['User Type'].value_counts()
    print('User types counts are: ', user_types_counts)

    #Displaying counts of gender
    if 'Gender' in df.columns:
        gender_counts = df['Gender'].value_counts()
        print('Gender counts are: ', gender_counts)

    #Displaying earliest, most recent, and most common year of birth
    if 'Birth Year' in df.columns:
        birth_years = df['Birth Year']
        earliest_yearOfBirth = birth_years.min()
        mostRecent_yearOfBirth = birth_years.max()
        most_common_yearOfBirth = birth_years.mode()[0]
        print('The earliest year of birth is: ', earliest_yearOfBirth)
        # previously this line repeated the "earliest" label and value
        print('The most recent year of birth is: ', mostRecent_yearOfBirth)
        print('The most common year of birth is: ', most_common_yearOfBirth)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [None]:
def _ask_yes_no(prompt):
    '''Ask a yes/no question and re-ask until the answer is exactly yes or no.

    Args:
        (str) prompt - text of the first question
    Returns:
        (bool) True for "yes", False for "no" (case and surrounding spaces ignored)'''
    answer = input(prompt).strip().lower()
    while answer not in ('yes', 'no'):
        answer = input('Invalid input, please choose yes or no: ').strip().lower()
    return answer == 'yes'


def _ask_row_count():
    '''Ask how many rows to show and re-ask until a whole number >= 1 is entered.

    Returns:
        (int) the requested number of rows'''
    while True:
        raw = input('Please enter integer no.of rows you want to display(eg. 1 5 8 100..est):  \n')
        try:
            count = int(raw)
        except ValueError:
            # int() rejects text such as "five" or "2.5"; ask again instead of crashing
            print('Invalid input, please enter a whole number greater than 0.')
            continue
        if count >= 1:
            return count
        print('Invalid input, please enter a whole number greater than 0.')


def display_data(df):
    '''Interactively prints df in consecutive chunks of rows.

    The user is asked whether to display data, then repeatedly how many rows to
    show next and whether to continue. Every answer is validated and re-asked when
    invalid. Paging stops when the user answers no or when the last row of df has
    been shown. Returns nothing.

    Args:
        df - pandas DataFrame to page through'''
    start_loc = 0
    total_rows = len(df)

    keep_showing = _ask_yes_no('Do you want to display the data? enter yes or no')
    while keep_showing:
        end_loc = start_loc + _ask_row_count()
        # .iloc slices by position, so filtered frames with gaps in their index page correctly
        print(df.iloc[start_loc:end_loc])
        start_loc = end_loc

        if start_loc >= total_rows:
            # nothing left; do not offer "more rows" that would only print an empty frame
            print('No more rows to display.')
            break
        keep_showing = _ask_yes_no('Do you want to display more rows of data? enter yes or no')

In [None]:
# Alternative draft of display_data(df). The whole definition is wrapped in a
# triple-quoted string, so display_data2 is never defined or executed; main()
# calls display_data() and its display_data2 call is commented out.
# NOTE(review): in this draft start_loc is never advanced after printing, so each
# round would start again from the first row.
'''
def display_data2(df):
  start_loc = 0
  while True:
    data = input('Do you want to display the data? enter yes or no').lower()
    if data not in ['yes', 'no']:
      data = input('Invalid input, please choose yes or no: ').lower()
    elif data == 'yes':
      n = int(input('Please enter integer no.of rows you want to display(eg. 1 5 8 100..est):  \n'))
      n += start_loc
      print(df[start_loc : n])
      more_data = input('Do you want to display more rows of data? enter yes or no').lower()
      if more_data == 'no':
        break
      elif more_data not in ['yes', 'no']:
        more_data = input('Invalid input, please choose yes or no: ').lower()
    elif data == 'no':
      break
'''

"\ndef display_data2(df):\n  start_loc = 0\n  while True:\n    data = input('Do you want to display the data? enter yes or no').lower()\n    if data not in ['yes', 'no']:\n      data = input('Invalid input, please choose yes or no: ').lower()\n    elif data == 'yes':\n      n = int(input('Please enter integer no.of rows you want to display(eg. 1 5 8 100..est):  \n'))\n      n += start_loc\n      print(df[start_loc : n])\n      more_data = input('Do you want to display more rows of data? enter yes or no').lower()\n      if more_data == 'no':\n        break\n      elif more_data not in ['yes', 'no']:\n        more_data = input('Invalid input, please choose yes or no: ').lower()\n    elif data == 'no':\n      break\n"

In [None]:
def main():
    '''Runs the interactive bikeshare session.

    Each round asks for the filters, loads the matching data and runs every report
    on it (time, station, trip duration, user stats, raw-data display). The round
    is repeated for as long as the user answers "yes" to the restart question.'''
    keep_going = True
    while keep_going:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        # reports run in this fixed order on the same filtered frame
        for report in (time_stats, station_stats, trip_duration_stats, user_stats, display_data):
            report(df)

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        keep_going = restart.lower() == 'yes'


# Start the interactive session only when this code is run directly, not when it is imported.
if __name__ == "__main__":
    main()

Hello! Let's explore some US bikeshare data!

What's your city? Is it chicago, new york city or washington?new york
We can't find this city on our data, please choose one of these cities: (chicago, new york city, washington)

What's your city? Is it chicago, new york city or washington?new york city

What's the month?may

what's the day?saturday
----------------------------------------

Calculating The Most Frequent Times of Travel...

The most common month is:  5
The most common day of week is:  Saturday
The most common start hour is:  13

This took 0.004616975784301758 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

The most commonly used start station is:  Washington St & Gansevoort St
The most commonly used end station is:  South End Ave & Liberty St
The most frequent combination of start station and end station trip is:  Broadway & W 41 St:Broadway & W 36 St

This took 0.001699686050415039 seconds.
-----------------------------