In [1]:
#Imports
import numpy as np
import pandas as pd
import time
from colorama import Fore, Style

In [13]:
# Maps each supported city name (lower-case, as compared against user input)
# to the name of its CSV file, prefixed with a Windows path separator.
# The backslash is escaped explicitly in every entry: '\c' and '\w' are not
# valid escape sequences and emit a SyntaxWarning on Python 3.12+.
city_bikeshare_data = {
    'chicago': '\\chicago.csv',
    'new york city': '\\new_york_city.csv',
    'washington': '\\washington.csv',
}

In [3]:
# Accepted month filters. 'all' sits at index 0, so every month's position in
# the list equals its calendar number (january -> 1 ... june -> 6).
months = [
    'all',
    'january', 'february', 'march',
    'april', 'may', 'june',
]

# Accepted day-of-week filters; 'all' is the "no filter" choice.
days = [
    'all',
    'sunday', 'monday', 'tuesday', 'wednesday',
    'thursday', 'friday', 'saturday',
]

In [4]:
def get_month():
    """Prompt for a month filter and return what the user typed.

    Returns:
        str: The entered text, lower-cased and with surrounding whitespace
        removed. The value is not validated here.
    """
    print('\nEnter the month according to which you want to filter data')
    # Strip so that a stray space ("june ") does not make a valid name fail
    # the membership check performed by the caller.
    return input().strip().lower()

def get_day():
    """Prompt for a day-of-week filter and return what the user typed.

    Returns:
        str: The entered text, lower-cased and with surrounding whitespace
        removed. The value is not validated here.
    """
    print('\nEnter the day according to which you want to filter data')
    # Strip so that a stray space ("monday ") does not make a valid name fail
    # the membership check performed by the caller.
    return input().strip().lower()

def get_filters():
    """Interactively collect the city and the month/day filters to analyse.

    The user is asked for a city, then for the kind of filter to apply
    ('month', 'day', 'both' or 'none'), and finally for the month and/or day
    when the chosen filter requires it. Whenever an answer is invalid, an
    error is printed in red and the questions start over from the city.

    Returns:
        tuple: (city, month, day) where city is a key of city_bikeshare_data,
        month is an entry of months and day is an entry of days. 'all' is
        used for a filter the user did not ask for.
    """
    print('Hello.\nMy name is Mayur\nLet\'s explore some bikesharing data')

    # Keep asking until a complete, valid combination has been entered.
    while True:
        print('\nWhich city\'s bikesharing data do you want to analyze ?')
        city = input().strip().lower()

        if city not in city_bikeshare_data:
            print(Fore.RED + '\nThe city name is not in the data set')
            print(Style.RESET_ALL)
            continue

        print('\nDo you want to filter data by month, day or both.')
        print('\'both\' to apply both filters and \'none\' for no filter')
        # Normalised like every other answer so that "Both" or " none " work.
        filters = input().strip().lower()

        if filters not in ('both', 'month', 'day', 'none'):
            print(Fore.RED + '\nThe input was incorrect')
            print(Style.RESET_ALL)
            continue

        # Ask only for what the chosen filter needs (month first, then day).
        month = get_month() if filters in ('both', 'month') else 'all'
        day = get_day() if filters in ('both', 'day') else 'all'

        if month not in months or day not in days:
            print(Fore.RED + '\nThe month or day was incorrect')
            print(Style.RESET_ALL)
            continue

        return city, month, day

In [5]:
def load_data(city, month, day, data_dir=r"C:\Users\VANI\Desktop"):
    """Load one city's trips and apply the requested month/day filters.

    Args:
        city (str): Key of city_bikeshare_data selecting the CSV file.
        month (str): Entry of months, or 'all' for no month filter.
        day (str): Lower-case day name, or 'all' for no day filter.
        data_dir (str): Directory containing the CSV files. Defaults to the
            location originally hard-coded in this notebook.

    Returns:
        pandas.DataFrame: The trips with 'Start Time' parsed as datetimes and
        two derived columns, 'month' (calendar number) and 'day' (day name),
        restricted to the rows matching the filters.

    Raises:
        KeyError: If city is not a key of city_bikeshare_data.
        ValueError: If month is neither 'all' nor an entry of months.
    """
    import os  # local import: the notebook's import cell does not provide os

    # Entries carry a leading Windows separator ('\chicago.csv'); drop it so
    # os.path.join treats the entry as a plain file name.
    file_name = city_bikeshare_data[city].lstrip('\\/')
    df = pd.read_csv(os.path.join(data_dir, file_name))

    df['Start Time'] = pd.to_datetime(df['Start Time'])
    df['month'] = df['Start Time'].dt.month
    # Series.dt.weekday_name was removed in pandas 1.0; day_name() replaces it.
    df['day'] = df['Start Time'].dt.day_name()

    if month != 'all':
        # 'all' occupies index 0, so the list index is the calendar month.
        month_number = months.index(month)
        df = df[df['month'] == month_number]
    if day != 'all':
        # day_name() yields capitalised names such as 'Monday'.
        df = df[df['day'] == day.title()]

    return df

In [6]:
def time_stats(df):
    """Print the most common month, day of week and start hour of the trips.

    When several values are tied for most common, the smallest one (the first
    entry returned by Series.mode) is reported. The frame passed in is not
    modified.

    Args:
        df (pandas.DataFrame): Trips with a datetime 'Start Time' column and
            the 'month' and 'day' columns.
    """
    print('\nCalculating most frequent times of travel.....\n')

    if df.empty:
        # mode() of an empty column has no entries, so there is nothing to report.
        print('No trips match the selected filters.')
        print('-'*40)
        return

    # Derive the hours locally instead of adding an 'hour' column to the
    # caller's frame.
    start_hours = df['Start Time'].dt.hour

    # mode() returns every tied value; .iloc[0] picks one instead of raising
    # the way .item() does when there is more than one.
    comm_month = df['month'].mode().iloc[0]
    comm_day = df['day'].mode().iloc[0]
    comm_hour = start_hours.mode().iloc[0]

    print(Fore.BLUE + "Most common month: {}".format(comm_month))
    print("Most common day: {}".format(comm_day))
    print("Most common hour: {}".format(comm_hour))
    print(Style.RESET_ALL)
    print('-'*40)

In [7]:
def station_stats(df):
    """Print the most common start station, end station and trip.

    A trip is the pair "<start station> To <end station>". When several values
    are tied for most common, the first entry returned by Series.mode is
    reported. The frame passed in is not modified.

    Args:
        df (pandas.DataFrame): Trips with 'Start Station' and 'End Station'
            columns.
    """
    print('\nCalculating most frequently used stations and trip......')

    if df.empty:
        # mode() of an empty column has no entries, so there is nothing to report.
        print('No trips match the selected filters.')
        print('-'*40)
        return

    # Build the combined trip label locally instead of adding a 'trips'
    # column to the caller's frame.
    trips = df['Start Station'] + " To " + df['End Station']

    # mode() returns every tied value; .iloc[0] picks one instead of raising
    # the way .item() does when there is more than one.
    comm_start_station = df['Start Station'].mode().iloc[0]
    comm_end_station = df['End Station'].mode().iloc[0]
    comm_trip = trips.mode().iloc[0]

    print(Fore.GREEN + "Most common start station: {}".format(comm_start_station))
    print("Most common end station: {}".format(comm_end_station))
    print("Most common trip: {}".format(comm_trip))
    print(Style.RESET_ALL)
    print('-'*40)

In [8]:
def user_stats(df):
    """Print user-type counts and, when available, gender and birth-year stats.

    The 'Gender' and 'Birth Year' columns are each optional and reported only
    when present. The closing separator line is always printed so the section
    ends the same way for every data set.

    Args:
        df (pandas.DataFrame): Trips with a 'User Type' column and optionally
            'Gender' and 'Birth Year' columns.
    """
    print('\nCalculating user statistics.....\n')

    print(df['User Type'].value_counts())

    if 'Gender' in df.columns:
        # Blank line first, then the table on its own print() call:
        # print("\n", table) would put a space before the table's first row.
        print()
        print(df['Gender'].value_counts())

    if 'Birth Year' in df.columns:
        birth_years = df['Birth Year'].dropna()
        if not birth_years.empty:
            print(Fore.YELLOW + "\nEarliest Birth Year: {}".format(birth_years.min()))
            print("Most Recent Birth Year: {}".format(birth_years.max()))
            # mode() returns every tied year; .iloc[0] picks the smallest
            # instead of raising the way .item() does on a tie.
            print("Most common Birth Year: {}".format(birth_years.mode().iloc[0]))
            print(Style.RESET_ALL)

    print('-'*40)

In [9]:
def trip_stats(df):
    """Print the total and the mean of the 'Trip Duration' column.

    The total is divided by 3600 and labelled as hours; the mean is divided
    by 60 and labelled as minutes (i.e. durations are presumed to be stored
    in seconds).

    Args:
        df (pandas.DataFrame): Trips with a numeric 'Trip Duration' column.
    """
    print('\nCalculating trip statistics.....\n')

    durations = df['Trip Duration']
    total_hours = durations.sum() / 3600
    mean_minutes = durations.mean() / 60

    total_line = '\nTotal Travel Time: {}{}'.format(total_hours, ' hours')
    mean_line = 'Mean Travel Time: {}{}'.format(mean_minutes, ' minutes')

    print(Fore.CYAN + total_line)
    print(mean_line)
    print(Style.RESET_ALL)
    print('-' * 40)

In [10]:
def bike_share_analysis():
    """Run the interactive bikeshare report until the user declines to restart.

    Each round asks for the filters, loads the matching trips and prints the
    time, station, trip and user statistics in that order. Another round is
    started only when the user answers 'yes' (in any letter case).
    """
    run_again = True
    while run_again:
        selection = get_filters()
        city, month, day = selection
        trips = load_data(city, month, day)

        # Reports are printed in a fixed order on the same filtered frame.
        for report in (time_stats, station_stats, trip_stats, user_stats):
            report(trips)

        answer = input('\nWould you like to restart? Enter yes or no.\n')
        run_again = answer.lower() == 'yes'

In [14]:
# Start the interactive session: the call prompts on stdin and returns only
# once the user answers something other than 'yes' to the restart question.
bike_share_analysis()

Hello.
My name is Mayur
Let's explore some bikesharing data

Which city's bikesharing data do you want to analyze ?
new York city

Do you want to filter data by month, day or both.
'both' to apply both filters and 'none' for no filter
none

Calculating most frequent times of travel.....

[34mMost common month: 6
Most common day: Wednesday
Most common hour: 17
[0m
----------------------------------------

Calculating most frequently used stations and trip......
[32mMost common start station: Pershing Square North
Most common end station: Pershing Square North
Most common trip: E 7 St & Avenue A To Cooper Square & E 7 St
[0m
----------------------------------------

Calculating trip statistics.....

[36m
Total Travel Time: 74973.68 hours
Mean Travel Time: 14.994736 minutes
[0m
----------------------------------------

Calculating user statistics.....

Subscriber    269149
Customer       30159
Name: User Type, dtype: int64

 Male      204008
Female     66783
Name: Gender, dtype: int