Insights we will be pulling: 

#1 Popular times of travel (i.e., occurs most often in the start time)

    most common month
    most common day of week
    most common hour of day

#2 Popular stations and trip

    most common start station
    most common end station
    most common trip from start to end (i.e., most frequent combination of start station and end station)

#3 Trip duration

    total travel time
    average travel time

#4 User info

    counts of each user type
    counts of each gender (only available for NYC and Chicago)
    earliest, most recent, most common year of birth (only available for NYC and Chicago)

# Final project. Completed 11/6/23. Verified 11/18/23



In [4]:
#Intro
# Importing requried libraries
import pandas as pd
import numpy as np
import datetime
source_data={'chicago':'chicago.csv','washington':'washington.csv','new york city':'new_york_city.csv'}

# loads the data into a df to manipulate
def data_load(month,city,date):
    month=month.lower()
    city=city.lower()
    date=date.lower()
   
    city_select=pd.read_csv(source_data[city])
    # convert the Start Time column to datetime
    city_select['Start Time'] = pd.to_datetime(city_select['Start Time'])
    
    #Creates new columns month& day of week based on df.
    city_select['month'] = city_select['Start Time'].dt.month
    city_select['day_of_week'] = city_select['Start Time'].dt.day_name()
    city_select['day_of_week'] = city_select['day_of_week'].str.lower()
    #print(city_select['month'].head())
    
    #Create monthly filter

    if month != "all":
        months={'january':1, 'february':2, 'march':3, 'april':4, 'may':5, 'june':6,'july':7, 'august':8,'september':9,'october':10,'november':11,'december':12}
        month=months[month]
    
        city_select=city_select[city_select['month']==month]
    
    
       # filter by day of week if applicable
    if date != 'all':
        # filter by day of week to create the new dataframe
        city_select = city_select[city_select['day_of_week']==date]

    
    return city_select

def filters():

    while True:
        city=input("Which city would you like to see data for? Chicago, Washington, or New York City. ")
        if city.lower() not in ('washington', 'new york city','chicago'):
            print("Sorry that input isn't valid, please try again")
            continue
        else:
            break
    
    month=input("What about the month? Type \"all\" for the entire year.")
    while month.lower() not in ('all','january', 'february', 'march', 'april', 'may', 'june'):
        month = input("Sorry that input is invalid or doesn't have data. Please try another month.")
   
    while True:
        datetp=input("Are you looking for a specific day? Monday, Tuesday, Wednesday...Sunday? Type 'all' to search the whole week. ")
        if datetp.lower() not in ('monday', 'tuesday','wednesday','thursday','friday','saturday','sunday', 'all'):
            print("Sorry getting an error with that input. Please try again")
            continue
        else: 
            break
    
    
    return month,city,datetp

'''#2 Popular stations and trip

most common start station
most common end station
most common trip from start to end (i.e., most frequent combination of start station and end station)
'''
def popstation(df):
    popstart= df['Start Station'].mode()
    popend=df['End Station'].mode()
    #Combines the start and end stations in order to find the most common to & froms
    df['Full Trip']= df['Start Station']+' '+df['End Station']
    fulltrip=df['Full Trip'].mode()
    print('The most popular station to leave from is {} and arrive at is {}'.format(popstart[0],popend[0])) 
    print('The most popular start to end trip is {}.'.format(fulltrip.iloc[0]))
 
'''
#3 Trip duration

total travel time
average travel time
'''
def TripDur(df):
    #Total Travel Time for the city
    avg_travel = df.loc[:,'Trip Duration'].mean() # using loc.[row_name,column_name] using[:,] means all rows
    avg_travel=int(avg_travel)
    print("The average travel time is about {} minutes.".format(avg_travel))
    
    tot_travel = df.loc[:,'Trip Duration'].sum()
    tot_travel_hours = int(tot_travel/60)
    travel_days = int(tot_travel/60/24)
    print("The total travel time for this range is about {} minutes, {} hours, or {} days worth of travel time.".format(tot_travel,tot_travel_hours,travel_days))


def date_data(df):
    '''#1 Popular times of travel (i.e., occurs most often in the start time)
        most common month
        most common day of week
        most common hour of day
    ''' 
    #converts the numeric month into a string then finds the most repeated
    df['month']=pd.to_datetime(df['month'],format='%m').dt.month_name()
    popmonth= df['month'].mode()
    
    #Converts the original start time column, which is a datetime from dataload, into the str date.
    df['day_of_week']=df['Start Time'].dt.day_name()
    popday =df['day_of_week'].mode()
    #popmonthseries= pd.Series(df['day_of_week'])
    ## alt code to not use loc[0] **popmonth=df.day_of_week.mode()[0]
    
   
    print('The most popular month is {}'.format(popmonth.loc[0]))
    print('The most popular day is {}'.format(popday.loc[0]))
    

def user_data(df, city):
    #Creating catch for washington, after reviewing data there is no data in csv for gender or birth years
    usertype = df.groupby(['User Type'])['User Type'].count()
    print(f"There are {usertype.iloc[0]} customeres and {usertype.iloc[1]} subscribers.")
    
    if city != 'washington':
        #groupby(['Column(s)to create groupings'])['Column operation will be peformed on'].function()
        usergender = df.groupby(['Gender'])['Gender'].count()

        firstbirth= int(df['Birth Year'].min())
        recentbirth = int(df['Birth Year'].max())
        commonbirthyr = df.groupby(['Birth Year'])['Birth Year'].count()
        commonbirthyr = int(commonbirthyr.idxmax())
        #commonbirthyrtw = df['Birth Year'].mode()
        #commonbirthyrtw= int(commonbirthyrtw.iloc[0])



        
        print(f"Based on the current data there are {usergender.iloc[0]} females and {usergender.iloc[1]} males in this sample size.")
        print('The most common birth year is {}.'.format(commonbirthyr))
        print('The most earliest birth year is {}.'.format(firstbirth))
        print(f"The most recent birth year is {recentbirth}.")        
    else:
        print("Sorry no gender or birth data for this city")
    
#main block to take the users inputs
def main():
    
    print("\nHello Welcome to the BikeShare Database\n")
    
    month,city,datetp=filters()
        
    df=data_load(month,city,datetp)
    
    date_data(df)
    popstation(df)
    TripDur(df)
    user_data(df,city)
    

if __name__ =='__main__':
    main()


Hello Welcome to the BikeShare Database



Which city would you like to see data for? Chicago, Washington, or New York City.  chciago


Sorry that input isn't valid, please try again


Which city would you like to see data for? Chicago, Washington, or New York City.  all


Sorry that input isn't valid, please try again


Which city would you like to see data for? Chicago, Washington, or New York City.  chicago
What about the month? Type "all" for the entire year. all
Are you looking for a specific day? Monday, Tuesday, Wednesday...Sunday? Type 'all' to search the whole week.  all


The most popular month is June
The most popular day is Tuesday
The most popular station to leave from is Streeter Dr & Grand Ave and arrive at is Streeter Dr & Grand Ave
The most popular start to end trip is Lake Shore Dr & Monroe St Streeter Dr & Grand Ave.
The average travel time is about 936 minutes.
The total travel time for this range is about 280871787 minutes, 4681196 hours, or 195049 days worth of travel time.
There are 61110 customeres and 1 subscribers.
Based on the current data there are 57758 females and 181190 males in this sample size.
The most common birth year is 1989.
The most earliest birth year is 1899.
The most recent birth year is 2016.
