In [1]:
# Importing packages and functions
import pandas as pd
import datetime # operations to parse dates
import time
import calendar
import csv

In [2]:
# supported cities, months, days
city_data = {'chicago': 'chicago.csv',
          'new york': 'new_york_city.csv',
          'washington': 'washington.csv'}
months = {v.lower(): k for k, v in enumerate(calendar.month_name)}
days = {v.lower(): k for k, v in enumerate(calendar.day_name)}

In [3]:
#get user input for city
def get_city():
    '''Asks the user for a city and returns the specified filter.
    Args:
        none.
    Returns:
        (str) City filter for the bikeshare data.
    '''
    while True:
        try:
            city = input('Hello! Let\'s explore some US bikeshare data!\nWould you like to see data for Chicago, New York, or Washington?\n')
        except ValueError:
            print('That is not a valid answer. Please try again.')
        if city.lower() in city_data.keys():
            return city.lower()
        else:
            print('That is not a valid answer. Please try again.')


In [4]:
def get_raw_city_data(city):
    '''Read CSV (comma-separated) file into DataFrame
    Args:
        city filter from get_city()
    Returns:
        raw_city_data df for the specified city's bikeshare data.
    '''
    raw_city_data = pd.read_csv(city_data[city])
    return raw_city_data

In [5]:
def parse_data(raw_city_data):
    '''Read CSV (comma-separated) file into DataFrame
    Args:
        (obj) raw_city_data from get_raw_city_data(city)
    Returns:
        (obj) parsed raw_city_data   
    '''
    #format column names
    raw_city_data.columns = [x.strip().replace(' ', '_') for x in raw_city_data.columns]
    # parse datetime 
    raw_city_data['Start_Time'] = pd.to_datetime(raw_city_data['Start_Time'])
    raw_city_data['End_Time'] = pd.to_datetime(raw_city_data['End_Time']) 
    # extract month and hour from the Start Time column to create month, hour columns
    raw_city_data['Month'] = raw_city_data['Start_Time'].dt.month
    raw_city_data['day_of_week'] = raw_city_data['Start_Time'].dt.weekday_name
    raw_city_data['Hour'] = raw_city_data['Start_Time'].dt.hour 
    # create 'journey' column that concatenates start_station, end_station 
    raw_city_data['Journey'] = raw_city_data['Start_Station'].str.cat(raw_city_data['End_Station'], sep=' to ')

    return raw_city_data

In [6]:
def filter_data(raw_city_data):
    '''Asks the user for a time period and filter the basic processed data according 
        to the specified filter and returns the filtered data and the filter name.
    Args:
        (obj) basic processed data
    Returns:
        (obj) filtered data
    '''

    # loop for handling invalid entries
    while True: 
        time_period = input('Would you like to filter the data by month, day, or not at all? Type "none" for no time filter.\n').lower()
        print('Great! Time period selected: %s' % time_period)
        if time_period in ('month', 'day', 'none'):
            break
        print('Enter a valid input provided in the options')

    if time_period =='day':
        while True:
            day_selection = input('Which day of the week? \n')
            if day_selection.lower() in days:
                print('Great! We\'ll use %s.' % day_selection)
                #return day_selection
                #filter the data accourding to the get_day
                filtered_city_data = raw_city_data[raw_city_data['Start_Time'].dt.dayofweek==day_selection]
                #filtered_city_data = city_data[city_data['month'] == month_selection]
                #time_period=days[day_selection]
                break

            print('That is not a valid answer. Please try again.')
                
    elif time_period =='month':
    #ask for the month of choice
        while True:
            month_selection = input('Select a month, January - June \n')
            if month_selection.lower() in months:
                print('Great! We\'ll use %s.' % month_selection)
                #return month_selection
                #filter the data accourding to the get_month
                #month_start_point = months.index(month_selection.lower()) + 1
                #filtered_city_data = raw_city_data[raw_city_data['Start_Time'].dt.month==month_start_point]
                filtered_city_data = raw_city_data[raw_city_data['Start_Time'].dt.month==month_selection]
                 #city_data = city_data[ city_data['day_of_week'] == day_selection.title()]               
                #time_period = month_selection.lower()

                break

            print('That is not a valid answer. Please try again.')
            

    else:
        filtered_city_data = raw_city_data # for none option

    return filtered_city_data



In [7]:
def display_statistics(filtered_city_data):
    '''Displays city data statistics on the specified filters
    Args:
        (ob) filtered_city_data
    Returns:
        statistics
    '''
    #Print heading that specifies selected city, filters
    print('\n')
    print('-------------------------------------')
    # display total number of trips for this city and filter
    #print('Total trips: ', (filtered_city_data['Start_Time'].count()))
    
    """Display statistics on the most frequent times of travel."""
    print('\nTrip Info:')
    # display the most common month
    popular_month = filtered_city_data['Month'].mode()[0]
    print(popular_month, 'is the month with the highest ridership')

    # display the most common day of week
    days_of_week = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
                    'Saturday', 'Sunday']
    index = int(filtered_city_data['Start_Time'].dt.dayofweek.mode())
    popular_day = days_of_week[index]
    print(popular_day, 'is the day of the week with the highest ridership')

    # display the most common hour (from 0 to 23)
    popular_hour = filtered_city_data['Hour'].mode()[0]
    print(popular_hour, 'is the most common trip start hour')

    """Display statistics on the most popular stations and trip."""
    print('\nStation Info:')
    # display most commonly used start station & end station
    popular_start_station = filtered_city_data['Start_Station'].mode().to_string(index = False)
    popular_end_station = filtered_city_data['End_Station'].mode().to_string(index = False)
    print('Popular Start Station: ', popular_start_station)
    print('Popular End Station: ', popular_end_station)

    # display most frequent combination of start station and end station trip
    popular_journey = filtered_city_data['Journey'].mode().to_string(index = False)
    print('Popular Journey: ', popular_journey)

    """Displays statistics on the total and average trip duration."""
    print('\nOther Ridership Data:')
    # display total travel time
    total_travel_time = filtered_city_data['Trip_Duration'].sum()
    print('Total Time Travel:', total_travel_time)
    # display mean travel time
    mean_travel_time = filtered_city_data['Trip_Duration'].mean()
    print('Mean Time Travel:', mean_travel_time)

    """Displays statistics on bikeshare users."""
    print('\nUser Info:')
    #Display counts of user types
    user_types=filtered_city_data['User_Type'].value_counts()
    print(user_types)
    print('\n')
    
    if filtered_city_data == 'chicago' or filtered_city_data == 'new york': 
        user_statistics(filtered_city_data)

In [8]:
def user_statistics(filtered_city_data):
    '''Displays city data statistics on the specified filters, for specified cities
    Args:
        (obj) filtered_city_data
    Returns:
        user statistics for chicago and nyc data only 
    '''
    #Display counts of gender
    # Display earliest, most recent, and most common year of birth
    #gender_count = city_data.groupby('Gender')['Gender'].count()
    gender_count = filtered_city_data['Gender'].value_counts()
    print(gender_count)
    earliest = int(filtered_city_data['Birth_Year'].min())
    recent = int(filtered_city_data['Birth_Year'].max())
    mode = int(filtered_city_data['Birth_Year'].mode())
    print('The oldest birth year in the dataset is listed as {}.\nThe most recent birth year in the dataset is {}.'
          '\nThe most common birth year in the dataset is {}.'.format(earliest, recent, mode))
    print('\n')

In [9]:
def display_data(filtered_city_data, row):
    """
    Asks the user if they would you like to view individual trip data and loads the raw data 
    Args:
        (obj) filtered city_data
        ilocs
    Returns:
        data in detail
    """
    display = input('\nWould you like to view individual trip data?'
                    ' Type \'yes\' or \'no\'.\n').lower()
    if display == 'yes' or display == 'y':
        print(filtered_city_data.iloc[row:row+5])
        row += 5
        return display_data(filtered_city_data, row)
    if display == 'no' or display == 'n':
        return
    else:
        print('That is not a valid answer. Please try again.')
        return display_data(filtered_city_data, row)
#https://stackoverflow.com/questions/43772362/how-to-print-a-specific-row-of-a-pandas-dataframe
#https://pandas.pydata.org/pandas-docs/stable/indexing.html


In [10]:
def main():
    """
    Loads analysis and data for the specified city and filters.
    """
    # 1) pick a city   
    city = get_city()
    print('Great! We\'ll use %s.' % city)
    
    # 3) load data
    raw_city_data = get_raw_city_data(city)
    
    #4) parse data
    parse_data(raw_city_data)
    
    #5) filter data
    filtered_city_data = filter_data(raw_city_data)
    
    #6) display statistics
    display_statistics(filtered_city_data)
    
    #7) see data details
    see_data = display_data(filtered_city_data, row=76)

    #8) restart if you wish
    restart = input('\nWould you like to restart? Enter yes or no.\n')
    if restart.lower() == 'yes' or restart.lower() == 'y':
        main()
    elif restart.lower() == 'no' or restart.lower() == 'n':
        return
    else:
        print("\nThat is not a valid answer. Please try again.")
        return restart()

In [11]:
if __name__ == "__main__":
	main()

Hello! Let's explore some US bikeshare data!
Would you like to see data for Chicago, New York, or Washington?
chicago
Great! We'll use chicago.
Would you like to filter the data by month, day, or not at all? Type "none" for no time filter.
day
Great! Time period selected: day
Which day of the week? 
tuesday
Great! We'll use tuesday.


  result = method(y)


TypeError: invalid type comparison