In [None]:
import time
import math
import pandas as pd
import numpy as np

# Trip data per supported city, keyed by the lower-case city name.
# NOTE: every CSV below is read as soon as this statement executes.
CITY_DATA = {
    city: pd.read_csv(csv_file)
    for city, csv_file in (
        ('chicago', 'chicago.csv'),
        ('new york city', 'new_york_city.csv'),
        ('washington', 'washington.csv'),
    )
}

# Accepted month spellings (full name and abbreviation) -> month number.
# Full names are inserted first, so the first key found for a given number
# is always the full name ('may' serves as both spellings).
MONTH_DATA = {
    label: number
    for labels in (
        ('january', 'february', 'march', 'april', 'may', 'june'),
        ('jan', 'feb', 'mar', 'apr', 'may', 'jun'),
    )
    for number, label in enumerate(labels, start=1)
}

# Accepted weekday spellings (full name and abbreviation) -> weekday number,
# with Monday as 0.  Full names are inserted before the abbreviations.
WEEK_DATA = {
    label: number
    for labels in (
        ('monday', 'tuesday', 'wednesday', 'thursday', 'friday',
         'saturday', 'sunday'),
        ('mon', 'tues', 'wed', 'thur', 'fri', 'sat', 'sun'),
    )
    for number, label in enumerate(labels)
}

def _ask_month():
    """Prompt for a month and return its number from MONTH_DATA, or None if unrecognised."""
    print('Which month\'s data to look at?')
    answer = input('January/jan, February/feb, March/mar, April/apr, May, June/jun- ').strip().lower()
    print()
    return MONTH_DATA.get(answer)


def _ask_day(heading):
    """Print *heading*, prompt for a weekday and return its number from WEEK_DATA, or None if unrecognised."""
    print(heading)
    answer = input('Monday/mon, Tuesday/tues, Wednesday/wed, Thursday/thur, Friday/fri, Saturday/sat, Sunday/sun- ').strip().lower()
    print()
    return WEEK_DATA.get(answer)


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.
    Returns:
        cty - the CITY_DATA entry for the chosen city
        (str) cityName - the CITY_DATA key of the chosen city
        month - month number taken from MONTH_DATA, or "all" to apply no month filter
        day - weekday number taken from WEEK_DATA (monday is 0), or "all" to apply no day filter
    """
    yes_answers = ('yes', 'y', 'ya')
    no_answers = ('no', 'n', 'nope')
    # Short forms accepted in addition to the CITY_DATA keys themselves.
    city_aliases = {
        'chi': 'chicago',
        'ny': 'new york city',
        'nyc': 'new york city',
        'dc': 'washington',
        'washington dc': 'washington',
    }

    print('Hello! Let\'s explore some US bikeshare data!')
    print()

    # Keep asking until the answer resolves to a known city.
    while True:
        print('Which city should we look for?')
        answer = input('Chicago/CHI, New York City/NYC, or Washington/DC? ').strip().lower()
        print()
        cityName = city_aliases.get(answer, answer)
        if cityName in CITY_DATA:
            break
        print('Please enter a valid city')
    cty = CITY_DATA[cityName]

    # Ask whether any filtering is wanted at all.
    while True:
        answer = input('Do you want to filter the data by month and/or day of week? Yes/No ').strip().lower()
        print()
        if answer in yes_answers:
            wants_filter = True
            break
        if answer in no_answers:
            wants_filter = False
            break
        print('Oops Invalid choice. Try again! ')

    month = 'all'
    day = 'all'
    # Any invalid answer below restarts from the "month / day / both" question.
    while wants_filter:
        mode = input('You can filter by month / day / both ').strip().lower()
        print()
        if mode not in ('month', 'day', 'both'):
            print('Oops Invalid input. Try again!')
            continue

        month = 'all'
        day = 'all'
        if mode in ('month', 'both'):
            month = _ask_month()
            if month is None:
                print('Oops Invalid input. Try again!')
                continue
        if mode in ('day', 'both'):
            heading = 'And day of the week?' if mode == 'both' else 'Which day\'s data to look at? '
            day = _ask_day(heading)
            # Compare with None explicitly: monday maps to 0, which is falsy.
            if day is None:
                print('Oops Invalid input. Try again!')
                continue
        break

    print('-'*40)
    return cty, cityName, month, day


def load_data(cty, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.
    Args:
        cty - path (or anything pd.read_csv accepts) of the city's CSV file;
              the file must contain a 'Start Time' column
        month - month number (1-12) to filter by, or "all" to apply no month filter
        day - weekday number (monday is 0) to filter by, or "all" to apply no day filter
    Returns:
        DataFrame - a new Pandas DataFrame with the CSV's columns, restricted to
                    rows whose 'Start Time' matches the requested month and day
    """
    trips = pd.read_csv(cty)
    # Parse the timestamps once and filter with a boolean mask, so no helper
    # columns have to be added to (and later removed from) the frame.
    start_times = pd.to_datetime(trips['Start Time'])
    keep = pd.Series(True, index=trips.index)
    if day != 'all':
        keep &= start_times.dt.dayofweek == day
    if month != 'all':
        keep &= start_times.dt.month == month
    # Return an independent copy so callers can modify it without pandas
    # chained-assignment warnings.
    return trips[keep].copy()


def time_stats(DataFrame):
    """
    Displays statistics on the most frequent times of travel.
    Args:
        DataFrame - trip data with a 'Start Time' column; it is not modified
    Prints the most common month, day of week and start hour, or a notice
    when there are no start times to analyse.
    """
    # Local import keeps this block self-contained; names are the English
    # ones as long as the process locale has not been changed.
    import calendar

    print('\nCalculating The Most Frequent Times of Travel...\n')
    StartTime = time.time()

    # Parse once and work on the derived series instead of adding temporary
    # columns to the caller's frame.
    start_times = pd.to_datetime(DataFrame['Start Time']).dropna()

    if start_times.empty:
        # mode()[0] would raise on an empty selection.
        print('No trips available to compute time statistics.')
    else:
        # display the most common month
        most_frequent_month = int(start_times.dt.month.mode()[0])
        print('The most common month for travel is {}'.format(calendar.month_name[most_frequent_month]))

        # display the most common day of week (monday is 0)
        most_frequent_day = int(start_times.dt.dayofweek.mode()[0])
        print('The most common day of week for travel is {}'.format(calendar.day_name[most_frequent_day]))

        # display the most common start hour
        most_frequent_hour = start_times.dt.hour.mode()[0]
        print('The most common hour for travel is {}'.format(most_frequent_hour))

    print("\nThis took %s seconds." % (time.time() - StartTime))
    print('-'*40)

def station_stats(DataFrame):
    """
    Displays statistics on the most popular stations and trip.
    Args:
        DataFrame - trip data with 'Start Station' and 'End Station' columns
    Prints the most common start station, end station and start-to-end
    combination, or a notice when the frame has no rows.
    """
    print('\nCalculating The Most Popular Stations and Trip...\n')
    StartTime = time.time()

    if DataFrame.empty:
        # mode()[0] would raise on an empty selection.
        print('No trips available to compute station statistics.')
    else:
        start_stations = DataFrame['Start Station']
        end_stations = DataFrame['End Station']

        # display most commonly used start station
        print()
        print('Most commonly used start station as per our data was {}'.format(start_stations.mode()[0]))

        # display most commonly used end station
        print()
        print('Most commonly used end station as per our data was {}'.format(end_stations.mode()[0]))

        # display most frequent combination of start station and end station trip
        print()
        routes = start_stations + ' to ' + end_stations
        print('The most frequent combination of start station and end station trip was {}'.format(routes.mode()[0]))

    print("\nThis took %s seconds." % (time.time() - StartTime))
    print('-'*40)


def _split_seconds(total_seconds):
    """Break a whole number of seconds into (days, hours, minutes, seconds)."""
    minutes, seconds = divmod(int(total_seconds), 60)
    hours, minutes = divmod(minutes, 60)
    days, hours = divmod(hours, 24)
    return days, hours, minutes, seconds


def trip_duration_stats(DataFrame):
    """
    Displays statistics on the total and average trip duration.
    Args:
        DataFrame - trip data with a 'Trip Duration' column; the values are
                    treated as seconds
    Prints the summed duration as days/hours/minutes/seconds and the mean
    duration (rounded up to a whole second) as hours/minutes/seconds, or a
    notice when there are no durations to analyse.
    """
    print('\nCalculating Trip Duration...\n')
    StartTime = time.time()

    durations = DataFrame['Trip Duration'].dropna()

    if durations.empty:
        # The mean of nothing is NaN, which math.ceil cannot handle.
        print('No trips available to compute trip durations.')
    else:
        # display total travel time
        print()
        # Round to a whole second so fractional durations do not print as "3.0 days".
        sum_days, sum_hours, sum_minutes, sum_seconds = _split_seconds(round(durations.sum()))
        print('Passengers travelled a total of {} days, {} hours, {} minutes and {} seconds'.format(sum_days, sum_hours, sum_minutes, sum_seconds))

        # display mean travel time
        print()
        mean_days, mean_hours, mean_minutes, mean_seconds = _split_seconds(math.ceil(durations.mean()))
        # Days are not shown for the mean, so fold them back into the hours.
        mean_hours += mean_days * 24
        print('Passengers travelled an average of {} hours, {} minutes and {} seconds'.format(mean_hours, mean_minutes, mean_seconds))

    print("\nThis took %s seconds." % (time.time() - StartTime))
    print('-'*40)


def user_stats(DataFrame):
    """
    Displays statistics on bikeshare users.
    Args:
        DataFrame - trip data with a 'User Type' column and, optionally,
                    'Gender' and 'Birth Year' columns
    Prints per-type and per-gender row counts (in alphabetical order), the
    number of rows without a gender, and the earliest, most recent and most
    common birth year.  Missing optional columns are reported instead.
    """
    print('\nCalculating User Stats...\n')
    StartTime = time.time()

    # Display counts of user types
    print()
    user_type_counts = DataFrame['User Type'].value_counts().sort_index()
    print('Number of types of users are {}'.format(len(user_type_counts)))
    for user_type, count in user_type_counts.items():
        print('{}s - {}'.format(user_type, count))

    # Display counts of gender
    print()
    if 'Gender' not in DataFrame:
        print('Oops, no data found :(')
    else:
        gender_counts = DataFrame['Gender'].value_counts().sort_index()
        print('Number of genders of users mentioned in the data are {}'.format(len(gender_counts)))
        for gender, count in gender_counts.items():
            print('{}s - {}'.format(gender, count))
        # Count the missing values directly instead of assuming that exactly
        # two gender groups exist.
        missing_gender = int(DataFrame['Gender'].isna().sum())
        print('Gender data for {} users is not available.'.format(missing_gender))

    # Display earliest, most recent, and most common year of birth
    print()
    if 'Birth Year' not in DataFrame:
        print('Data related to birth year of users is not available for this city.')
    else:
        birth_years = DataFrame['Birth Year'].dropna()
        if birth_years.empty:
            # int(nan) would raise if every value is missing.
            print('No birth year values are recorded for the selected trips.')
        else:
            print('Earliest year of birth: {}.'.format(int(birth_years.min())))
            print('Most recent year of birth: {}.'.format(int(birth_years.max())))
            # mode() is sorted, so ties resolve to the earliest year.
            print('Most common year of birth year: {}.'.format(int(birth_years.mode()[0])))

    print("\nThis took %s seconds." % (time.time() - StartTime))
    print('-'*40)

def display_data(DataFrame):
    """
    Optionally shows the raw trip rows, five at a time.
    Args:
        DataFrame - the trip data to page through
    Asks whether raw data should be shown (re-asking on an unrecognised
    answer), then prints successive groups of up to five rows, asking before
    each further group.  Stops when the user declines, gives an unrecognised
    answer to the follow-up question, or the rows run out.
    """
    yes_answers = ('yes', 'y', 'ya')
    no_answers = ('no', 'n', 'nope')
    page_size = 5

    # Re-ask in a loop rather than recursing on invalid input.
    while True:
        answer = input('Would you like to read some of the raw data? Yes/No ').strip().lower()
        print()
        if answer in yes_answers:
            break
        if answer in no_answers:
            return
        print('Not a valid choice. Try again! ')

    total_rows = len(DataFrame)
    offset = 0
    while offset < total_rows:
        # Clamp the upper bound so a short final page does not index past the end.
        for position in range(offset, min(offset + page_size, total_rows)):
            print(DataFrame.iloc[position])
            print()
        # Advance so the next page shows new rows instead of repeating the first five.
        offset += page_size
        if offset >= total_rows:
            break

        answer = input('Another five? Yes/No ').strip().lower()
        if answer in yes_answers:
            continue
        if answer not in no_answers:
            print('You did not enter a valid choice.')
        return

    print('No more raw data to display.')

def main():
    """
    Runs the interactive session: collect the filters, load the matching
    trips, print every report, then offer to start over.  The session ends
    on any answer other than yes / y / ya.
    """
    restart_answers = ('yes', 'y', 'ya')
    reports = (time_stats, station_stats, trip_duration_stats, user_stats, display_data)

    while True:
        # The first value returned by get_filters is not used here; the
        # trips are loaded from the CSV named after the city.
        cty, cityName, month, day = get_filters()
        file_stem = "new_york_city" if cityName == "new york city" else cityName
        trips = load_data(file_stem + '.csv', month, day)

        for report in reports:
            report(trips)

        answer = input('\nWould you like to restart? Enter yes or no.\n').lower()
        print()
        if answer not in restart_answers:
            break


if __name__ == "__main__":
    main()

Hello! Let's explore some US bikeshare data!

Which country's should we look for?
Chicago/CHI, New York City/NYC, or Washington/DC? DC

Do you want to filter the data by month and/or week? Yes/No Yes

You can filter by month / day / both both

Which month's data to look at?
January/jan, February/feb, March/mar, April/apr, May, June/jun- jun

And day of the week?
Monday/mon, Tuesday/tues, Wednesday/wed, Thursday/thur, Friday/fri, Saturday/sat, Sunday/sun- fri

----------------------------------------

Calculating The Most Frequent Times of Travel...

The most common month for travel is June
The most common day of week for travel is Friday
The most common hour for travel is 8

This took 0.03747296333312988 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...


Most commonly used start station as per our data was Jefferson Dr & 14th St SW

Most commonly used end station as per our data was Columbus Circle / Union Station

The most frequnt co

In [None]:
Here are four questions whose answers will change the results:

Would you like to see data for Chicago, New York, or Washington?
Would you like to filter the data by month, day, or not at all?
(If they chose month) Which month - January, February, March, April, May, or June?
(If they chose day) Which day - Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, or Sunday?

References:
https://stackoverflow.com/questions/21323692/convert-seconds-to-weeks-days-hours-minutes-seconds-in-python
https://pandas.pydata.org/pandas-docs/version/0.17.0/
https://jakevdp.github.io/PythonDataScienceHandbook/03.08-aggregation-and-grouping.html
https://pythonprogramming.net/introduction-intermediate-python-tutorial/
https://pandas.pydata.org/pandas-docs/stable/reference/index.html
https://docs.python.org/3/reference/expressions.html?highlight=boolean
https://pandas.pydata.org/pandas-docs/version/0.17.0/
                            