# Overview
This project is part of the Data Analyst Nanodegree program of Udacity.
In this project, I prompt the user for input that guides the analysis of bikeshare datasets from three American cities: Washington, Chicago, and New York City.  

In [1]:
# Importing required packages
import time
import pandas as pd
import numpy as np

In [2]:
# Lookup table: lower-case city name -> CSV file holding that city's trips.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}


In [7]:
# Getting the user input for the selected city
def get_city_filters():
    """
    Asks the user which city's bikeshare data to analyze.

    The menu is shown again until the answer is one of the letters a, b or c
    (case-insensitive; surrounding whitespace is ignored).

    Returns:
        (str) city - name of the city to analyze: 'chicago',
            'new york city' or 'washington'
    """
    print('Hello! Let\'s explore some US bikeshare data!')

    # Menu letter -> city name returned to the caller.
    cities_by_letter = {'a': 'chicago', 'b': 'new york city', 'c': 'washington'}
    prompt = 'To view the available bikeshare data, type:\n (a) for Chicago\n (b) for New York City\n (c) for Washington:\n  '

    # Keep asking until the answer is one of the menu letters.
    city_selection = input(prompt).strip().lower()
    while city_selection not in cities_by_letter:
        print('That\'s invalid input.')
        city_selection = input(prompt).strip().lower()

    city = cities_by_letter[city_selection]

    print('-'*40)
    print (city)
    return city

In [4]:
# Try out the city prompt; the returned city name is shown as the cell output.
get_city_filters()

Hello! Let's explore some US bikeshare data!
To view the available bikeshare data, type:
 (a) for Chicago
 (b) for New York City
 (c) for Washington:
  a
----------------------------------------


'chicago'

In [8]:
# Getting user input for time frame
# Answers accepted by the month and day prompts (the prompts offer January-June only).
_MONTH_CHOICES = ('january', 'february', 'march', 'april', 'may', 'june')
_DAY_CHOICES = ('monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday')


def _ask_until_valid(prompt, valid_choices, error_message):
    """Repeat `prompt` until the stripped, lower-cased answer is in `valid_choices`."""
    answer = input(prompt).strip().lower()
    while answer not in valid_choices:
        print(error_message)
        answer = input(prompt).strip().lower()
    return answer


def _ask_month():
    """Ask for a month name and return it in lower case."""
    return _ask_until_valid(
        'which month? Please type out (January / February / March / April / May / June)\n',
        _MONTH_CHOICES,
        'Invalid month choice!!',
    )


def _ask_day():
    """Ask for a day-of-week name and return it in lower case."""
    return _ask_until_valid(
        'Which day? Please type a day: Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday.\n',
        _DAY_CHOICES,
        'Invalid day choice!!',
    )


def get_time_filters(city=None):
    """
    Asks the user whether to filter by month, day, both or neither.

    Args:
        (str) city - name of the city being analyzed; only used in the prompt
            text. When omitted, the module-level variable `city` is used so
            that existing calls without arguments keep working.
    Returns:
        (str) month - lower-case name of the month to filter by, or "all" to apply no month filter
        (str) day - lower-case name of the day of week to filter by, or "all" to apply no day filter
    Raises:
        NameError - if no city is passed and no module-level `city` exists
    """
    if city is None:
        # Backward compatibility: the function used to read the global directly.
        try:
            city = globals()['city']
        except KeyError:
            raise NameError(
                "name 'city' is not defined; pass it as get_time_filters(city)"
            ) from None

    print('What\'s the time window you want view the analysis for?')
    time_frame = input('\n\nWould you like to filter {}\'s data by month, day, both, or neither of them?\nKindly type "month" or "day" or "both" or "none": \n'.format(city.title())).strip().lower()

    # Repeat the question in case of invalid input
    while time_frame not in {'none', 'both', 'month', 'day'}:
        print('That\'s not a valid choice')
        time_frame = input('\n\nWould you like to filter {}\'s data by month, day, both, or not at all? type month or day or both or none: \n\n'.format(city.title())).strip().lower()

    # No time frame selected: analyze the whole period of the chosen city dataset.
    if time_frame == 'none':
        print('\nFiltering for {} for the 6 months period\n\n'.format(city.title()))

    # Ask only for the parts the user chose to filter on (month first, then
    # day); everything else falls back to "all".
    month = _ask_month() if time_frame in ('both', 'month') else 'all'
    day = _ask_day() if time_frame in ('both', 'day') else 'all'

    print('-'*40)
    print("selected month: {}, selected day: {}".format(month, day))
    return (month, day)

In [9]:
# Ask for the city first: get_time_filters() reads the module-level `city` set here.
city = get_city_filters()
month, day = get_time_filters()

Hello! Let's explore some US bikeshare data!
To view the available bikeshare data, type:
 (a) for Chicago
 (b) for New York City
 (c) for Washington:
  c
----------------------------------------
washington
What's the time window you want view the analysis for?


Would you like to filter Washington's data by month, day, both, or neither of them?
Kindly type "month" or "day" or "both" or "none": 
both
which month? Please type out (January / February / March / April / May / June)
january
Which day? Please type a day: Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday.
tuesday
----------------------------------------
selected month: january, selected day: tuesday


In [11]:
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Two helper columns are added to the returned frame: 'month' (month number
    of 'Start Time') and 'day_of_week' (English day name of 'Start Time').

    Args:
        (str) city - name of the city to analyze; must be a key of CITY_DATA
        (str) month - name of the month to filter by (january .. june, any case),
            or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    Raises:
        KeyError - if city is not a key of CITY_DATA
        ValueError - if month is neither "all" nor one of january .. june
    """
    # load data file into a dataframe
    df = pd.read_csv(CITY_DATA[city])

    # convert the Start Time column to datetime
    df['Start Time'] = pd.to_datetime(df['Start Time'])

    # extract month and day of week from Start Time to create new columns
    df['month'] = df['Start Time'].dt.month
    # Series.dt.weekday_name was removed in pandas 1.0; day_name() yields the
    # same English names ("Monday", "Tuesday", ...).
    df['day_of_week'] = df['Start Time'].dt.day_name()

    # filter by month if applicable
    if month != 'all':
        # position in the list + 1 is the calendar month number
        months = ['january', 'february', 'march', 'april', 'may', 'june']
        month_number = months.index(month.lower()) + 1
        df = df[df['month'] == month_number]

    # filter by day of week if applicable
    if day != 'all':
        # day names in the column are title-cased ("Monday")
        df = df[df['day_of_week'] == day.title()]

    return df

# Load the filtered trips once and keep them for the statistics cells below.
df = load_data(city, month, day)

In [13]:
# Gleaning some time statistics
def time_stats(df, month=None, day=None):
    """
    Displays statistics on the most frequent times of travel.

    The most common month is reported only when no month filter is active,
    the most common day only when no day filter is active; the most common
    start hour is always reported. The DataFrame passed in is not modified.

    Args:
        df - Pandas DataFrame with a datetime 'Start Time' column plus the
            'month' (month number) and 'day_of_week' columns
        (str) month - active month filter, or "all". When omitted, the
            module-level variable `month` is used, or "all" if there is none.
        (str) day - active day filter, or "all". When omitted, the
            module-level variable `day` is used, or "all" if there is none.
    """
    # Backward compatibility: the function used to read these globals directly.
    if month is None:
        month = globals().get('month', 'all')
    if day is None:
        day = globals().get('day', 'all')

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column has no element 0, so there is nothing to report.
        print('No trips match the selected filters.')
    else:
        # display the most common month
        if month == 'all':
            months = ['january', 'february', 'march', 'april', 'may', 'june']
            # 'month' holds month numbers; the list is zero-based
            popular_month = months[df['month'].mode()[0] - 1]
            print('Most Popular Start Month:', popular_month)

        # display the most common day of week
        if day == 'all':
            popular_day = df['day_of_week'].mode()[0]
            print('Most Popular Start day:', popular_day)

        # display the most common start hour; computed on the fly so that no
        # column is added to the caller's (possibly filtered) frame
        popular_hour = df['Start Time'].dt.hour.mode()[0]
        print('Most Popular Start Hour:', popular_hour)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
    
# Show the time statistics for the DataFrame returned by load_data() above.
time_stats(df)


Calculating The Most Frequent Times of Travel...

Most Popular Start Hour: 17

This took 0.13762998580932617 seconds.
----------------------------------------


In [47]:
def station_stats(df):
    """Print the busiest start station, end station and start/end pair found in df."""

    print('\nCalculating The Most Popular Stations and Trip...\n')
    started_at = time.time()

    # Start station that occurs most often.
    top_start = df['Start Station'].mode()[0]
    print('Most commonly used start station: ', top_start)

    # value_counts() orders by descending frequency, so the first index label
    # is the end station that occurs most often.
    end_counts = df['End Station'].value_counts()
    top_end = end_counts.index[0]
    print('\nMost commonly used end station: ', top_end)

    # Count trips per (start, end) pair, then keep the row with the highest count.
    route_counts = df.groupby(['Start Station', 'End Station']).size().reset_index(name='count')
    top_route = route_counts.sort_values('count', ascending=False).head(1)
    print('\nMost frequent combination of start station and end station trip:\n ', top_route)

    elapsed = time.time() - started_at
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)

# Show the station statistics for the DataFrame returned by load_data() above.
station_stats(df)



Calculating The Most Popular Stations and Trip...

Most commonly used start station:  Columbus Circle / Union Station

Most commonly used end station:  Columbus Circle / Union Station

Most frequent combination of start station and end station trip:
                          Start Station    End Station  count
2657  Columbus Circle / Union Station  8th & F St NE     12

This took 0.015958309173583984 seconds.
----------------------------------------


#### Comment:
>Great! Another way is to merge the start and end station columns into a single column, i.e., a string representing the route, and then use the `.value_counts()` method to find the most common route.