# In [None]:
# This script was developed as a project submission for the Udacity "Programming for Data Science" training.

import time
import pandas as pd
import numpy as np


# Maps each supported city name (as typed by the user) to its CSV data file.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york': 'new_york_city.csv',
    'washington': 'washington.csv',
}

# Accepted month answers, in calendar order so that list position + 1 is the
# month number; 'all' comes last and means "no month filter".
Months = [
    'january',
    'february',
    'march',
    'april',
    'may',
    'june',
    'all',
]

# Accepted city answers: exactly the keys of CITY_DATA, in the same order.
Cities = list(CITY_DATA)

# Accepted day answers; 'all' means "no day filter".
Days = [
    'sunday',
    'monday',
    'tuesday',
    'wednesday',
    'thursday',
    'friday',
    'saturday',
    'all',
]

def _ask_choice(prompt, valid_choices):
    """
    Keeps asking the user a question until the answer is one of valid_choices.

    The answer is stripped of surrounding whitespace and lower-cased before it
    is compared, so "January", " january " and "JANUARY" are all accepted.

    Args:
        (str) prompt - text shown to the user by input()
        (list) valid_choices - lower-case strings that count as valid answers
    Returns:
        (str) the normalized answer, always a member of valid_choices
    """
    while True:
        answer = input(prompt).strip().lower()
        if answer in valid_choices:
            return answer
        # Explain why the question is repeated instead of silently looping.
        print('Invalid input. Please choose one of: {}'.format(', '.join(valid_choices)))


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Every answer is matched case-insensitively against the module-level
    Cities, Months and Days lists; the question is repeated until the
    answer is valid.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')

    # city: one of the entries in Cities
    city = _ask_choice(
        'Which city do you want to learn about Chicago, New York or Washington? \n> ',
        Cities)

    # month: a month name from Months, or 'all'
    month = _ask_choice(
        'Please provide a month name or type  \'all\' to apply no month filter. \n(e.g. all, january, february, march, april, may, june) \n> ',
        Months)

    # day of week: a day name from Days, or 'all'
    day = _ask_choice(
        'Please type one of the week day you want to analyze?'
        ' You can type \'all\' to apply no day filter. \n(e.g. all, monday, sunday) \n> ',
        Days)

    print('='*40)
    return city, month, day


def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Three helper columns are derived from 'Start Time' and added to the
    frame: 'month' (month number), 'day_of_week' (English day name such as
    'Monday') and 'hour'.

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """

    # import data file to a dataframe
    df = pd.read_csv(CITY_DATA[city])

    # convert the Start Time to datetime datatype
    df['Start Time'] = pd.to_datetime(df['Start Time'])

    # extract month, day of week and hour from Start Time into new columns.
    # dt.day_name() replaces dt.weekday_name, which was removed in pandas 1.0.
    start = df['Start Time'].dt
    df['month'] = start.month
    df['day_of_week'] = start.day_name()
    df['hour'] = start.hour

    # filter by month if applicable
    if month != 'all':
        # Months is in calendar order, so list position + 1 is the month number
        month_number = Months.index(month) + 1
        df = df[df['month'] == month_number]

    # filter by day of week if applicable
    if day != 'all':
        # day names in the column are title-cased ('Monday'), user input is lower-case
        df = df[df['day_of_week'] == day.title()]
    return df


def time_stats(df):
    """
    Displays statistics on the most frequent times of travel.

    Prints the most common value of the 'month', 'day_of_week' and 'hour'
    columns, followed by how long the calculation took.

    Args:
        df - Pandas DataFrame containing 'month', 'day_of_week' and 'hour' columns
    """

    print('\nCalculating The Most Frequent Times of Travel...\n')
    began = time.time()

    # (label, column) pairs, reported in this order
    reports = (
        ("The most popular month is :", 'month'),
        ("The most popular day of week is :", 'day_of_week'),
        ("The most popular start hour is :", 'hour'),
    )
    for label, column in reports:
        frequencies = df[column].value_counts()
        print(label, frequencies.idxmax())

    elapsed = time.time() - began
    print("\nThis took %s seconds." % elapsed)
    print('=' * 40)


def station_stats(df):
    """
    Displays statistics on the most popular stations and trip.

    Prints the most common start station, the most common end station and
    the most common (start station, end station) pair, followed by how long
    the calculation took.

    Args:
        df - Pandas DataFrame containing 'Start Station' and 'End Station' columns
    """

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    # most commonly used start station
    most_popular_start_station = df['Start Station'].value_counts().idxmax()
    print("The most popular start station is :", most_popular_start_station)

    # most commonly used end station
    most_popular_end_station = df['End Station'].value_counts().idxmax()
    print("The most popular end station is :", most_popular_end_station)

    # most frequent combination of start station and end station.
    # The two columns must be counted together as a pair: taking the mode of
    # each column separately can yield a start/end combination that never
    # occurs as an actual trip.
    trip_counts = df.groupby(['Start Station', 'End Station']).size()
    start_station, end_station = trip_counts.idxmax()
    print("The most commonly used start station and end station are : {}, {}"
          .format(start_station, end_station))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('='*40)


def trip_duration_stats(df):
    """
    Displays statistics on the total and average trip duration.

    Prints the sum and the mean of the 'Trip Duration' column, followed by
    how long the calculation took.

    Args:
        df - Pandas DataFrame containing a 'Trip Duration' column
    """

    print('\nCalculating Trip Duration...\n')
    began = time.time()

    durations = df['Trip Duration']

    # (label, aggregation) pairs; each aggregation runs right before its line is printed
    summaries = (
        ("Total travel time :", durations.sum),
        ("Mean travel time :", durations.mean),
    )
    for label, summarize in summaries:
        print(label, summarize())

    elapsed = time.time() - began
    print("\nThis took %s seconds." % elapsed)
    print('=' * 40)


def user_stats(df):
    """
    Displays statistics on bikeshare users.

    Prints the number of trips per user type, then delegates to
    user_stats_gender and user_stats_birth when the 'Gender' and
    'Birth Year' columns are present; otherwise prints a notice that the
    information is unavailable. Ends with how long the calculation took.

    Args:
        df - Pandas DataFrame containing a 'User Type' column and,
             optionally, 'Gender' and 'Birth Year' columns
    """

    print('\nCalculating User Stats...\n')
    began = time.time()

    # counts of user types, each followed by a blank line
    print("Counts of user types:\n")
    for user_type, total in df['User Type'].value_counts().items():
        print("  {}: {}".format(user_type, total))
        print()

    # gender breakdown, only when the data set provides it
    if 'Gender' not in df.columns:
        print('Gender information not available for Washington city ')
    else:
        user_stats_gender(df)

    # birth year statistics, only when the data set provides them
    if 'Birth Year' not in df.columns:
        print('Birth year information not available for Washington city')
    else:
        user_stats_birth(df)

    elapsed = time.time() - began
    print("\nThis took %s seconds." % elapsed)
    print('=' * 40)

# Counts of gender are displayed by user_stats_gender below.
    
def user_stats_gender(df):
    """
    Displays the number of trips per gender.

    Args:
        df - Pandas DataFrame containing a 'Gender' column
    """
    print("Counts of gender:\n")

    # one indented line per gender, in value_counts order
    tally = df['Gender'].value_counts()
    for gender, total in tally.items():
        print("  {}: {}".format(gender, total))

    # blank line closing the section
    print()

# Earliest, most recent, and most common year of birth are displayed by user_stats_birth below.

def user_stats_birth(df):
    """
    Displays the most common, most recent and earliest year of birth.

    Missing values are ignored. When no birth year is left, a notice is
    printed instead of the statistics. Years are printed as whole numbers
    (e.g. 1990 rather than 1990.0).
    NOTE(review): assumes birth years are whole numbers - confirm against the data.

    Args:
        df - Pandas DataFrame containing a 'Birth Year' column
    """
    birth_year = df['Birth Year'].dropna()

    # idxmax() raises ValueError on an empty series, so bail out early
    if birth_year.empty:
        print('No birth year data available for the selected filters.')
        return

    # the most common birth year
    most_common_year = int(birth_year.value_counts().idxmax())
    print("The most common birth year is :", most_common_year)
    # the most recent birth year
    most_recent = int(birth_year.max())
    print("The most recent birth year is :", most_recent)
    # the earliest birth year
    earliest_year = int(birth_year.min())
    print("The most earliest birth year was :", earliest_year)

    
    
def display_data(df):
    """
    Displays raw bikeshare data.

    Asks the user whether a sample should be shown and, if the answer is
    'yes' (in any letter case), prints df.head().

    Args:
        df - Pandas DataFrame to take the sample from
    """
    answer = input('\nWould you like to see sample of raw data? Type \'yes\' or \'no\'\n> ')

    # anything other than 'yes' means no sample
    wants_sample = answer.lower() == 'yes'
    if not wants_sample:
        return

    print(df.head())

def main():
    """
    Runs the interactive bikeshare analysis.

    Each round asks for the filters, loads the matching data, prints every
    statistics report, offers a sample of the raw data and finally asks
    whether to start over. Any answer other than 'yes' (in any letter case)
    ends the session.
    """
    keep_going = True
    while keep_going:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        # statistics reports, printed in this order
        for report in (time_stats, station_stats, trip_duration_stats, user_stats):
            report(df)

        display_data(df)

        answer = input('\nWould you like to restart? Enter yes or no.\n')
        keep_going = answer.lower() == 'yes'


# Start the interactive session only when the file is executed as a script.
if __name__ == '__main__':
    main()
    