In [None]:
import time
import pandas as pd
import numpy as np

# Maps each supported city name (lower-case) to the CSV file with its trips.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

def _prompt_for_choice(prompt, valid_choices, error_message):
    """Ask ``prompt`` until the answer is in ``valid_choices``; return it normalized."""
    while True:
        answer = input(prompt)
        print("You selected:", answer)
        # Compare on a normalized form so casing and stray spaces do not matter.
        choice = answer.strip().lower()
        if choice in valid_choices:
            return choice
        print(error_message)


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Every answer is stripped of surrounding whitespace and lower-cased before
    it is validated, and that normalized form is what gets returned, so the
    values can be used directly as lookup keys regardless of how the user
    typed them. The user is re-prompted until each answer is valid.

    Returns:
        (str) city - lower-case name of the city to analyze
        (str) month - lower-case name of the month to filter by, or "all" to apply no month filter
        (str) day - lower-case name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')

    # get user input for city (chicago, new york city, washington)
    city = _prompt_for_choice(
        'Would you like to see data for Chicago, New York city, Washington? \n',
        ('chicago', 'new york city', 'washington'),
        "Not an appropriate choice, please enter valid name from above cities\n",
    )

    # get user input for month (all, january, february, ... , june)
    month = _prompt_for_choice(
        'Would you like to filter the data by month? select choice from here: January, February, March, April, May, June or All?\n',
        ('january', 'february', 'march', 'april', 'may', 'june', 'all'),
        "Not an appropriate choice, please enter a valid month\n",
    )

    # get user input for day of week (all, monday, tuesday, ... sunday)
    day = _prompt_for_choice(
        'Would you like to filter the data by day? select day: Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday or All\n',
        ('monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday', 'all'),
        "Not an appropriate choice, please enter a valid day\n",
    )

    print('-'*40)
    return city, month, day


def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    The city, month and day names are matched case-insensitively and with
    surrounding whitespace ignored, so "January" and "january" are equivalent.

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day,
             with added "month" (int) and "day_of_week" (day name) columns
    Raises:
        KeyError - if city is not a key of CITY_DATA
        ValueError - if month is neither "all" nor one of january..june
    """
    df = pd.read_csv(CITY_DATA[city.strip().lower()])

    # convert the Start Time column to datetime
    df['Start Time'] = pd.to_datetime(df['Start Time'])

    # extract month and day of week from Start Time to create new columns
    df['month'] = df['Start Time'].dt.month
    df['day_of_week'] = df['Start Time'].dt.day_name()

    # filter by month if applicable
    month = month.strip().lower()
    if month != 'all':
        # The list position (1-based) is the calendar month number. The lookup
        # must use the normalized name, otherwise "January" is not found.
        months = ['january', 'february', 'march', 'april', 'may', 'june']
        month_number = months.index(month) + 1
        df = df[df['month'] == month_number]

    # filter by day of week if applicable
    day = day.strip().lower()
    if day != 'all':
        # day_name() yields title-cased names such as "Monday"
        df = df[df['day_of_week'] == day.title()]

    return df


def time_stats(df, month, day):
    """
    Displays statistics on the most frequent times of travel.

    The most common month is only reported when no month filter is active
    (month is "all"), and likewise the most common day only when day is "all".
    The input DataFrame is not modified.

    Args:
        df - DataFrame with a datetime "Start Time" column plus "month"
             (1-12) and "day_of_week" columns
        (str) month - month filter that was applied, or "all"
        (str) day - day-of-week filter that was applied, or "all"
    """

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column has no first element to report.
        print('No data available for the selected filters.')
    else:
        # display the most common month
        if month.lower() == 'all':
            month_names = ('January', 'February', 'March', 'April', 'May', 'June',
                           'July', 'August', 'September', 'October', 'November',
                           'December')
            popular_month = month_names[df['month'].mode()[0] - 1]
            print('Most Popular Month:', popular_month)

        # display the most common day of week
        if day.lower() == 'all':
            popular_day = df['day_of_week'].mode()[0]
            print('Most Popular Day of Week:', popular_day)

        # display the most common start hour; computed on a temporary Series
        # so that no extra column is written into the caller's DataFrame
        popular_start_hour = df['Start Time'].dt.hour.mode()[0]
        print('Most Popular Start hour:', popular_start_hour)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def station_stats(df):
    """
    Displays statistics on the most popular stations and trip.

    A trip is identified by its start and end station joined with "--".
    The input DataFrame is not modified.

    Args:
        df - DataFrame with "Start Station" and "End Station" columns
    """

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column has no first element to report.
        print('No data available for the selected filters.')
    else:
        # display most commonly used start station
        popular_start_station = df['Start Station'].mode()[0]
        print('Most Commonly Used Start Station :', popular_start_station)

        # display most commonly used end station
        popular_end_station = df['End Station'].mode()[0]
        print('Most Commonly Used End Station :', popular_end_station)

        # display most frequent combination of start station and end station;
        # built as a temporary Series so the caller's DataFrame gains no column
        trips = df['Start Station'] + "--" + df['End Station']
        popular_trip = trips.mode()[0]
        print('Most Frequent Trip :', popular_trip)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def trip_duration_stats(df):
    """
    Prints the summed and the mean value of the "Trip Duration" column.

    Args:
        df - DataFrame holding a numeric "Trip Duration" column
    """

    print('\nCalculating Trip Duration...\n')
    started_at = time.time()

    durations = df['Trip Duration']

    # overall time spent travelling
    print('Total Travel Time :', durations.sum())

    # typical length of a single trip
    print('Average Travel Time :', durations.mean())

    elapsed = time.time() - started_at
    print("\nThis took {} seconds.".format(elapsed))
    print('-' * 40)


def user_stats(df, city):
    """
    Displays statistics on bikeshare users.

    User-type counts are always printed. Gender counts and birth-year
    statistics are printed only for Chicago and New York City, and only when
    the corresponding column is actually present (and, for birth years, has
    at least one non-missing value); otherwise a "no data" notice is printed.

    Args:
        df - DataFrame with a "User Type" column and optionally "Gender" and
             "Birth Year" columns
        (str) city - name of the city the data belongs to (case-insensitive)
    """

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    print(df['User Type'].value_counts())

    reported_demographics = False
    if city.lower() in ('chicago', 'new york city'):
        # Display counts of gender
        if 'Gender' in df.columns:
            print(df['Gender'].value_counts())
            reported_demographics = True

        # Display earliest, most recent, and most common year of birth
        if 'Birth Year' in df.columns:
            # Missing values make the column float; drop them and print the
            # years as whole numbers. mode() needs at least one value.
            birth_years = df['Birth Year'].dropna()
            if not birth_years.empty:
                print("Earliest Year:", int(birth_years.min()))
                print("Most Recent Year:", int(birth_years.max()))
                print("Most Common Year:", int(birth_years.mode()[0]))
                reported_demographics = True

    if not reported_demographics:
        print("No Data Here About Gender or Birth Year")

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
    
    
def _ask_yes_no(prompt):
    """Ask ``prompt`` until the user answers yes or no; return True for yes."""
    answer = input(prompt).strip().lower()
    while answer not in ('yes', 'no'):
        answer = input('Invalid input, Please enter: yes or no\n').strip().lower()
    return answer == 'yes'


def display_raw_data(city):
    """
    Interactively prints the city's raw CSV data five rows at a time.

    The user is asked before the first chunk and again after every chunk;
    answering "no" stops the display. Answers other than yes/no are
    re-prompted. The data is read straight from the city's CSV file, so it
    is not affected by any month/day filter.

    Args:
        (str) city - name of the city whose file (per CITY_DATA) is shown
    """
    # Widen the column limit so printed chunks show every column.
    pd.set_option('display.max_columns', 200)

    if _ask_yes_no("Do you want to display raw of data?\n"):
        reader = pd.read_csv(CITY_DATA[city.lower()], chunksize=5)
        try:
            for chunk in reader:
                print(chunk)
                if not _ask_yes_no("Do you want to display another raw of data?\n"):
                    break
            else:
                # Loop ran to completion: the file has no further rows.
                print('No more raw data to display.')
        finally:
            # Stopping early leaves the file open unless it is closed here.
            reader.close()

    print('Thanks... End of Program')

def main():
    """Runs the interactive analysis repeatedly until the user declines to restart."""
    keep_running = True
    while keep_running:
        # Collect the filters, load the matching trips, then print each report.
        city, month, day = get_filters()
        trips = load_data(city, month, day)

        time_stats(trips, month, day)
        station_stats(trips)
        trip_duration_stats(trips)
        user_stats(trips, city)
        display_raw_data(city)

        # Anything other than "yes" (case-insensitive) ends the session.
        answer = input('\nWould you like to restart? Enter yes or no.\n')
        keep_running = answer.lower() == 'yes'
# Start the interactive session only when this file is run as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()