#  Explore US Bikeshare Data

##  Statistics Computed

#1 Popular times of travel (i.e., occurs most often in the start time)

    1.most common month
    2.most common day of week
    3.most common hour of day

#2 Popular stations and trip

    1.most common start station
    2.most common end station
    3.most common trip from start to end (i.e., most frequent combination of start station and end station)

#3 Trip duration

    1.total travel time
    2.average travel time

#4 User info

    1.counts of each user type
    2.counts of each gender (only available for NYC and Chicago)
    3.earliest, most recent, most common year of birth (only available for NYC and Chicago)

In [1]:
# -*- coding: utf-8 -*-
"""
Created on Sun Jun 10 00:55:55 2018

@author: Titan
"""

import pandas as pd
import time


CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }
    
city_list = CITY_DATA.keys()
month_list = ["January","February","March","April","May","June","July","August","September","October","November","December"]    
day_list = ["Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday"]
choice_list = ["Y", "y", "Yes", "YES", "yes", "N", "n", "NO", "no", "No"]
choice = ["Y", "y", "Yes", "YES", "yes"]


def get_filters():
    
    print('\n'*3)
    print('*'*65)
    print('Hello! Let\'s explore some US bikeshare data!')
    print('*'*65)
    print('\n'*2)
    
    # TO DO: get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs
    city_input = str(input("\nDefault city is chicago. Would you like to filter the data based on any other City? \n"))
    while city_input not in choice_list:
        city_input = str(input("\nDefault city is chicago. Would you like to filter the data based on any other City? \n"))
        
    if city_input in choice:
        city = input("\nPlease Enter City name you would like to filter!\n")
        while city not in city_list:
            print("\nWe regreat!!! Data related to your fevaurite city not available at present!!!")           
            print("\nYou can choose any city from the below list:")
            for city_name in city_list:
                print(city_name)
                                
            city = input("\nPlease Enter City name you would like to filter!\n")
    else:
        city = 'chicago'
        
    # TO DO: get user input for month (all, january, february, ... , june)
    month_input = input("\nWould you like to filter the data based on any specific month? \n")
    while month_input not in choice_list:
        month_input = input("\nWould you like to filter the data based on any specific month? \n")
        
    if month_input in choice:
        month = input("\nPlease Enter month information (like- January,February,March) you would like to filter!\n ")
        while month not in month_list:
            print("\Incorrect!!! Please enter correct month!\n")
            print("\nYou can choose from {}".format(month_list))
            month = input("\nPlease Enter month information (like- January,February,March) you would like to filter!\n ")
    else:
        month = "All"
    
    day_input = input("\nWould you like to filter the data based on any specific day? \n")
    while day_input not in choice_list:
        day_input = input("\nWould you like to filter the data based on any specific day? \n")
        
    if day_input in choice:
        day = input("\nPlease Enter day information (like- Monday,Tuesday,Wednesday) you would like to filter!\n")
        while day not in day_list:
            print("\Incorrect!!! Please enter correct day!\n")
            print("You can choose from {}".format(day_list))
            day = input("\nPlease Enter day information (like- Monday,Tuesday,Wednesday) you would like to filter!\n")
    else:
        day = "All"

    print('\n'*3)
    print('-'*65)
    print("\nYou have selected city- {}, month- {}, and day- {}".format(city, month, day))
    print('-'*65)
    return city, month, day



def load_data(CITY_DATA, city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    

    print('\n'*2)
    print("\nLet us load the data to calculate.... ")
    print('*'*65)
    print('\n'*2)
       

    df = pd.read_csv(CITY_DATA[city])

    # convert the Start Time column to datetime
    df['Start Time'] = pd.to_datetime(df['Start Time'])

    # extract month and day of week from Start Time to create new columns
    df['month'] = df['Start Time'].dt.month
    df['day_of_week'] = df['Start Time'].dt.weekday_name
    
    
    # filter by month if applicable
    if month != 'All':
        # use the index of the months list to get the corresponding int
        #months = ['january', 'february', 'march', 'april', 'may', 'june']
        month_list = ["January","February","March","April","May","June","July","August","September","October","November","December"]    
        months = month_list
        month = month_list.index(month) + 1
        
        if month in df['month'].unique():
            # filter by month to create the new dataframe
            df = df[df['month'] == month]
        else:
            print("\nCalculating result for all months as data related to this month not available...")
    
    # filter by day of week if applicable
    if day != 'All':
        # filter by day of week to create the new dataframe
        df = df[df['day_of_week'] == day.title()]
   
    return df   


def time_stats(df, month, day, month_list, day_list):
    """Displays statistics on the most frequent times of travel."""
    
    print('\n'*3)
    print('\nCalculating The Most Frequent Times of Travel...\n')
    print('*'*65)
    print('\n')
    
    start_time = time.time()

    # TO DO: display the most common month
    popular_month = df['month'].value_counts().idxmax()
    
    
    # TO DO: display the most common day of week
    popular_day = df['day_of_week'].value_counts().idxmax()
        

    # TO DO: display the most common start hour
    # extract hour from the Start Time column to create an hour column
    df['hour'] = df['Start Time'].dt.hour

    # find the most popular hour
    popular_hour = df['hour'].value_counts().idxmax()

    print("\nMost common month is: {}".format(month_list[popular_month]))
    print("\nMost common day of week is: {}".format(popular_day))
    print("\nMost common start hour is: {}".format(popular_hour))

    print('\n')
    print('-'*65)    
    print("\nThis took %s seconds to calculate." % (round(time.time() - start_time, 2)))
    print('-'*65)
    
    
def station_stats(df):
    """Displays statistics on the most popular stations and trip."""
    
    print('\n'*3)
    print('\nCalculating The Most Popular Stations and Trip...\n')
    print('*'*65)
    print('\n')

    
    start_time = time.time()

    # TO DO: display most commonly used start station
    popular_Start_Station = df['Start Station'].value_counts().idxmax()

    # TO DO: display most commonly used end station
    popular_End_Station = df['End Station'].value_counts().idxmax()

    # TO DO: display most frequent combination of start station and end station trip
    df['start_End_Station'] = df['Start Station'] +", "+ df['End Station']
    popular_start_End_Station = df['start_End_Station'].value_counts().idxmax()
    
    print("\nMost commonly used start station is: %s" %(popular_Start_Station))
    print("\nMost commonly used end station is: %s" %(popular_End_Station))
    print("\nMost frequent combination of start station and end station trip are: %s" %(popular_start_End_Station))
     
    print('\n')
    print('-'*65)    
    print("\nThis took %s seconds to calculate." % (round(time.time() - start_time, 2)))
    print('-'*65)
    
    
def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration."""

    print('\n'*3)
    print('\nCalculating Trip Duration...\n')
    print('*'*65)
    print('\n')
    
    start_time = time.time()

    # TO DO: display total travel time
    total_travel_time = df['Trip Duration'].sum()

    # TO DO: display mean travel time
    mean_travel_time = df['Trip Duration'].mean()
    
    print("\nTotal travel time is: %s" %(total_travel_time))
    print("\nMean travel time is: %s" %(mean_travel_time))

    print('\n')
    print('-'*65)    
    print("\nThis took %s seconds to calculate." % (round(time.time() - start_time, 2)))
    print('-'*65)
    

def user_stats(df, city):
    """Displays statistics on bikeshare users."""
    
    print('\n'*3)
    print('\nCalculating User Stats...\n')
    print('*'*65)
    print('\n')
    
    start_time = time.time()

    # TO DO: Display counts of user types
    user_types = df['User Type'].value_counts()

    # TO DO: Display counts of gender
    if city == "washington":
        gender_types = "Gender Data not available for this city!!!"        
    else:
        gender_types = df['Gender'].value_counts()
        
    print("\nCounts of user types:\n")
    print(user_types)
    print()

    print("\nCounts of gender: \n")
    print(gender_types)
    print()
    
    # TO DO: Display earliest, most recent, and most common year of birth
    if city != "washington":
        earliest = df['Birth Year'].min()
        most_recent = df['Birth Year'].max()
        most_common = df['Birth Year'].value_counts().idxmax()
        
        print("\nEarliest year of birth is: %s" % (int(earliest)))
        print("\nMost recent year of birth is: %s" % (int(most_recent)))
        print("\nMost common year of birth is: %s" % (int(most_common)))
    else:
        print("\nYear of birth information not available for this city!!!")
    
    print('\n')
    print('-'*65)    
    print("\nThis took %s seconds to calculate." % (round(time.time() - start_time, 2)))
    print('-'*65)
    

def main():
    while True:
        city, month, day = get_filters()
        
        df = load_data(CITY_DATA, city, month, day)

        time_stats(df, month, day, month_list, day_list)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df, city)

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.lower() in choice:
            break
        
           
if __name__ == "__main__":    
    main()
        
        





*****************************************************************
Hello! Let's explore some US bikeshare data!
*****************************************************************




Default city is chicago. Would you like to filter the data based on any other City? 
n

Would you like to filter the data based on any specific month? 
n

Would you like to filter the data based on any specific day? 
n




-----------------------------------------------------------------

You have selected city- chicago, month- All, and day- All
-----------------------------------------------------------------




Let us load the data to calculate.... 
*****************************************************************








Calculating The Most Frequent Times of Travel...

*****************************************************************



Most common month is: July

Most common day of week is: Tuesday

Most common start hour is: 17


-----------------------------------------------------------------

T