In [2]:
import pandas as pd
import numpy as np
import time
import datetime

chicago_df = pd.read_csv('chicago.csv')
new_york_df = pd.read_csv('new_york_city.csv')
washington_df = pd.read_csv('washington.csv')
City_Data = {'chicago' : chicago_df , 'new york': new_york_df , 'washington' : washington_df}

chicago_df['Start Time'] =pd.to_datetime(chicago_df['Start Time'])
new_york_df['Start Time'] =pd.to_datetime(new_york_df['Start Time'])
washington_df['Start Time'] =pd.to_datetime(washington_df['Start Time'])

chicago_df['Month']=chicago_df['Start Time'].dt.month_name()
new_york_df['Month']=new_york_df['Start Time'].dt.month_name()
washington_df['Month']=washington_df['Start Time'].dt.month_name()
chicago_df['Day']=chicago_df['Start Time'].dt.day_name()
new_york_df['Day']=new_york_df['Start Time'].dt.day_name()
washington_df['Day']=washington_df['Start Time'].dt.day_name()

def get_city():
    cities = ['chicago' , 'new york' , 'washington']
    city = input("Choose a city from chicago, new york, washington:  ").lower()
    if city in cities :
        return city
    else:
        print("Invalid input. Please try again.")
        return get_city()
                
def get_month():
    months=['january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', 'october', 'november' , 'december']
    month=input("Enter the month you want to filter by or enter none if you don't want to filter by a month: ").lower()
    if month in months:
        return month
    elif month == 'none':
        return 'all'
    else :
        print("Invalid input. Please try again.")
        return get_month()
        
def get_day():
    days = ['monday','tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
    day = input("Enter the day you want to filter by or enter none if you don't want to filter by a day: ").lower()
    if day in days:
        return day
    elif day == 'none':
        return 'all'
    else:
       print("Invalid input. Please try again.")
       return get_day()

def get_filters():
    city = get_city()
    month = get_month()
    day = get_day()
    return city,month,day

def load_data(city , month , day):
    filtered = City_Data[city]
    if month == 'all' and day == 'all':
        return filtered
    elif month == 'all':
        filt = (filtered['Day'] == day.capitalize())
        filtered = filtered[filt]
        return filtered
    elif day == 'all' :
        filt = (filtered['Month'] == month.capitalize())
        filtered = filtered[filt]
        return filtered
    else :
        filt = (filtered['Month'] == month.capitalize()) & (filtered['Day'] == day.capitalize())
        filtered = filtered[filt]
        return filtered
    
def time_stats(df):
    print('-'*40)
    print("Calculating the most popular times of travel : ")
    start_time = time.time()
    com_month = df['Month'].mode()
    com_day = df['Day'].mode()
    com_hour = df['Start Time'].dt.hour.mode()
    print("This took %s seconds." % (time.time()-start_time))
    print("The most popular month is " +com_month)
    print("The most popular day is " +com_day)
    print("The most popular hour is " +com_hour.astype('<U25'))
    print('-'*40)
    
def station_stats(df):
    print("Calculating the most popular stations and trip : ")
    start_time = time.time()
    com_start_station = df['Start Station'].mode()
    com_end_station = df['End Station'].mode()
    com_trip = ('from '+ df['Start Station'] + ' to ' + df['End Station']).mode()
    print("This took %s seconds." % (time.time()-start_time))
    print("The most popular start station is " +com_start_station)
    print("The most popular end station is " +com_end_station)
    print("The most popular trip is " +com_trip)
    print('-'*40)

pd.options.display.max_colwidth = 100

def trip_duration_stats(df):
    print("Calculating trip duration : ")
    start_time = time.time()
    total_travel_time = np.sum(df['Trip Duration'])
    avg_travel_time = np.average(df['Trip Duration'])
    print("This took %s seconds." % (time.time()-start_time))
    print("The total travel time is "+ total_travel_time.astype('<U25'))
    print("The average travel time is " +avg_travel_time.astype('<U25'))
    print('-'*40)
    
def user_stats(df):
    
    print("Calculating users info : ")
    start_time = time.time()
    user_type_count = df['User Type'].value_counts()
    print(user_type_count.astype('<U25'));
    
    if 'Gender' in df.columns:
        gender_count = df['Gender'].value_counts()
        earliest_dob = np.min(df['Birth Year'])
        most_recent_dob = np.max(df['Birth Year'])
        most_com_dob = df['Birth Year'].mode()
        print( gender_count.astype('<U25'))
        print("The earliest year of birth is " + earliest_dob.astype('<U25'))
        print("The most recent year of birth is : " + most_recent_dob.astype('<U25'))
        print("The most common year of birth is : " + most_com_dob.astype('<U25'));
    
    print("This took %s seconds." % (time.time()-start_time))
    print('-'*40)
    
def main():
    while True :
        city , month , day = get_filters()
        df = load_data(city , month , day)
        
        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)
        
        restart = input("Would you like to restart? Enter yes or no.")
        if restart.lower() != 'yes':
            print('*'*40)
            print("Hope the data was useful!")
            break
        
if __name__ == "__main__":
    main()    



Choose a city from chicago, new york, washington:  chicago
Enter the month you want to filter by or enter none if you don't want to filter by a month: none
Enter the day you want to filter by or enter none if you don't want to filter by a day: sunday
----------------------------------------
Calculating the most popular times of travel : 
This took 0.012908458709716797 seconds.
0    The most popular month is June
Name: Month, dtype: object
0    The most popular day is Sunday
Name: Day, dtype: object
0    The most popular hour is 15
Name: Start Time, dtype: object
----------------------------------------
Calculating the most popular stations and trip : 
This took 0.020020723342895508 seconds.
0    The most popular start station is Streeter Dr & Grand Ave
Name: Start Station, dtype: object
0    The most popular end station is Streeter Dr & Grand Ave
Name: End Station, dtype: object
0    The most popular trip is from Lake Shore Dr & Monroe St to Streeter Dr & Grand Ave
dtype: object
------