In [1]:
import time
import pandas as pd
import numpy as np

CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }

def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')
    # TO DO: get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs
    
    while True:
        city=input('Type the name of the city you need to analyze among (chicago, new york city, washington): ').lower()
        if city in ['chicago', 'new york city', 'washington']:
            break
        else:
            print('please, type a city from given ones only')
            continue
      
    
        
    # TO DO: get user input for month (all, january, february, ... , june)

    while True:
        month=input('Type the name of the month to filter by, or "all" to apply no month filter: ').lower()
        if month in ['all','january','february','march','april','may','june']:
            break
        else:
            print("please enter a month that's in this list ['all','january','february','march','april','may','june']")
            continue

    # TO DO: get user input for day of week (all, monday, tuesday, ... sunday)
    while True:
        day=input('Type the name of the day to filter by, or "all" to apply no day filter: ').lower()
        if day in ['all','saturday','sunday','monday','tuesday','wednesday','thursday','friday']:
            break
        else:
            print("please enter a day that's in this list['all','saturday','sunday','monday','tuesday','wednesday','thursday','friday']")
            continue


    print('-'*40)
    return city, month, day


def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    # load data file into a dataframe
    df = pd.read_csv(CITY_DATA[city])

    # convert the Start Time column to datetime
    df['Start Time'] =pd.to_datetime(df['Start Time']) 

    # extract month and day of week from Start Time to create new columns
    df['month'] =df['Start Time'].dt.month 
    df['day_of_week'] =df['Start Time'].dt.day_name() 


    # filter by month if applicable
    if month != 'all':
        # use the index of the months list to get the corresponding int
        months = ['january', 'february', 'march', 'april', 'may', 'june']
        month =months.index(month) +1 
    
        # filter by month to create the new dataframe
        df = df[df['month'] == month]

    # filter by day of week if applicable
    if day != 'all':
        # filter by day of week to create the new dataframe
        df =df[df['day_of_week']==day.title()] 
    
    return df
    


def time_stats(df):
    """Displays statistics on the most frequent times of travel."""

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    # TO DO: display the most common month
    #extracting the most common month using mode
    popular_month=df['month'].mode()[0]
    # converting month number to month name to display:
    if popular_month == 1:
        popular_month = 'January'
    elif popular_month == 2:
        popular_month = 'February'
    elif popular_month == 3:
        popular_month = 'March'
    elif popular_month == 4:
        popular_month = 'April'
    elif popular_month == 5:
        popular_month = 'May'
    elif popular_month == 6:
        popular_month = 'June'
        
    print('Most Common Month:', popular_month)
    


    # TO DO: display the most common day of week
    #extracting the most common day using mode
    popular_day=df['day_of_week'].mode()[0]
    print('Most Common day:', popular_day)
    

    # TO DO: display the most common start hour
    # extract hour from the Start Time column to create an hour column
    df['hour'] =df['Start Time'].dt.hour

    # extracting the most common hour (from 0 to 23)
    popular_hour = df['hour'].mode()[0]
    print('Most common Start Hour:', popular_hour)


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def station_stats(df):
    """Displays statistics on the most popular stations and trip."""

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    # TO DO: display most commonly used start station
    popular_start_station=df['Start Station'].mode()[0]
    count=df['Start Station'].value_counts()[0]
    print('Most common Start Station:', popular_start_station+', with count of:',count)


    # TO DO: display most commonly used end station
    popular_end_station=df['End Station'].mode()[0]
    count=df['End Station'].value_counts()[0]
    print('Most common End Station:', popular_end_station+', with count of:',count)


    # TO DO: display most frequent combination of start station and end station trip
    df['Trip']=df['Start Station']+' to: '+df['End Station']
    popular_trip=df['Trip'].mode()[0]
    count=df['Trip'].value_counts()[0]
    print('Most common Trip is from:', popular_trip+', with count of:',count)



    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration."""

    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    # TO DO: display total travel time
    total_travel_time=np.sum(df['Trip Duration'])
    print('Total travel time in days:', total_travel_time/(60*24))
    

    # TO DO: display mean travel time
    average_travel_time=np.mean(df['Trip Duration'])
    print('Average travel time in seconds:', average_travel_time)
        


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def user_stats(df):
    """Displays statistics on bikeshare users."""

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # TO DO: Display counts of user types
    user_type_count=df['User Type'].value_counts()
    print('User type count is:\n',user_type_count)
    
    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
    
    start_time = time.time()

    # TO DO: Display counts of gender
    while True:
        try:
            gender_count=df['Gender'].value_counts()
            print('Gender count is:\n',gender_count)
            print("\nThis took %s seconds." % (time.time() - start_time))
            print('-'*40)
            
            start_time = time.time()

            # TO DO: Display earliest, most recent, and most common year of birth
            earliest_birth_year=np.min(df['Birth Year'])
            print('The earliest year of birth is:', earliest_birth_year)
            
            most_recent_birth_year=np.max(df['Birth Year'])
            print('The most recent year of birth is:', most_recent_birth_year)
            
            most_common_birth_year=df['Birth Year'].mode()[0]
            print('The most common year of birth is:', most_common_birth_year)

            
            print("\nThis took %s seconds." % (time.time() - start_time))
            print('-'*40)
            break
        except:
            break
    
    
def raw_data_display(df):
    '''Ask the user if they want to display lines of raw data'''
    start_index=0
    end_index=5
    df_length=len(df.index)
    
    while start_index < df_length:
        raw_data = input("\nWould you like to see 5 lines of raw data? Enter 'yes' or 'no'.\n")
        if raw_data.lower() == 'yes':
            print("\nDisplaying only 5 lines of data...\n")
            if end_index > df_length:
                end_index=df_length
            print(df.iloc[start_index:end_index])
            start_index+=5
            end_index+=5
        else:
            break
    
    

def main():
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)
        raw_data_display(df)

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.lower() != 'yes':
            break


if __name__ == "__main__":
	main()


Hello! Let's explore some US bikeshare data!
Type the name of the city you need to analyze among (chicago, new york city, washington): chicago
Type the name of the month to filter by, or "all" to apply no month filter: march
Type the name of the day to filter by, or "all" to apply no day filter: all
----------------------------------------

Calculating The Most Frequent Times of Travel...

Most Common Month: March
Most Common day: Friday
Most common Start Hour: 17

This took 0.07700443267822266 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

Most common Start Station: Clinton St & Washington Blvd, with count of: 604
Most common End Station: Clinton St & Washington Blvd, with count of: 588
Most common Trip is from: Calumet Ave & 33rd St to: State St & 33rd St, with count of: 45

This took 0.36602115631103516 seconds.
----------------------------------------

Calculating Trip Duration...

Total travel time in days: 14564.180555555555
