In [1]:
import time
import pandas as pd
import numpy as np

In [2]:
# Chicago trip records, parsed into a DataFrame when this cell runs.
# NOTE(review): the path is absolute and machine-specific; the notebook will
# not run on another machine until it is adjusted.
chicago = pd.read_csv(
    r'C:\Users\Cvetana\Documents\Udacity projects\Python project Udacity\Python project\chicago.csv'
)

In [3]:
# New York City trip records, parsed into a DataFrame when this cell runs.
# NOTE(review): the path is absolute and machine-specific; the notebook will
# not run on another machine until it is adjusted.
new_york = pd.read_csv(
    r'C:\Users\Cvetana\Documents\Udacity projects\Python project Udacity\Python project\new_york_city.csv'
)

In [4]:
# Washington trip records, parsed into a DataFrame when this cell runs.
# NOTE(review): the path is absolute and machine-specific; the notebook will
# not run on another machine until it is adjusted.
washington = pd.read_csv(
    r'C:\Users\Cvetana\Documents\Udacity projects\Python project Udacity\Python project\washington.csv'
)

In [5]:
# Lookup table from the lowercase city name entered by the user to the
# DataFrame that was preloaded for that city in the cells above.
CITY_DATA = {
    'chicago': chicago,
    'new york': new_york,
    'washington': washington,
}

In [6]:
def _prompt_choice(prompt, valid_choices, error_message):
    """Ask ``prompt`` repeatedly until the answer is one of ``valid_choices``.

    The answer is lowercased and its whitespace is normalized (leading and
    trailing blanks removed, inner runs collapsed to one space), so inputs
    such as ``' Chicago '`` or ``'new   york'`` are accepted.

    Args:
        (str) prompt - text shown to the user on every attempt
        (iterable of str) valid_choices - accepted answers, in lowercase
        (str) error_message - text printed after an invalid answer
    Returns:
        (str) the normalized, validated answer
    """
    while True:
        answer = ' '.join(input(prompt).split()).lower()
        if answer in valid_choices:
            return answer
        print(error_message)


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Each question is repeated until a valid answer is given; answers are
    case-insensitive and tolerant of stray whitespace.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')

    # First filter: CITY
    city = _prompt_choice(
        '\nWould you like to see data for Chicago, New York or Washington?\n',
        ('chicago', 'new york', 'washington'),
        '\nPlease enter a valid city name!\n')
    print(f'\nBikeshare data to be displayed for: {city.title()}.\n')

    # Second filter: MONTH
    print('\nNow let\'s specify a time filter!\n')
    month = _prompt_choice(
        '\nWould you like to see data for a specific month?\n'
        'If so, please enter a month from January to June.\n'
        'If not, please enter "all" for all months.\n',
        ('january', 'february', 'march', 'april', 'may', 'june', 'all'),
        '\nPlease enter a valid month or "all" for all months!\n')
    print(f'\nBikeshare data to be displayed for: {month.capitalize()}.\n')

    # Third filter: DAY OF WEEK
    print('\nAnd another time filter!\n')
    day = _prompt_choice(
        '\nWould you like to see data for a specific day of the week?\n'
        'If so, please enter a day.\n'
        'If not, please enter "all" for all days.\n',
        ('monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday', 'all'),
        '\nPlease enter a valid day of the week or "all" for all days!\n')
    print(f'\nBikeshare data to be displayed for: {day.capitalize()}.\n')

    print('-'*40)

    return city, month, day


def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    The frame stored in CITY_DATA is never modified: all derived columns are
    added to a copy, so repeated runs (the "restart" loop) always start from
    the data exactly as it was read from disk.

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day,
             with 'Start Time' converted to datetime and two added columns:
             'month' (month name) and 'day_of_week' (day name)
    Raises:
        KeyError - if city is not a key of CITY_DATA
    """

    # take a private copy so the shared, preloaded frame stays untouched
    df = CITY_DATA[city].copy()

    # convert the Start Time column to datetime
    start = pd.to_datetime(df['Start Time'])
    df['Start Time'] = start

    # extract month and day of week from Start Time to create new columns
    df['month'] = start.dt.month_name()
    df['day_of_week'] = start.dt.day_name()

    # filter by month if applicable (names are stored capitalized, e.g. 'June')
    if month != 'all':
        df = df[df['month'] == month.title()]

    # filter by day of week if applicable (e.g. 'Tuesday')
    if day != 'all':
        df = df[df['day_of_week'] == day.title()]

    return df

     
def time_stats(df):
    """Displays statistics on the most frequent times of travel.

    Prints the most common month, day of week and start hour together with
    the number of trips for each. The passed frame is not modified.

    Args:
        df - Pandas DataFrame with a datetime 'Start Time' column and the
             'month' and 'day_of_week' columns
    """

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    if df.empty:
        # mode()[0] would raise on an empty selection
        print('No trips match the selected filters.')
    else:
        # display the most common month
        popular_month = df['month'].mode()[0]
        month_highest = df['month'].value_counts().max()
        print(f'Most Popular Month: {popular_month}, count: {month_highest}.')

        # display the most common day of week
        popular_day = df['day_of_week'].mode()[0]
        day_highest = df['day_of_week'].value_counts().max()
        print(f'Most Popular Day of Week: {popular_day}, count: {day_highest}.')

        # display the most common start hour
        # (kept in a local Series instead of writing a new column into df)
        hours = df['Start Time'].dt.hour
        popular_hour = hours.mode()[0]
        hour_highest = hours.value_counts().max()
        print(f'Most Popular Start Hour: {popular_hour}, count: {hour_highest}.')

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Prints the most common start station, end station and start/end
    combination together with the number of trips for each. The passed frame
    is not modified.

    Args:
        df - Pandas DataFrame with 'Start Station' and 'End Station' columns
    """

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    if df.empty:
        # mode()[0] would raise on an empty selection
        print('No trips match the selected filters.')
    else:
        # display most commonly used start station
        start_mode = df['Start Station'].mode()[0]
        start_highest = df['Start Station'].value_counts().max()
        print(f'Most Popular Start Station: {start_mode}, count: {start_highest}.')

        # display most commonly used end station
        end_mode = df['End Station'].mode()[0]
        end_highest = df['End Station'].value_counts().max()
        print(f'Most Popular End Station: {end_mode}, count: {end_highest}.')

        # display most frequent combination of start station and end station trip
        # (built as a local Series instead of writing a new column into df)
        trips = df['Start Station'] + ' - ' + df['End Station'].map(str)
        trip_mode = trips.mode()[0]
        trip_highest = trips.value_counts().max()
        print(f'Most Popular Trip from Start to End Station: {trip_mode}, count: {trip_highest}.')

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def trip_duration_stats(df):
    """Print the summed and the mean value of the 'Trip Duration' column.

    Both figures are interpreted as seconds and shown as pandas timedeltas,
    followed by the time the calculation took.
    """

    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    durations = df['Trip Duration']

    # overall time spent travelling
    total_seconds = durations.sum()
    total_time = pd.to_timedelta(total_seconds, unit='s')
    print(f'Total Trip Duration: {total_time}.')

    # typical length of a single trip
    mean_seconds = durations.mean()
    avg_time = pd.to_timedelta(mean_seconds, unit='s')
    print(f'Average Trip Duration: {avg_time}.')

    elapsed = time.time() - start_time
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)


def user_stats(df):
    """Displays statistics on bikeshare users.

    Prints the count per user type and, when the columns exist, the count per
    gender and the earliest, most recent and most common year of birth.
    The passed frame is not modified.

    Args:
        df - Pandas DataFrame with a 'User Type' column and, optionally,
             'Gender' and 'Birth Year' columns
    """

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    user_types = df['User Type'].value_counts()
    print(f'User types:\n {user_types}')

    # Display counts of gender (the column is absent for some cities)
    if 'Gender' in df.columns:
        gender = df['Gender'].value_counts()
        print(f'\nUsers according to gender:\n {gender}')
    else:
        print('\nGender data not available.')

    # Display earliest, most recent, and most common year of birth.
    # The years are treated as plain numbers: no datetime round trip is
    # needed, and missing or unparsable entries are simply dropped.
    if 'Birth Year' in df.columns:
        birth_years = pd.to_numeric(df['Birth Year'], errors='coerce').dropna().astype(int)
    else:
        birth_years = None

    if birth_years is None or birth_years.empty:
        print('\nYear of birth data not available.')
    else:
        year_min = birth_years.min()
        year_max = birth_years.max()
        year_mode = birth_years.mode()[0]
        print('\nEarliest, most recent, and most common year of birth:...')
        print(f'\nThe oldest user was born in: {year_min}.',
              f'\nThe youngest user was born in: {year_max}.',
              f'\nThe most common year of birth is: {year_mode}.')

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

    
#View raw data, 5 rows at a time
def _ask_yes_no(prompt):
    """Repeat ``prompt`` until the user types yes or no; return True for yes."""
    while True:
        answer = input(prompt).strip().lower()
        if answer in ('yes', 'no'):
            return answer == 'yes'
        print('\nPlease type yes or no.\n')


def raw_data(df):
    """Displays 5 rows of raw data at a time at the user's request.

    Nothing is shown when the user answers "no" to the first question.
    After the first five rows, the user is asked before every further batch
    of five; paging stops on "no" or when all rows have been shown.

    Args:
        df - Pandas DataFrame whose rows are displayed
    """

    if not _ask_yes_no('\nWould you like to view some raw data? Please type yes or no.\n'):
        return

    view_first = df.iloc[:5]
    print(f'\nHere are the first 5 rows of the data sheet\n: {view_first}.\n')

    # Offer the remaining rows 5 at a time; i is the offset of the next batch.
    i = 5
    while i < len(df):
        if not _ask_yes_no('\nWould you like to view 5 more rows? Please type yes or no.\n'):
            return
        print(df.iloc[i:i+5])
        i += 5

    print('\nNo more rows to display.\n')
    
# MAIN FUNCTION
def main():
    """Run the interactive analysis, repeating it for as long as the user answers "yes".

    One round asks for the filters, loads the matching data and then passes
    the resulting frame to every report in turn.
    """
    keep_running = True
    while keep_running:
        filters = get_filters()
        df = load_data(*filters)

        # every report takes the filtered frame as its only argument
        reports = (time_stats, station_stats, trip_duration_stats, user_stats, raw_data)
        for report in reports:
            report(df)

        # anything other than "yes" (in any letter case) ends the session
        restart = input('\nWould you like to restart? Enter yes or no.\n')
        keep_running = restart.lower() == 'yes'


# Start the interactive session only when this code is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()


Hello! Let's explore some US bikeshare data!

Would you like to see data for Chicago, New York or Washington?
chicago

Bikeshare data to be displayed for: Chicago.


Now let's specify a time filter!


Would you like to see data for a specific month?
If so, please enter a month from January to June.
If not, please enter "all" for all months.
all

Bikeshare data to be displayed for: All.


And another time filter!


Would you like to see data for a specific day of the week?
If so, please enter a day.
If not, please enter "all" for all days.
all

Bikeshare data to be displayed for: All.

----------------------------------------

Calculating The Most Frequent Times of Travel...

Most Popular Month: June, count: 98081.
Most Popular Day of Week: Tuesday, count: 45912.
Most Popular Start Hour: 17, count: 35992.

This took 0.15897917747497559 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

Most Popular Start Station: Streeter Dr & Grand Ave