In [1]:
import time
import pandas as pd
import numpy as np

# Lookup table from a supported city name (lowercase) to the CSV file
# that holds that city's trip records.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

In [2]:
def _prompt_choice(prompt, valid_choices, error_message):
    """Ask `prompt` repeatedly until the reply is one of `valid_choices`.

    The reply is lowercased and its whitespace is normalized (leading/trailing
    blanks removed, inner runs of blanks collapsed to one space) before it is
    compared, so "  New   York City " is accepted as "new york city".

    Args:
        (str) prompt - text passed to input()
        (tuple) valid_choices - accepted lowercase answers
        (str) error_message - text printed after every rejected answer
    Returns:
        (str) the normalized answer, guaranteed to be in valid_choices
    """
    while True:
        answer = " ".join(input(prompt).lower().split())
        if answer in valid_choices:
            return answer
        print(error_message)


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Each question is repeated until a valid answer is given; answers are
    case-insensitive and tolerate surrounding or repeated whitespace.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')

    # city: chicago, new york city or washington
    city = _prompt_choice(
        "\n Which city do you want to explore ? New York City , Chicago or Washington?\n",
        ('chicago', 'new york city', 'washington'),
        "wrong city!",
    )

    # month: all, january, february, ... , june
    month = _prompt_choice(
        "\nWhich month do you want to filter by?\n",
        ('all', 'january', 'february', 'march', 'april', 'may', 'june'),
        "wrong month!",
    )

    # day of week: all, monday, tuesday, ... sunday
    day = _prompt_choice(
        "\nWhich day do you want to filter by?\n",
        ('all', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'),
        "wrong day!",
    )

    print('-'*40)
    return city, month, day

In [3]:
def load_data(city, month, day):
    """
    Reads the trip file for one city and narrows it to the requested month and weekday.

    Args:
        (str) city - name of the city to analyze (a key of CITY_DATA)
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day,
             with added 'month' (int) and 'day_of_week' (day name) columns
    """
    trips = pd.read_csv(CITY_DATA[city])

    # Parse the start timestamps once, then derive the helper columns from them.
    start_times = pd.to_datetime(trips['Start Time'])
    trips['Start Time'] = start_times
    trips['month'] = start_times.dt.month
    trips['day_of_week'] = start_times.dt.day_name()

    if month != 'all':
        # Position in this tuple (1-based) is the calendar month number.
        month_names = ('january', 'february', 'march', 'april', 'may', 'june')
        month_number = month_names.index(month) + 1
        trips = trips[trips['month'] == month_number]

    if day != 'all':
        # day_name() yields capitalized names, so title-case the requested day.
        wanted_day = day.title()
        trips = trips[trips['day_of_week'] == wanted_day]

    return trips

In [4]:
def time_stats(df):
    """Prints the most frequent month, weekday and start hour of travel.

    Reads the 'month', 'day_of_week' and 'Start Time' columns of df and stores
    the start hour of every trip in a new 'hour' column on df.
    """

    print('\nCalculating The Most Frequent Times of Travel...\n')
    began = time.time()

    # Month and weekday are reported the same way: first mode of a column.
    for label, column in (("The Most Common Months : ", 'month'),
                          ("The Most Common Day : ", 'day_of_week')):
        print(label, df[column].mode()[0])

    # The hour is not precomputed, so derive it from the start timestamp first.
    df['hour'] = df['Start Time'].dt.hour
    print("The Most Common Start Hour : ", df['hour'].mode()[0])

    elapsed = time.time() - began
    print("\nThis took %s seconds." % elapsed)
    print('-' * 40)

In [5]:
def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Args:
        df - Pandas DataFrame with 'Start Station' and 'End Station' columns

    Prints the most common start station, the most common end station, and the
    most frequent (start station, end station) pair.
    """

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    # most commonly used start station
    most_common_start_station = df['Start Station'].mode()[0]
    print("The Most Common Start Station : ",most_common_start_station)

    # most commonly used end station
    most_common_used_end_station = df['End Station'].mode()[0]
    print("The Most Common Used End Station : ",most_common_used_end_station)

    # The most frequent trip has to be counted on (start, end) pairs: joining the
    # most common start with the most common end can name a route that is rare
    # or was never ridden at all.
    trip_counts = df.groupby(['Start Station', 'End Station']).size()
    trip_start, trip_end = trip_counts.idxmax()
    combination_start_end_trip = f"{trip_start} {trip_end}"
    print("The Most Frequent Combination Of Start Station And End Station Trip : ",combination_start_end_trip)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [6]:
def trip_duration_stats(df):
    """Prints the sum and the mean of the 'Trip Duration' column of df."""

    print('\nCalculating Trip Duration...\n')
    began = time.time()

    durations = df['Trip Duration']

    # total travel time
    print("The Total Travel Time : ", durations.sum())

    # mean travel time
    print("The Mean Travel Time : ", durations.mean())

    elapsed = time.time() - began
    print("\nThis took %s seconds." % elapsed)
    print('-' * 40)

In [23]:

def user_stats(df, city):
    """Prints user-type counts and, except for washington, gender and birth-year figures.

    Note: a later cell in this notebook defines user_stats again; when both
    cells are run in order, that later definition is the one that stays bound.
    """

    print('\nCalculating User Stats...\n')
    began = time.time()

    # counts of user types
    print("The Counts Of User Types : ", df['User Type'].value_counts())

    # Gender and birth-year figures are skipped when city is 'washington'.
    if city != 'washington':
        # counts of gender
        print("The Counts Of Genders : ", df['Gender'].value_counts())

        birth_years = df['Birth Year']

        # earliest year of birth
        print("The Earliest Year Of Birth : ", birth_years.min())

        # most recent year of birth
        print("The Most Recent Year Of Birth : ", birth_years.max())

        # most common year of birth: the value with the highest count
        print('The Most Common Year Of Birth : ', birth_years.value_counts().idxmax())

    elapsed = time.time() - began
    print("\nThis took %s seconds." % elapsed)
    print('-' * 40)
    

In [29]:
# Disabled draft of the "show 5 rows at a time" prompt. Everything between the
# triple quotes is a bare string expression, so none of it is executed. The
# display_data function defined further down implements the same paging loop.
# NOTE(review): the trailing `;` presumably keeps the notebook from echoing the
# string as the cell's output - confirm, or delete this cell.
'''
    count = 0
    view = input('\nWould you like to view 5 rows of individual trip data? Enter yes or no\n')
    while (view.lower() == 'yes'):
        print(df[count:count+5])
        count = count + 5
        view = input("Do you wish to continue?: ").lower()  

'''    ;

In [25]:
def display_data(df):
    """Shows the rows of df five at a time for as long as the user answers yes.

    Paging stops as soon as the user gives any answer other than "yes"
    (case-insensitive, surrounding whitespace ignored) or when every row has
    been shown. When the rows run out, a notice is printed instead of asking
    again and printing empty frames.

    Args:
        df - Pandas DataFrame whose rows are displayed
    """
    total_rows = len(df)
    offset = 0
    more_data = input('\nWould you like to view 5 rows of individual trip data? Enter yes or no\n')
    while more_data.strip().lower() == 'yes':
        if offset >= total_rows:
            # Nothing left (or the frame was empty to begin with): stop here.
            print('\nNo more rows to display.')
            break
        print(df.iloc[offset:offset + 5])
        offset += 5
        # Only ask again while there is something left to show.
        if offset < total_rows:
            more_data = input('\nWould you like to see more data? Please! Enter,  yes   or   no.\n')

In [26]:
def user_stats(df, city):
    """Prints user-type counts and, except for washington, gender and birth-year figures."""

    print('\nCalculating User Stats...\n')
    began = time.time()

    # counts of user types
    user_type_counts = df['User Type'].value_counts()
    print('User Type Stats:')
    print(user_type_counts)

    # Gender and birth-year figures are skipped when city is 'washington'.
    if city != 'washington':
        # counts of gender
        print('Gender Stats:')
        gender_counts = df['Gender'].value_counts()
        print(gender_counts)

        # most common, most recent and earliest year of birth, in that order
        print('Birth Year Stats:')
        birth_years = df['Birth Year']
        print('Most Common Year:', birth_years.mode()[0])
        print('Most Recent Year:', birth_years.max())
        print('Earliest Year:', birth_years.min())

    elapsed = time.time() - began
    print("\nThis took %s seconds." % elapsed)
    print('-' * 40)

In [27]:
def main():
    """Runs the interactive report repeatedly until the user declines to restart.

    Each round asks for the filters, loads the matching data, prints the four
    statistics sections, offers the raw rows, and then asks whether to restart.
    """
    keep_running = True
    while keep_running:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        # Statistics sections, in the order they are reported.
        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df, city)

        # Optional look at the underlying rows.
        display_data(df)

        # Anything other than "yes" (case-insensitive) ends the session.
        restart = input('\nWould you like to restart? Enter yes or no.\n')
        keep_running = restart.lower() == 'yes'


In [28]:
# Start the interactive session only when this code runs as the top-level
# program (where __name__ is "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    main()


Hello! Let's explore some US bikeshare data!

 Which city do you want to explore ? New York City , Chicago or Washington?
chicago

Which month do you want to filter by?
may

Which day do you want to filter by?
sunday
----------------------------------------

Calculating The Most Frequent Times of Travel...

The Most Common Months :  5
The Most Common Day :  Sunday
The Most Common Start Hour :  12

This took 0.004838705062866211 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

The Most Common Start Station :  Streeter Dr & Grand Ave
The Most Common Used End Station :  Streeter Dr & Grand Ave
The Most Frequent Combination Of Start Station And End Station Trip :  Streeter Dr & Grand Ave Streeter Dr & Grand Ave

This took 0.002931833267211914 seconds.
----------------------------------------

Calculating Trip Duration...

The Total Travel Time :  10795584
The Mean Travel Time :  1279.7041251778094

This took 0.0 seconds.
----------------


Would you like to see more data? Please! Enter,  yes   or   no.
yes
      Unnamed: 0          Start Time             End Time  Trip Duration  \
1350      996266 2017-05-28 18:48:33  2017-05-28 20:38:20           6587   
1356      909742 2017-05-21 13:30:47  2017-05-21 13:53:55           1388   
1372      831088 2017-05-14 16:30:50  2017-05-14 16:45:50            900   
1433      910906 2017-05-21 14:37:19  2017-05-21 14:57:03           1184   
1452      824687 2017-05-14 10:39:28  2017-05-14 11:06:38           1630   

                 Start Station                End Station   User Type  Gender  \
1350   Broadway & Cornelia Ave    Broadway & Cornelia Ave    Customer     NaN   
1356     Michigan Ave & Oak St      Michigan Ave & Oak St    Customer     NaN   
1372     Dayton St & North Ave   Kingsbury St & Kinzie St    Customer     NaN   
1433   Streeter Dr & Grand Ave  Larrabee St & Webster Ave  Subscriber  Female   
1452  Lincoln Ave & Leavitt St   Wells St & Evergreen Ave  Subscriber

     Unnamed: 0          Start Time             End Time  Trip Duration  \
344      932889 2017-05-23 11:12:06  2017-05-23 11:40:10           1684   
376      849614 2017-05-16 06:50:35  2017-05-16 06:53:41            186   
377      770435 2017-05-09 16:02:31  2017-05-09 16:11:46            555   
492      773083 2017-05-09 18:17:43  2017-05-09 18:38:57           1274   
519      850711 2017-05-16 07:52:53  2017-05-16 08:00:58            485   

                  Start Station                  End Station   User Type  \
344      State St & Harrison St            Adler Planetarium    Customer   
376   Claremont Ave & Hirsch St       Damen Ave & Pierce Ave  Subscriber   
377    Canal St & Monroe St (*)        Dearborn St & Erie St  Subscriber   
492       Michigan Ave & Oak St  Lake Shore Dr & Belmont Ave    Customer   
519  Halsted St & Diversey Pkwy     Halsted St & Dickens Ave  Subscriber   

     Gender  Birth Year  month day_of_week  hour  
344     NaN         NaN      5     Tuesda