In [1]:
import time
import pandas as pd
import numpy as np

# Maps each supported city name (lower-case, as compared against user input)
# to the CSV file that holds that city's trip records.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

In [2]:
def _prompt_choice(prompt, valid_choices, error_message):
    """Keep prompting until the user enters one of valid_choices; return it lower-cased."""
    while True:
        # Strip surrounding whitespace so an answer such as " Chicago " is accepted.
        answer = input(prompt).strip().lower()
        if answer in valid_choices:
            return answer
        print(error_message)


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Every answer is stripped of surrounding whitespace and lower-cased before
    it is validated; the question is repeated until a valid value is entered.

    Returns:
        (str) city - name of the city to analyze (a key of CITY_DATA)
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')

    # The list of cities shown to the user is derived from CITY_DATA so the
    # prompt always matches what is actually accepted.
    city = _prompt_choice(
        f"Enter the city ({', '.join(CITY_DATA)}): ",
        CITY_DATA,
        "Invalid city. Please try again.",
    )

    month = _prompt_choice(
        "Enter the month (all, january, february, ..., june): ",
        ['all', 'january', 'february', 'march', 'april', 'may', 'june'],
        "Invalid month. Please try again.",
    )

    day = _prompt_choice(
        "Enter the day of the week (all, monday, tuesday, ..., sunday): ",
        ['all', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'],
        "Invalid day. Please try again.",
    )

    print('-'*40)
    return city, month, day

In [3]:
def load_data(city, month, day):
    """
    Loads the trip data for one city and applies the month / day filters.

    Args:
        (str) city - key of CITY_DATA selecting the CSV file to read
        (str) month - 'january' .. 'june' to keep only that month, or "all" for no month filter
        (str) day - name of the day of week to keep, or "all" for no day filter
    Returns:
        df - pandas DataFrame of the matching rows, with 'Start Time' parsed as
             datetime and the derived columns 'month', 'day_of_week' and 'hour'
    Raises:
        KeyError - if city is not a key of CITY_DATA
        ValueError - if month is neither "all" nor one of 'january' .. 'june'
    """
    df = pd.read_csv(CITY_DATA[city])

    # Parse the start timestamp once, then derive the columns used for
    # filtering and for the time statistics.
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    df['month'] = df['Start Time'].dt.month
    df['day_of_week'] = df['Start Time'].dt.day_name()
    df['hour'] = df['Start Time'].dt.hour

    # Filter by month if needed (month names map to 1-based month numbers).
    if month != 'all':
        months = ['january', 'february', 'march', 'april', 'may', 'june']
        month_num = months.index(month) + 1
        df = df[df['month'] == month_num]

    # Filter by day if needed; day_name() yields title-cased names.
    if day != 'all':
        df = df[df['day_of_week'] == day.title()]

    # Return an independent frame: callers add columns to the result, and
    # assigning into a filtered slice triggers SettingWithCopyWarning.
    return df.copy()

In [4]:
def time_stats(df):
    """Displays statistics on the most frequent times of travel.

    Args:
        df - pandas DataFrame with the columns 'month' (1-based month number),
             'day_of_week' and 'hour'
    Prints the most common month, day of week and start hour, followed by the
    elapsed time. If df has no rows, a notice is printed instead of the
    statistics.
    """

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column has no element 0, so there is nothing to report.
        print("No trips match the selected filters; nothing to summarise.")
    else:
        # display the most common month
        # All twelve names are listed so a month number above 6 cannot
        # index past the end of the table.
        month_names = ('January', 'February', 'March', 'April', 'May', 'June',
                       'July', 'August', 'September', 'October', 'November', 'December')
        common_month_num = df['month'].mode()[0]
        print(f"\nMost common month: {month_names[common_month_num - 1]}")

        # display the most common day of week
        common_week_day = df['day_of_week'].mode()[0]
        print(f"\nMost common day: {common_week_day} ")

        # display the most common start hour
        common_hour_start = df['hour'].mode()[0]
        print(f"\nMost common start hour: {common_hour_start}")

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [5]:
def display_raw_data(df):
    """Displays 5 lines of raw data if user says yes, continues until user says no.

    Paging also stops on its own once every row of df has been shown, so the
    user is never offered (and shown) an empty slice.

    Args:
        df - pandas DataFrame whose rows are printed five at a time
    """
    chunk_size = 5
    i = 0
    while True:
        if i >= len(df):
            # Everything has been printed (or df was empty to begin with).
            print('\nNo more raw data to display.')
            break
        show_raw = input('\nWould you like to see 5 lines of raw data? Enter yes or no.\n').strip().lower()
        if show_raw == 'yes':
            print(df.iloc[i:i + chunk_size])
            i += chunk_size
        elif show_raw == 'no':
            break
        else:
            print('Invalid input. Please enter yes or no.')

# Main program flow: ask for the filters, then load the matching trips.
city, month, day = get_filters()
print(f"Filters selected: {city}, {month}, {day}")
df = load_data(city, month, day)
print(f"\nLoaded {df.shape[0]} rows of filtered data")

# Preview the first five filtered rows (head() defaults to 5 rows).
print("\nFirst 5 rows of filtered data:")
print(df.head())

# Let the user page through further raw rows on request.
display_raw_data(df)

Hello! Let's explore some US bikeshare data!
enter name:new york city
Enter the month (all, january, february, ..., june): february
Enter the day of the week (all, monday, tuesday, ..., sunday): tuesday
----------------------------------------
Filters selected: new york city, february, tuesday

Loaded 5573 rows of filtered data

First 5 rows of filtered data:
     Unnamed: 0          Start Time             End Time  Trip Duration  \
14      1512596 2017-02-28 19:26:43  2017-02-28 19:35:21            518   
54      1507415 2017-02-28 18:03:29  2017-02-28 18:10:28            418   
251     1226634 2017-02-21 08:17:11  2017-02-21 08:26:21            550   
254     1005386 2017-02-14 07:39:33  2017-02-14 07:55:13            939   
339     1228070 2017-02-21 08:40:42  2017-02-21 09:06:16           1534   

                   Start Station                   End Station   User Type  \
14           N 11 St & Wythe Ave      Bushwick Ave & Powers St  Subscriber   
54       Columbus Ave & W 103 S

In [6]:
# Rich display of the first five filtered rows (head() defaults to n=5).
df.head()

Unnamed: 0.1,Unnamed: 0,Start Time,End Time,Trip Duration,Start Station,End Station,User Type,Gender,Birth Year,month,day_of_week,hour
14,1512596,2017-02-28 19:26:43,2017-02-28 19:35:21,518,N 11 St & Wythe Ave,Bushwick Ave & Powers St,Subscriber,Male,1983.0,2,Tuesday,19
54,1507415,2017-02-28 18:03:29,2017-02-28 18:10:28,418,Columbus Ave & W 103 St,W 106 St & Central Park West,Subscriber,Male,1990.0,2,Tuesday,18
251,1226634,2017-02-21 08:17:11,2017-02-21 08:26:21,550,Pershing Square North,E 17 St & Broadway,Subscriber,Male,1969.0,2,Tuesday,8
254,1005386,2017-02-14 07:39:33,2017-02-14 07:55:13,939,W 82 St & Central Park West,5 Ave & E 88 St,Subscriber,Male,1957.0,2,Tuesday,7
339,1228070,2017-02-21 08:40:42,2017-02-21 09:06:16,1534,E 85 St & 3 Ave,W 38 St & 8 Ave,Subscriber,,,2,Tuesday,8


In [7]:
# Label every trip as "<start station> to <end station>" and show the new column.
df['Trip'] = df['Start Station'].add(" to ").add(df['End Station'])
print(f"\n {df['Trip']}")


 14          N 11 St & Wythe Ave to Bushwick Ave & Powers St
54        Columbus Ave & W 103 St to W 106 St & Central ...
251             Pershing Square North to E 17 St & Broadway
254          W 82 St & Central Park West to 5 Ave & E 88 St
339                      E 85 St & 3 Ave to W 38 St & 8 Ave
                                ...                        
299621          Dean St & Hoyt St to Hicks St & Montague St
299694    Cathedral Pkwy & Broadway to Riverside Dr & W ...
299794                   W 38 St & 8 Ave to 1 Ave & E 68 St
299818            W 20 St & 8 Ave to University Pl & E 8 St
299971       Broadway & Roebling St to S 3 St & Bedford Ave
Name: Trip, Length: 5573, dtype: object


In [8]:
def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Args:
        df - pandas DataFrame with the columns 'Start Station' and 'End Station'
    Prints the most common start station, end station and start/end
    combination, each with its trip count, followed by the elapsed time.
    df itself is not modified. If df has no rows, a notice is printed instead
    of the statistics.
    """

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column has no element 0, so there is nothing to report.
        print("No trips match the selected filters; nothing to summarise.")
    else:
        # display most commonly used start station
        common_start = df['Start Station'].mode()[0]
        start_count = df['Start Station'].value_counts().max()
        print(f"\nMost common start station: {common_start} ({start_count} trips)")

        # display most commonly used end station
        common_end = df['End Station'].mode()[0]
        end_count = df['End Station'].value_counts().max()
        print(f"Most common end station: {common_end} ({end_count} trips)")

        # display most frequent combination of start station and end station trip
        # The combined label lives in a local Series so the caller's frame
        # does not silently gain a 'Trip' column.
        trips = df['Start Station'] + " to " + df['End Station']
        common_trip = trips.mode()[0]
        trip_count = trips.value_counts().max()
        print(f"Most common trip: {common_trip} ({trip_count} trips)")

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
    

In [9]:
def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration.

    Args:
        df - pandas DataFrame with a 'Trip Duration' column (the conversions
             below treat the values as seconds)
    Prints the total travel time as days / hours / minutes and the mean trip
    duration as minutes / seconds, followed by the elapsed time. If there are
    no duration values, a notice is printed instead of the statistics.
    """

    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    # sum() and mean() skip missing values anyway; dropping them up front
    # lets us detect the "nothing to average" case, where mean() would be NaN
    # and int(NaN) raises ValueError.
    durations = df['Trip Duration'].dropna()

    if durations.empty:
        print("No trip durations available for the selected filters.")
    else:
        total_seconds = durations.sum()
        mean_seconds = durations.mean()

        # Break the total down into whole days, hours and minutes.
        total_days, remainder = divmod(total_seconds, 24 * 3600)
        total_hours, remainder = divmod(remainder, 3600)
        total_mins = remainder // 60

        # Display total travel time
        print(f"Total travel time: {int(total_days)} days, {int(total_hours)} hours, {int(total_mins)} minutes")

        # Display mean travel time (in minutes and seconds)
        mean_mins, mean_secs = divmod(mean_seconds, 60)
        print(f"Average trip duration: {int(mean_mins)} minutes {int(mean_secs)} seconds")

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [10]:
def user_stats(df):
    """Displays statistics on bikeshare users.

    Args:
        df - pandas DataFrame with a 'User Type' column and, optionally,
             'Gender' and 'Birth Year' columns
    Prints the count of each user type, the count of each gender (when the
    column exists) and the earliest / most recent / most common birth year
    (when the column exists and holds at least one value), followed by the
    elapsed time.
    """

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # 1. Display counts of user types
    print("\nUser Type Breakdown:")
    print(df['User Type'].value_counts().to_string())

    # 2. Display counts of gender (if column exists)
    if 'Gender' in df.columns:
        print("\nGender Breakdown:")
        print(df['Gender'].value_counts().to_string())
    else:
        print("\nGender data not available for this city")

    # 3. Display birth year stats (if column exists)
    if 'Birth Year' in df.columns:
        # min(), max() and mode() ignore missing values already; dropping them
        # first exposes the case where no birth year is left at all, in which
        # min()/max() would be NaN and int(NaN) raises ValueError.
        birth_years = df['Birth Year'].dropna()
        if birth_years.empty:
            print("\nBirth year data not available for the selected trips")
        else:
            print("\nBirth Year Statistics:")
            print(f"Earliest: {int(birth_years.min())}")
            print(f"Most recent: {int(birth_years.max())}")
            print(f"Most common: {int(birth_years.mode()[0])}")
    else:
        print("\nBirth year data not available for this city")

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [11]:
# NOTE: display_raw_data is also defined in an earlier cell; once this cell
# runs, the definition below is the one in effect.
def display_raw_data(df):
    """Displays 5 lines of raw data if user says yes, continues until user says no.

    Paging also stops on its own once every row of df has been shown, so the
    user is never offered (and shown) an empty slice.

    Args:
        df - pandas DataFrame whose rows are printed five at a time
    """
    chunk_size = 5
    i = 0
    while True:
        if i >= len(df):
            # Everything has been printed (or df was empty to begin with).
            print('\nNo more raw data to display.')
            break
        show_raw = input('\nWould you like to see 5 lines of raw data? Enter yes or no.\n').strip().lower()
        if show_raw == 'yes':
            print(df.iloc[i:i + chunk_size])
            i += chunk_size
        elif show_raw == 'no':
            break
        else:
            print('Invalid input. Please enter yes or no.')

# Main program flow: ask for the filters, then load the matching trips.
city, month, day = get_filters()
print(f"Filters selected: {city}, {month}, {day}")
df = load_data(city, month, day)
print(f"\nLoaded {df.shape[0]} rows of filtered data")

# Preview the first five filtered rows (head() defaults to 5 rows).
print("\nFirst 5 rows of filtered data:")
print(df.head())

# Let the user page through further raw rows on request.
display_raw_data(df)

# Run every statistics report against the filtered frame, in order.
time_stats(df)
station_stats(df)
trip_duration_stats(df)
user_stats(df)

Hello! Let's explore some US bikeshare data!
enter name:washington
Enter the month (all, january, february, ..., june): january
Enter the day of the week (all, monday, tuesday, ..., sunday): tuesday
----------------------------------------
Filters selected: washington, january, tuesday

Loaded 5148 rows of filtered data

First 5 rows of filtered data:
     Unnamed: 0          Start Time             End Time  Trip Duration  \
103        8178 2017-01-03 18:12:00  2017-01-03 18:22:00        601.190   
127       37143 2017-01-10 07:16:00  2017-01-10 07:18:00        161.361   
144       80495 2017-01-17 09:48:00  2017-01-17 09:53:00        318.804   
148       82223 2017-01-17 17:54:00  2017-01-17 18:09:00        898.953   
220       41952 2017-01-10 20:23:00  2017-01-10 20:27:00        267.040   

                        Start Station                       End Station  \
103       Wilson Blvd & N Edgewood St              Glebe Rd & 11th St N   
127        North Capitol St & F St NW  4th & 

In [None]:
# Main program flow (disabled).
# Everything below is wrapped in a triple-quoted string literal, so none of
# these statements execute when the cell is run. An earlier cell performs the
# same steps for real (get_filters -> load_data -> the four *_stats calls).
'''city, month, day = get_filters()
print(f"Filters selected: {city}, {month}, {day}")
df = load_data(city, month, day)
print(f"\nLoaded {len(df)} rows of filtered data")
df.head(5)
time_stats(df)
station_stats(df)
trip_duration_stats(df)
user_stats(df)'''