In [2]:
import time
import pandas as pd
import numpy as np
from datetime import datetime
import calendar

In [5]:
CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }

In [7]:
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    df = pd.read_csv(CITY_DATA[city])
    df.drop('Unnamed: 0', axis=1, inplace=True)
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    df['month'] = df['Start Time'].dt.month
    df['day_of_week'] = df['Start Time'].dt.day_name()
    if month != 'all':
        months = ['january', 'february', 'march', 'april', 'may', 'june']
        month = months.index(month) + 1
        df = df[df['month'] == month]
    if day != 'all':
        df = df[df['day_of_week'] == day.title()]

    print(f"Dataframe for {city.title()}, {month}rd month, and {day.title()}.")
    return df

df = load_data('chicago', 'all', 'friday')
df.head()

Dataframe for Chicago, 3rd month, and Friday.


ValueError: too many values to unpack (expected 3)

In [11]:
city = 'chicago'
def time_stats(df, month, day):
    """Displays statistics on the most frequent times of travel."""

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()
    
    df = pd.read_csv(CITY_DATA[city])
    df.drop('Unnamed: 0', axis=1, inplace=True)
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    df['month'] = df['Start Time'].dt.month
    df['day_of_week'] = df['Start Time'].dt.day_name()
    if month != 'all':
        months = ['january', 'february', 'march', 'april', 'may', 'june']
        month = months.index(month) + 1
        df = df[df['month'] == month]
    if day != 'all':
        df = df[df['day_of_week'] == day.title()]
    # Add hour column
    df['hour'] = df['Start Time'].dt.hour

    try:
        if month == 'all' and day == 'all':
            # Display most common month, day, and hour
            common_month = df['month'].mode()[0]
            common_month_name = calendar.month_name[common_month]  # Convert month number to month name
            common_day = df['day_of_week'].mode()[0]
            common_hour = df['hour'].mode()[0]
            common_hour_12hr = datetime.strptime(str(common_hour), "%H").strftime("%I %p")  # Convert to 12-hour format

            print(f"Common month: {common_month_name}, Common day: {common_day}, Common hour: {common_hour_12hr}")

        elif month == 'all' and day != 'all':
            # Display most common month and hour for the selected all months and one day
            common_month = df['month'].mode()[0]
            common_month_name = calendar.month_name[common_month]
            common_hour = df[df['day_of_week'] == day]['hour'].mode()[0]
            common_hour_12hr = datetime.strptime(str(common_hour), "%H").strftime("%I %p")

            print(f"Common month: {common_month_name}, Common hour on {day}: {common_hour_12hr}")

        elif day == 'all' and month != 'all':
            # Display most common hour and day for the selected one month and all days
            common_day = df['day_of_week'].mode()[0]
            common_hour = df[df['month'] == month]['hour'].mode()[0]
            common_hour_12hr = datetime.strptime(str(common_hour), "%H").strftime("%I %p")

            print(f"Common hour in month of {calendar.month_name[month]}: {common_hour_12hr}, Common day: {common_day}")

        else:
            # Display most common hour for the selected one month and one day
            common_hour = df[(df['month'] == month) & (df['day_of_week'] == day)]['hour'].mode()[0]
            common_hour_12hr = datetime.strptime(str(common_hour), "%H").strftime("%I %p")

            print(f"Common hour on {day} in month of {calendar.month_name[month]}: {common_hour_12hr}")

    except KeyError as e:
        print(f"KeyError: {e} - Ensure the DataFrame contains the necessary columns.")
    except Exception as e:
        print(f"An error occurred: {e}")

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

df = pd.read_csv('chicago.csv')
month = 'all'
day = "monday"
time_stats(df, month, day)


Calculating The Most Frequent Times of Travel...

KeyError: 0 - Ensure the DataFrame contains the necessary columns.

This took 0.5821497440338135 seconds.
----------------------------------------


In [15]:
CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }

city = 'chicago'
def time_stats_general(city):
    df = pd.read_csv(CITY_DATA[city])
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    df['month'] = df['Start Time'].dt.month
    df['day_of_week'] = df['Start Time'].dt.day_name()
    df['hour'] = df['Start Time'].dt.hour
    print('Most common month:', calendar.month_name[df['month'].mode()[0]])
    print('Most common day of week:', df['day_of_week'].mode()[0])
    print('Most common start hour:', df['hour'].mode()[0])

time_stats_general(city)

Most common month: June
Most common day of week: Tuesday
Most common start hour: 17


In [20]:
# def calculate_time_stats(city,df):
#     print('Most common month:', calendar.month_name[df['month'].mode()[0]])
#     print('Most common day of week:', df['day_of_week'].mode()[0])
#     print('Most common start hour:', df['hour'].mode()[0])

#     most_common_month = calendar.month_name[df['month'].mode()[0]]
#     most_common_day = df['day_of_week'].mode()[0]
#     most_common_hour = df['hour'].mode()[0]

#     return most_common_month, most_common_day, most_common_hour

def determine_month_week_day(city):
    df = pd.read_csv(CITY_DATA[city])
    df.drop('Unnamed: 0', axis=1, inplace=True)
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    df['month'] = df['Start Time'].dt.month
    df['day_of_week'] = df['Start Time'].dt.day_name()
    df['hour'] = df['Start Time'].dt.hour

    month = df['month']
    day = df['day_of_week']
    hour = df['hour']

    return month, day, hour


month, day, hour = determine_month_week_day(city)
# print(month)

In [23]:
def time_stats_general(city, month, day, hour):
    city = city.title()
    print(f"Most common month, day, and hour for city {city}")

    most_common_month = calendar.month_name[month.mode()[0]]
    most_common_day = day.mode()[0]
    most_common_hour = hour.mode()[0]

    print('Most common month:', calendar.month_name[month.mode()[0]])
    print('Most common day of week:', day.mode()[0])
    print('Most common start hour:', hour.mode()[0])

    return most_common_month, most_common_day, most_common_hour

time_stats_general(city, month, day, hour) 

Most common month, day, and hour for city Chicago
Most common month: June
Most common day of week: Tuesday
Most common start hour: 17


('June', 'Tuesday', np.int32(17))

In [3]:
df = pd.read_csv("chicago.csv")
def time_stats(df, month_all):
    """Displays statistics on the most frequent times of travel."""

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()
    if month_all != 'all':
        try:
            # display the most common month, day of week, and start hour
            common_month = df['month'].mode()[0]
            common_month_name = calendar.month_name[common_month]  # Convert month number to month name
            common_day = df['day_of_week'].mode()[0]

            # display the most common start hour in 12-hour format
            df['hour'] = df['Start Time'].dt.hour
            common_hour = df['hour'].mode()[0]
            common_hour_12hr = datetime.strptime(str(common_hour), "%H").strftime("%I %p")  # Convert to 12-hour format

            # print the most common month, day of week, and start hour
            print("Common month is: ", common_month_name, "\nCommon day is: ", common_day, "\nAnd finally common hour is: ", common_hour_12hr)

        except KeyError:
            print("The 'month', 'day_of_week', or 'Start Time' columns are not present in the dataset.")    
    
    else:
        print("You have chosen to display statistics for all months.")
        print("Please wait while we calculate the most common month, day of week, and start hour...")
        # display the most common month, day of week, and start hour
        common_month = df['month'].mode()[0]
        common_month_name = calendar.month_name[common_month]
        common_day = df['day_of_week'].mode()[0]

        # display the most common start hour in 12-hour format
        df['hour'] = df['Start Time'].dt.hour
        common_hour = df['hour'].mode()[0]
        common_hour_12hr = datetime.strptime(str(common_hour), "%H").strftime("%I %p")  # Convert to 12-hour format

        

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

    return common_month_name, common_day, common_hour_12hr
month_all = 'all'
time_stats(df, month_all)


Calculating The Most Frequent Times of Travel...

You have chosen to display statistics for all months.
Please wait while we calculate the most common month, day of week, and start hour...


KeyError: 'month'

In [3]:
CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }

def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')
    # get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs
    while True:
        city = input('Would you like to see data for Chicago, New York City, or Washington?').lower()
        if city in ['chicago', 'new york city', 'washington']:
            break
        else:
            print('Invalid input. Please enter a valid city name.')

    # get user input for month (all, january, february, ... , june)
    while True:
        month = input('Which month - January, February, March, April, May, or June?').lower()
        if month in ['all', 'january', 'february', 'march', 'april', 'may', 'june']:
            break
        else:
            print('Invalid input. Please enter a valid month name.')

    # get user input for day of week (all, monday, tuesday, ... sunday)
    while True:
        day = input('Which day - Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, or Sunday?').lower()
        if day in ['all', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']:
            break
        else:
            print('Invalid input. Please enter a valid day of the week.')

    print('-'*40)
    
    print("Your chosen City, Month(s) and Day(s) are:")
    return city, month, day

get_filters()

Hello! Let's explore some US bikeshare data!
----------------------------------------
Your chosen City, Month(s) and Day(s) are:


('chicago', 'january', 'monday')

In [4]:
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    df = pd.read_csv(CITY_DATA[city])
    df.drop('Unnamed: 0', axis=1, inplace=True)
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    df['month'] = df['Start Time'].dt.month
    df['day_of_week'] = df['Start Time'].dt.day_name()
    if month != 'all':
        months = ['january', 'february', 'march', 'april', 'may', 'june']
        month = months.index(month) + 1
        df = df[df['month'] == month]
    if day != 'all':
        df = df[df['day_of_week'] == day.title()]

    print(f"Dataframe for {city.title()}, {month}rd month, and {day.title()}.")
    return df

df = load_data('chicago', 'march', 'friday')
df.head()

Dataframe for Chicago, 3rd month, and Friday.


Unnamed: 0,Start Time,End Time,Trip Duration,Start Station,End Station,User Type,Gender,Birth Year,month,day_of_week
37,2017-03-24 15:35:55,2017-03-24 15:46:10,615,Dearborn St & Erie St,State St & Van Buren St,Subscriber,Male,1989.0,3,Friday
93,2017-03-24 15:32:04,2017-03-24 15:52:53,1249,Sedgwick St & Webster Ave,Western Ave & Winnebago Ave,Subscriber,Female,1964.0,3,Friday
175,2017-03-24 15:10:29,2017-03-24 15:19:44,555,Franklin St & Monroe St,Aberdeen St & Monroe St,Subscriber,Male,1987.0,3,Friday
190,2017-03-24 12:29:30,2017-03-24 12:48:56,1166,Southport Ave & Wellington Ave,Lake Shore Dr & North Blvd,Subscriber,Female,1984.0,3,Friday
198,2017-03-31 08:25:53,2017-03-31 08:39:09,796,Clinton St & Jackson Blvd,Racine Ave (May St) & Fulton St,Subscriber,Male,1983.0,3,Friday


trial for asking user if they want to display 5 rowsat a time

In [5]:
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    df = pd.read_csv(CITY_DATA[city])
    df.drop('Unnamed: 0', axis=1, inplace=True)
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    df['month'] = df['Start Time'].dt.month
    df['day_of_week'] = df['Start Time'].dt.day_name()

    if month != 'all':
        months = ['january', 'february', 'march', 'april', 'may', 'june']
        month = months.index(month) + 1
        df = df[df['month'] == month]

    if day != 'all':
        df = df[df['day_of_week'] == day.title()]

    return df

def display_data(df):
    """
    Asks the user if they want to see 5 rows of data and keeps iterating until the user says 'no'.
    """
    start_row = 0
    while True:
        show_data = input("\nWould you like to see 5 rows of data? Enter 'yes' or 'no': ").strip().lower()
        if show_data == 'yes':
            display(df.iloc[start_row:start_row+5])  # Display next 5 rows
            start_row += 5
            if start_row >= len(df):  # Stop if there are no more rows to display
                print("\n🚀 End of data reached.")
                break
        elif show_data == 'no':
            print("\n✅ Data display stopped.")
            break
        else:
            print("❌ Invalid input. Please enter 'yes' or 'no'.")

# Run the script
city, month, day = get_filters()
df = load_data(city, month, day)
display_data(df)  # Ask user if they want to display rows

Hello! Let's explore some US bikeshare data!
Invalid input. Please enter a valid city name.
Invalid input. Please enter a valid city name.
Invalid input. Please enter a valid city name.
----------------------------------------
Your chosen City, Month(s) and Day(s) are:

✅ Data display stopped.


In [6]:
def time_stats(df):
    """Displays statistics on the most frequent times of travel."""

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    # display the most common month
    common_month = df['month'].mode()[0]
    common_month_name = calendar.month_name[common_month]  # Convert month number to month name

    # display the most common day of week
    common_day = df['day_of_week'].mode()[0]

    # display the most common start hour
    df['hour'] = df['Start Time'].dt.hour
    common_hour = df['hour'].mode()[0]
    common_hour_12hr = datetime.strptime(str(common_hour), "%H").strftime("%I %p")  # Convert to 12-hour format

    # print the most common month, day of week, and start hour
    print("Common month is: ", common_month_name, "\nCommon day is: ", common_day, "\nAnd finally common hour is: ", common_hour_12hr)
    
    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

time_stats(df)


Calculating The Most Frequent Times of Travel...

Common month is:  April 
Common day is:  Monday 
And finally common hour is:  07 AM

This took 0.002615213394165039 seconds.
----------------------------------------


Hello! Let's explore some US bikeshare data!
----------------------------------------
Your chosen City, Month(s) and Day(s) are:


TypeError: load_data() missing 2 required positional arguments: 'month' and 'day'

In [7]:
city, month, day = get_filters()

def time_stats_general(city, df):
    """Displays statistics on the most frequent times of travel in the general dataset and not the filtered one.
    
    Returns and prints:
        (str) common_month_name - The most common month
        (str) common_day - The most common day
        (str) common_hour_12hr - The most common hour in 12-hour format
    """

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()
    
    df = pd.read_csv(CITY_DATA[city])

    try:
        # display the most common month, day of week, and start hour
        common_month = df['month'].mode()[0]
        common_month_name = calendar.month_name[common_month]  # Convert month number to month name
        common_day = df['day_of_week'].mode()[0]

        # display the most common start hour in 12-hour format
        df['hour'] = df['Start Time'].dt.hour
        common_hour = df['hour'].mode()[0]
        common_hour_12hr = datetime.strptime(str(common_hour), "%H").strftime("%I %p")  # Convert to 12-hour format

        # print the most common month, day of week, and start hour
        print("Common month is: ", common_month_name, "\nCommon day is: ", common_day, "\nAnd finally common hour is: ", common_hour_12hr)

    except KeyError:
        print("The 'month', 'day_of_week', or 'Start Time' columns are not present in the dataset.")    
    
    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

    return common_month_name, common_day, common_hour_12hr

time_stats_general(city, df)

Hello! Let's explore some US bikeshare data!
----------------------------------------
Your chosen City, Month(s) and Day(s) are:

Calculating The Most Frequent Times of Travel...

The 'month', 'day_of_week', or 'Start Time' columns are not present in the dataset.

This took 0.3847208023071289 seconds.
----------------------------------------


UnboundLocalError: cannot access local variable 'common_month_name' where it is not associated with a value

In [36]:

def time_stats(df):
    """Displays statistics on the most frequent times of travel."""

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    # display the most common month
    common_month = df['month'].mode()[0]

    # display the most common day of week
    common_day = df['day_of_week'].mode()[0]

    # display the most common start hour
    df['hour'] = df['Start Time'].dt.hour
    common_hour = df['hour'].mode()[0]

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

    # print(common_month, common_day, df['hour'])

    return common_month.item(), common_day, common_hour.item()

# time_stats(df)

In [44]:
def station_stats(df):
    """Displays statistics on the most popular stations and trip."""

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    # display most commonly used start station
    most_common_start_station = df['Start Station'].mode()[0]

    # display most commonly used end station
    most_common_end_station = df['End Station'].mode()[0]

    # display most frequent combination of start station and end station trip
    df['Start End Station'] = df['Start Station'] + ' to ' + df['End Station']
    most_common_combination = df['Start End Station'].mode()[0]

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
    
    return most_common_start_station, most_common_end_station, most_common_combination

# station_stats(df)

In [38]:
def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration."""

    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    # display total travel time
    total_travel_time = df['Trip Duration'].sum()

    # display mean travel time
    mean_travel_time = df['Trip Duration'].mean()

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

    return total_travel_time.item(), mean_travel_time.item()

# trip_duration_stats(df)


In [8]:
# df.head()

Unnamed: 0,Start Time,End Time,Trip Duration,Start Station,End Station,User Type,Gender,Birth Year,month,day_of_week,hour,Start End Station
37,2017-03-24 15:35:55,2017-03-24 15:46:10,615,Dearborn St & Erie St,State St & Van Buren St,Subscriber,Male,1989.0,3,Friday,15,Dearborn St & Erie St to State St & Van Buren St
93,2017-03-24 15:32:04,2017-03-24 15:52:53,1249,Sedgwick St & Webster Ave,Western Ave & Winnebago Ave,Subscriber,Female,1964.0,3,Friday,15,Sedgwick St & Webster Ave to Western Ave & Win...
175,2017-03-24 15:10:29,2017-03-24 15:19:44,555,Franklin St & Monroe St,Aberdeen St & Monroe St,Subscriber,Male,1987.0,3,Friday,15,Franklin St & Monroe St to Aberdeen St & Monro...
190,2017-03-24 12:29:30,2017-03-24 12:48:56,1166,Southport Ave & Wellington Ave,Lake Shore Dr & North Blvd,Subscriber,Female,1984.0,3,Friday,12,Southport Ave & Wellington Ave to Lake Shore D...
198,2017-03-31 08:25:53,2017-03-31 08:39:09,796,Clinton St & Jackson Blvd,Racine Ave (May St) & Fulton St,Subscriber,Male,1983.0,3,Friday,8,Clinton St & Jackson Blvd to Racine Ave (May S...


In [39]:
def user_stats(df):
    """Displays statistics on bikeshare users."""

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    user_types = df['User Type'].value_counts()

    # Display counts of gender
    gender = df['Gender'].value_counts()
    # female = df[df['Gender']=="Female"].shape[0]
    # male = df[df['Gender']=="Male"].shape[0]

    # Display earliest, most recent, and most common year of birth
    earliest_birth_year = df['Birth Year'].min()
    most_recent_birth_year = df['Birth Year'].max()
    most_common_birth_year = df['Birth Year'].mode()[0]

    # print(user_types, "\n")
    # print(gender, "\n")
    # print("Gender Count: ", gender, "\n", " female count: ", female, "\n", " male count: ", male)
    # print("Earliest Birth Year: ", earliest_birth_year, "\n", "Most Recent Birth Year: ", most_recent_birth_year, "\n", "Most Common Birth Year: ", most_common_birth_year)
    
    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
    
    return user_types, gender, earliest_birth_year, most_recent_birth_year, most_common_birth_year
    


# user_stats(df)

In [45]:

def main():
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.lower() != 'yes':
            break


if __name__ == "__main__":
	main()


Hello! Let's explore some US bikeshare data!
----------------------------------------
Your chosen City, Month(s) and Day(s) are:
Dataframe for Chicago, 1rd month, and Monday.

Calculating The Most Frequent Times of Travel...


This took 0.0009007453918457031 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...


This took 0.0010209083557128906 seconds.
----------------------------------------

Calculating Trip Duration...


This took 0.0009558200836181641 seconds.
----------------------------------------

Calculating User Stats...


This took 0.0005030632019042969 seconds.
----------------------------------------
