In [1]:
import os
print(os.getcwd())
import time
import pandas as pd
import numpy as np


C:\Users\user\Downloads\all-project-files (2)


In [2]:
CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }

In [3]:
def get_filters():
    print("Hello! Let's explore some US bikeshare data!")

    cities = list(CITY_DATA.keys())
    months = ['january', 'february', 'march', 'april', 'may', 'june', 'all']
    days = ['monday', 'tuesday', 'wednesday', 'thursday',
            'friday', 'saturday', 'sunday', 'all']

    while True:
        city = input("Enter city (Chicago, New York City, Washington): ").lower()
        if city in cities:
            break
        print("Invalid city. Please try again.")

    while True:
        month = input("Enter month (Januaryâ€“June) or 'all': ").lower()
        if month in months:
            break
        print("Invalid month. Please try again.")

    while True:
        day = input("Enter day (Mondayâ€“Sunday) or 'all': ").lower()
        if day in days:
            break
        print("Invalid day. Please try again.")

    print("-" * 40)
    return city, month, day


In [4]:
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.
    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    # Load data file into a dataframe
    df = pd.read_csv(CITY_DATA[city])
    
    # Convert the Start Time column to datetime
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    
    # Extract month and day of week from Start Time to create new columns
    df['month'] = df['Start Time'].dt.month
    df['day_of_week'] = df['Start Time'].dt.day_name()
    
    # Filter by month if applicable
    if month != 'all':
        months = ['january', 'february', 'march', 'april', 'may', 'june']
        month = months.index(month) + 1
        df = df[df['month'] == month]
    
    if day != 'all':
        df = df[df['day_of_week'] == day.title()]
    
    return df

In [5]:
def time_stats(df):
    """Displays statistics on the most frequent times of travel."""
    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()
    
    common_month = df['month'].mode()[0]
    months = ['January', 'February', 'March', 'April', 'May', 'June']
    print(f"Most Common Month: {months[common_month - 1]}")
    
    common_day = df['day_of_week'].mode()[0]
    print(f"Most Common Day of Week: {common_day}")
    
    df['hour'] = df['Start Time'].dt.hour
    common_hour = df['hour'].mode()[0]
    print(f"Most Common Start Hour: {common_hour}:00")
    
    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [6]:
def station_stats(df):
    """Displays statistics on the most popular stations and trip."""
    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()
    
    common_start = df['Start Station'].mode()[0]
    print(f"Most Common Start Station: {common_start}")
    
    common_end = df['End Station'].mode()[0]
    print(f"Most Common End Station: {common_end}")
    
    df['Trip'] = df['Start Station'] + " -> " + df['End Station']
    common_trip = df['Trip'].mode()[0]
    print(f"Most Frequent Trip: {common_trip}")
    
    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [7]:
def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration."""
    print('\nCalculating Trip Duration...\n')
    start_time = time.time()
    
    total_duration = df['Trip Duration'].sum()
    days = total_duration // (24 * 3600)
    hours = (total_duration % (24 * 3600)) // 3600
    minutes = (total_duration % 3600) // 60
    seconds = total_duration % 60
    
    print(f"Total Travel Time: {int(days)} days, {int(hours)} hours, {int(minutes)} minutes, {int(seconds)} seconds")
    
    mean_duration = df['Trip Duration'].mean()
    mean_hours = mean_duration // 3600
    mean_minutes = (mean_duration % 3600) // 60
    mean_seconds = mean_duration % 60
    
    if mean_hours > 0:
        print(f"Average Travel Time: {int(mean_hours)} hours, {int(mean_minutes)} minutes, {int(mean_seconds)} seconds")
    else:
        print(f"Average Travel Time: {int(mean_minutes)} minutes, {int(mean_seconds)} seconds")
    
    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [8]:
def user_stats(df):
    """Displays statistics on bikeshare users."""
    print('\nCalculating User Stats...\n')
    start_time = time.time()
    
    print("User Types:")
    user_types = df['User Type'].value_counts()
    for user_type, count in user_types.items():
        print(f"  {user_type}: {count}")
    
    if 'Gender' in df.columns:
        print("\nGender Breakdown:")
        gender_counts = df['Gender'].value_counts()
        for gender, count in gender_counts.items():
            print(f"  {gender}: {count}")
    else:
        print("\nGender: Data not available for this city")
    
    if 'Birth Year' in df.columns:
        earliest_year = int(df['Birth Year'].min())
        recent_year = int(df['Birth Year'].max())
        common_year = int(df['Birth Year'].mode()[0])
        current_year = 2025
        oldest_age = current_year - earliest_year
        youngest_age = current_year - recent_year
        common_age = current_year - common_year
        
        print(f"\nBirth Year Stats:")
        print(f"  Oldest rider was born in {earliest_year} (age {oldest_age})")
        print(f"  Youngest rider was born in {recent_year} (age {youngest_age})")
        print(f"  Most common birth year is {common_year} (age {common_age})")
    else:
        print("\nBirth Year: Data not available for this city")
    
    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [9]:
def main():
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)

        row_index = 0
        while True:
            view_data = input("\nWould you like to see 5 lines of raw data? Enter yes or no: ").lower()
            if view_data == 'yes':
                print(df.iloc[row_index:row_index + 5])
                row_index += 5
                if row_index >= len(df):
                    print("\nNo more data to display.")
                    break
            else:
                break

        restart = input("\nWould you like to restart? Enter yes or no: ").lower()
        if restart != 'yes':
            print("\nThanks for exploring the bikeshare data! Have a great day! ðŸš´")
            break


if __name__ == '__main__':
    main()


Hello! Let's explore some US bikeshare data!


Enter city (Chicago, New York City, Washington):  chicago
Enter month (Januaryâ€“June) or 'all':  all
Enter day (Mondayâ€“Sunday) or 'all':  all


----------------------------------------

Calculating The Most Frequent Times of Travel...

Most Common Month: June
Most Common Day of Week: Tuesday
Most Common Start Hour: 17:00

This took 0.06202340126037598 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

Most Common Start Station: Streeter Dr & Grand Ave
Most Common End Station: Streeter Dr & Grand Ave
Most Frequent Trip: Lake Shore Dr & Monroe St -> Streeter Dr & Grand Ave

This took 0.1853644847869873 seconds.
----------------------------------------

Calculating Trip Duration...

Total Travel Time: 3250 days, 19 hours, 56 minutes, 27 seconds
Average Travel Time: 15 minutes, 36 seconds

This took 0.001348257064819336 seconds.
----------------------------------------

Calculating User Stats...

User Types:
  Subscriber: 238889
  Customer: 61110
  Dependent: 1

Gender Breakdown:
  Male: 181190
  Female: 57758

Birth Year Stats:
  Oldest rider was born in 1899 (age 126)
  Youngest


Would you like to see 5 lines of raw data? Enter yes or no:  yes


   Unnamed: 0          Start Time             End Time  Trip Duration  \
0     1423854 2017-06-23 15:09:32  2017-06-23 15:14:53            321   
1      955915 2017-05-25 18:19:03  2017-05-25 18:45:53           1610   
2        9031 2017-01-04 08:27:49  2017-01-04 08:34:45            416   
3      304487 2017-03-06 13:49:38  2017-03-06 13:55:28            350   
4       45207 2017-01-17 14:53:07  2017-01-17 15:02:01            534   

                   Start Station                   End Station   User Type  \
0           Wood St & Hubbard St       Damen Ave & Chicago Ave  Subscriber   
1            Theater on the Lake  Sheffield Ave & Waveland Ave  Subscriber   
2             May St & Taylor St           Wood St & Taylor St  Subscriber   
3  Christiana Ave & Lawrence Ave  St. Louis Ave & Balmoral Ave  Subscriber   
4         Clark St & Randolph St  Desplaines St & Jackson Blvd  Subscriber   

   Gender  Birth Year  month day_of_week  hour  \
0    Male      1992.0      6      Friday  


Would you like to see 5 lines of raw data? Enter yes or no:  no

Would you like to restart? Enter yes or no:  yes


Hello! Let's explore some US bikeshare data!


Enter city (Chicago, New York City, Washington):  new york city
Enter month (Januaryâ€“June) or 'all':  all
Enter day (Mondayâ€“Sunday) or 'all':  sunday


----------------------------------------

Calculating The Most Frequent Times of Travel...

Most Common Month: April
Most Common Day of Week: Sunday
Most Common Start Hour: 14:00

This took 0.008477449417114258 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

Most Common Start Station: West St & Chambers St
Most Common End Station: Central Park S & 6 Ave
Most Frequent Trip: Central Park S & 6 Ave -> Central Park S & 6 Ave

This took 0.028287649154663086 seconds.
----------------------------------------

Calculating Trip Duration...

Total Travel Time: 456 days, 21 hours, 51 minutes, 19 seconds
Average Travel Time: 18 minutes, 12 seconds

This took 0.0004165172576904297 seconds.
----------------------------------------

Calculating User Stats...

User Types:
  Subscriber: 28409
  Customer: 7590

Gender Breakdown:
  Male: 20657
  Female: 8412

Birth Year Stats:
  Oldest rider was born in 1885 (age 140)
  Youngest rider was born in 2001


Would you like to see 5 lines of raw data? Enter yes or no:  yes


    Unnamed: 0          Start Time             End Time  Trip Duration  \
0      5688089 2017-06-11 14:55:05  2017-06-11 15:08:21            795   
8      2271331 2017-04-02 08:02:36  2017-04-02 09:28:08           5132   
10     2287178 2017-04-02 14:37:20  2017-04-02 14:56:12           1131   
19        5857 2017-01-01 13:32:39  2017-01-01 13:49:57           1038   
24     2905932 2017-04-16 17:36:06  2017-04-16 18:02:52           1605   

              Start Station             End Station   User Type Gender  \
0   Suffolk St & Stanton St  W Broadway & Spring St  Subscriber   Male   
8    Central Park S & 6 Ave  Central Park S & 6 Ave    Customer    NaN   
10  Bank St & Washington St   Little West St & 1 Pl    Customer    NaN   
19          W 22 St & 8 Ave         W 45 St & 6 Ave    Customer    NaN   
24    Allen St & Stanton St     Mott St & Prince St  Subscriber   Male   

    Birth Year  month day_of_week  hour  \
0       1998.0      6      Sunday    14   
8          NaN      4   


Would you like to see 5 lines of raw data? Enter yes or no:  no

Would you like to restart? Enter yes or no:  yes


Hello! Let's explore some US bikeshare data!


Enter city (Chicago, New York City, Washington):  washington
Enter month (Januaryâ€“June) or 'all':  january
Enter day (Mondayâ€“Sunday) or 'all':  wednesday


----------------------------------------

Calculating The Most Frequent Times of Travel...

Most Common Month: January
Most Common Day of Week: Wednesday
Most Common Start Hour: 8:00

This took 0.006098508834838867 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

Most Common Start Station: Columbus Circle / Union Station
Most Common End Station: Columbus Circle / Union Station
Most Frequent Trip: Massachusetts Ave & Dupont Circle NW -> 15th & P St NW

This took 0.00518798828125 seconds.
----------------------------------------

Calculating Trip Duration...

Total Travel Time: 51 days, 16 hours, 31 minutes, 21 seconds
Average Travel Time: 14 minutes, 6 seconds

This took 0.0002732276916503906 seconds.
----------------------------------------

Calculating User Stats...

User Types:
  Subscriber: 4812
  Customer: 461

Gender: Data not available for this city

Birth Year: Data not available for this city

This took 0.0008554458618164062 


Would you like to see 5 lines of raw data? Enter yes or no:  no

Would you like to restart? Enter yes or no:  no



Thanks for exploring the bikeshare data! Have a great day! ðŸš´
