Bike Usage

In [7]:
import pandas as pd

def calculate_bike_usage(df):
    """Add a ``BIKE_USAGE`` column holding the positive change in available bikes.

    For each row the value is ``AVAILABLE_BIKES`` minus the previous row's
    ``AVAILABLE_BIKES``; decreases and rows without a predecessor become 0.

    When the frame has a ``STATION_ID`` column, "previous row" means the
    previous row of the same station, so the first reading of one station is
    never compared with the last reading of another. Without that column the
    difference is taken over the whole frame in row order.

    Rows are compared in the order they appear; the frame is not re-sorted
    here, so this assumes each station's rows are already in chronological
    order (TODO confirm against the input files).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``AVAILABLE_BIKES`` column. Modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``BIKE_USAGE`` added or overwritten.

    Raises
    ------
    KeyError
        If ``AVAILABLE_BIKES`` is missing from ``df``.
    """
    if 'STATION_ID' in df.columns:
        # Difference within each station so station boundaries do not leak
        # into the first reading of the next station.
        change = df.groupby('STATION_ID', sort=False)['AVAILABLE_BIKES'].diff()
    else:
        change = df['AVAILABLE_BIKES'].diff()

    # Rows without a predecessor yield NaN -> 0; negative changes are floored at 0.
    df['BIKE_USAGE'] = change.fillna(0).clip(lower=0)

    return df

# Input CSVs; each one is overwritten in place with a BIKE_USAGE column added
file_paths = [
    'data/filtprepandemic.csv',
    'data/filtpandemic.csv',
    'data/filtpostpandemic.csv',
]
for file_path in file_paths:
    # Read the file (TIME parsed as datetimes) and compute BIKE_USAGE in one step
    df = calculate_bike_usage(pd.read_csv(file_path, parse_dates=['TIME']))

    # Write the augmented table back over the source file, without the index
    df.to_csv(file_path, index=False)


Daily Sum by Station

In [8]:
import os
import pandas as pd

# Input CSVs; each must already contain STATION_ID, TIME and BIKE_USAGE columns
file_paths = [
    'data/filtprepandemic.csv',
    'data/filtpandemic.csv',
    'data/filtpostpandemic.csv',
]

# Reduce every input to one BIKE_USAGE total per station per calendar day
for file_path in file_paths:
    df = pd.read_csv(file_path, parse_dates=['TIME'])

    # Sum BIKE_USAGE within each (STATION_ID, day-of-TIME) bucket
    df_daily = (
        df.groupby(['STATION_ID', pd.Grouper(key='TIME', freq='D')])['BIKE_USAGE']
        .sum()
        .reset_index()
    )

    # Output name: 'filt' becomes 'final' and '.csv' becomes '_daily_sum.csv'
    file_name = os.path.basename(file_path)
    new_file_name = file_name.replace('filt', 'final')
    new_file_name = new_file_name.replace('.csv', '_daily_sum.csv')

    # Store the aggregate in the data directory, omitting the index column
    new_file_path = os.path.join('data', new_file_name)
    df_daily.to_csv(new_file_path, index=False)



Citywide Daily Sum

In [9]:
import os
import pandas as pd

# Per-station daily sums written by the previous step
file_paths = [
    'data/finalprepandemic_daily_sum.csv',
    'data/finalpandemic_daily_sum.csv',
    'data/finalpostpandemic_daily_sum.csv',
]

# Collapse each per-station file into one BIKE_USAGE total per TIME value
for file_path in file_paths:
    df = pd.read_csv(file_path, parse_dates=['TIME'])

    # Add up BIKE_USAGE across all rows sharing the same TIME
    df_city_daily = (
        df.groupby('TIME')['BIKE_USAGE']
        .sum()
        .reset_index()
    )

    # Output name: insert '_citywide' after the 'final' prefix
    file_name = os.path.basename(file_path)
    new_file_name = file_name.replace('final', 'final_citywide')

    # Store the city-wide totals in the data directory, omitting the index column
    new_file_path = os.path.join('data', new_file_name)
    df_city_daily.to_csv(new_file_path, index=False)


Combining all files 

In [13]:
import os
import pandas as pd

# City-wide daily sums written by the "Citywide Daily Sum" step. The names must
# match that step's output, which replaces 'final' with 'final_citywide' in
# 'final<period>_daily_sum.csv'; the previous shortened names ('...pre_...',
# '...pan_...', '...post_...') are not produced anywhere in this notebook.
file_paths = [
    'data/final_citywideprepandemic_daily_sum.csv',
    'data/final_citywidepandemic_daily_sum.csv',
    'data/final_citywidepostpandemic_daily_sum.csv',
]

# Collect one DataFrame per input file, in the order listed above
dfs = []

for file_path in file_paths:
    df = pd.read_csv(file_path, parse_dates=['TIME'])
    dfs.append(df)

# Stack the frames row-wise and renumber the index from 0
combined_df = pd.concat(dfs, ignore_index=True)

# Save the combined data to a new CSV file, omitting the index column
combined_file_path = 'data/combined_citywide_daily_sum.csv'
combined_df.to_csv(combined_file_path, index=False)
