In [2]:
import pandas as pd
import os
from glob import glob

# Path to the folder containing the CSV files
csv_path = "Resources/"

# Get a list of all CSV files in the folder. glob() returns matches in
# arbitrary order, so sort them to make the combined row order reproducible.
all_files = sorted(glob(os.path.join(csv_path, "*.csv")))

# Output file path (plain, uncompressed CSV)
output_file = "combined_data_2024.csv"


def combine_csv_files(files, output_path, chunksize=100000,
                      date_columns=("started_at", "ended_at")):
    """Concatenate CSV files into one CSV, dropping incomplete rows.

    Each input file is read in chunks of ``chunksize`` rows so the whole
    dataset never has to fit in memory. In every chunk the ``date_columns``
    are parsed with ``pd.to_datetime`` and rows containing any missing value
    are dropped before the chunk is written out.

    Args:
        files: Iterable of paths of the CSV files to combine, in output order.
        output_path: Path of the combined CSV. An existing file at this path
            is overwritten by the first chunk rather than appended to.
        chunksize: Number of rows read from an input file at a time.
        date_columns: Names of the columns to parse as datetimes.

    Returns:
        The number of data rows written to ``output_path``.

    Raises:
        KeyError: If an input file lacks one of ``date_columns``.
    """
    rows_written = 0
    first_write = True

    for file in files:
        print(f"Processing file: {file}")  # Display progress

        # Process the current file in chunks
        for chunk in pd.read_csv(file, chunksize=chunksize):
            print(f"Processing a chunk from {file}...")  # Display progress within the file

            # Convert the date columns to datetime
            for column in date_columns:
                chunk[column] = pd.to_datetime(chunk[column])

            # Drop rows with missing values (incomplete data)
            chunk = chunk.dropna()

            # The first chunk truncates any stale output from a previous run
            # and writes the header; every later chunk appends without one.
            chunk.to_csv(
                output_path,
                mode="w" if first_write else "a",
                index=False,
                header=first_write,
            )
            first_write = False
            rows_written += len(chunk)

    return rows_written


if all_files:
    combine_csv_files(all_files, output_file)
    print(f"Data processing complete. Combined file saved as '{output_file}'.")
else:
    # Nothing matched the pattern, so no output file was produced.
    print(f"No CSV files found in '{csv_path}'; nothing was written.")


Processing file: Resources\JC-202212-citibike-tripdata.csv
Processing a chunk from Resources\JC-202212-citibike-tripdata.csv...
Processing file: Resources\JC-202301-citibike-tripdata.csv
Processing a chunk from Resources\JC-202301-citibike-tripdata.csv...
Processing file: Resources\JC-202302-citibike-tripdata.csv
Processing a chunk from Resources\JC-202302-citibike-tripdata.csv...
Processing file: Resources\JC-202303-citibike-tripdata.csv
Processing a chunk from Resources\JC-202303-citibike-tripdata.csv...
Processing file: Resources\JC-202304-citibike-tripdata.csv
Processing a chunk from Resources\JC-202304-citibike-tripdata.csv...
Processing file: Resources\JC-202305-citibike-tripdata.csv
Processing a chunk from Resources\JC-202305-citibike-tripdata.csv...
Processing file: Resources\JC-202306-citibike-tripdata.csv
Processing a chunk from Resources\JC-202306-citibike-tripdata.csv...
Processing file: Resources\JC-202307-citibike-tripdata.csv
Processing a chunk from Resources\JC-202307-c