In [None]:
# Import necessary modules
import glob  # For finding files and directories matching a pattern
import os  # For interacting with the operating system
import shutil  # For removing whole directory trees
import subprocess  # For running subprocesses
import zipfile  # For extracting the downloaded archive

import pandas as pd  # For data manipulation and analysis

In [None]:

dataset_folder = "./dataset"

# Create the folder on a first run so the listing below cannot raise
# FileNotFoundError.
os.makedirs(dataset_folder, exist_ok=True)

# Start from a clean folder: remove anything left over from a previous run.
leftovers = os.listdir(dataset_folder)
if leftovers:
    print("The dataset folder is not empty. Deleting all files...")
    for entry in leftovers:
        entry_path = os.path.join(dataset_folder, entry)
        if os.path.isdir(entry_path) and not os.path.islink(entry_path):
            # rmtree also removes non-empty directories; os.rmdir would raise.
            shutil.rmtree(entry_path)
        else:
            # Regular files and symlinks (including links to directories).
            os.remove(entry_path)
    print("All files in the dataset folder have been deleted.")
else:
    print("The dataset folder is empty.")

print("Downloading dataset...")
# Download the dataset archive into dataset_folder with the Kaggle CLI.
# check=True raises CalledProcessError if the command exits non-zero.
subprocess.run(["kaggle", "datasets", "download", "-d", "aadimator/nyc-automated-traffic-volume-counts", "-p", dataset_folder], check=True)
print("Download completed.")

# Path of the archive the download step is expected to have written.
zip_path = os.path.join(dataset_folder, "nyc-automated-traffic-volume-counts.zip")

print("Unzipping the dataset...")
# Extract with the standard library instead of shelling out to `tar -xf`:
# the download is a .zip archive, which GNU tar cannot read, so the external
# command only works on systems whose tar is bsdtar.
with zipfile.ZipFile(zip_path) as archive:
    archive.extractall(dataset_folder)
print("Unzipping completed.")

print("Removing the zip file...")
# The archive is no longer needed once its contents are extracted.
os.remove(zip_path)
print("Zip file removed.")

print("Listing directory contents...")
# List the contents of the dataset folder
files = os.listdir(dataset_folder)
print("Directory contents:", files)

# Find all CSV files in the dataset folder. glob returns matches in
# arbitrary order, so sort them to make the choice below reproducible.
csv_files = sorted(glob.glob(os.path.join(dataset_folder, "*.csv")))
if csv_files:
    old_name = csv_files[0]
    new_name = os.path.join(dataset_folder, "ATVC_NYC.csv")
    if len(csv_files) > 1:
        # Make it visible that other CSV files were left untouched.
        print(f"Found {len(csv_files)} CSV files; using {old_name}")
    # Rename the first CSV file (in sorted order) to "ATVC_NYC.csv"
    os.rename(old_name, new_name)
    print(f"Renamed {old_name} to {new_name}")
else:
    print("No CSV file found in the directory.")


In [None]:
# Read the renamed dataset CSV into a DataFrame
data = pd.read_csv("./dataset/ATVC_NYC.csv")
print("Dataset loaded successfully.")

# Write one CSV per year, 2011 through 2020 inclusive
first_year, last_year = 2011, 2020
for year in range(first_year, last_year + 1):
    # Boolean mask selecting the rows whose 'Yr' value equals this year
    in_year = data['Yr'].eq(year)
    yearly_data = data.loc[in_year]
    # The output file is named after the year; the index is not written
    yearly_data.to_csv(f"./dataset/{year}.csv", index=False)

print("Data split by year and saved to separate CSV files.")