# Merge Dataset

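This notebook merges the per-experiment CSV files of the [Kaggle CNC Mill Tool Wear dataset](https://www.kaggle.com/datasets/shasun/tool-wear-detection-in-cnc-mill) with the experiment metadata in `train.csv` and writes the result to a single file, `tool_wear.csv`.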

In [None]:
import os
from zipfile import ZipFile

import pandas as pd

# Local directory of the dataset: archive.zip is read from it, the archive is
# extracted into it, and the merged tool_wear.csv is written to it.
LOCAL_DATA_PATH = "./data"

In [None]:
archive_path = f"{LOCAL_DATA_PATH}/archive.zip"

# Unpack every member of the downloaded archive into the data directory
# (LOCAL_DATA_PATH), next to the archive itself.
with ZipFile(archive_path, "r") as archive:
    archive.extractall(path=LOCAL_DATA_PATH)

In [None]:
# train.csv has one "No" value per experiment; the same number appears,
# zero-padded, in the name of the matching experiment_XX.csv file.
df_train_csv = pd.read_csv(os.path.join(LOCAL_DATA_PATH, "train.csv"))

experiment_ids = list(df_train_csv["No"].unique())

li_df_experiments = []

# The loop variable is deliberately not called `id`: that would shadow the
# builtin id() for the rest of the notebook session.
for experiment_no in experiment_ids:
    # Left-pad the number with zeros to width 2 (1 -> "experiment_01.csv").
    filename = f"experiment_{experiment_no:0>2d}.csv"
    df = pd.read_csv(os.path.join(LOCAL_DATA_PATH, filename), index_col=None)

    # Tag every row with its experiment number, then use that column as the
    # join key to attach the matching train.csv columns to each row.
    df["No"] = experiment_no
    df = df.merge(df_train_csv, how="left", on="No")

    li_df_experiments.append(df)

# Stack all experiments into one frame with a fresh 0..n-1 index.
df_experiments = pd.concat(li_df_experiments, axis=0, ignore_index=True)

# Remove the join key and two train.csv columns from the merged output.
# errors="ignore" skips any of these columns that is absent instead of raising.
df_experiments = df_experiments.drop(
    columns=["No", "machining_finalized", "passed_visual_inspection"],
    errors="ignore",
)
df_experiments.to_csv(os.path.join(LOCAL_DATA_PATH, "tool_wear.csv"), index=False)