In [13]:
import os
import pandas as pd

# Convert every comma-separated .txt recording in `input_dir` into a .csv file
# of the same base name inside `output_dir`.
input_dir = "./data"
output_dir = "./final_data"

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# sorted(): os.listdir returns entries in arbitrary, filesystem-dependent order;
# sorting keeps the conversion log reproducible between runs and machines.
for filename in sorted(os.listdir(input_dir)):
    if not filename.endswith(".txt"):
        continue

    txt_path = os.path.join(input_dir, filename)
    if not os.path.isfile(txt_path):
        # A directory whose name happens to end in ".txt" cannot be parsed.
        continue

    # Swap only the trailing extension. str.replace(".txt", ".csv") would also
    # rewrite any ".txt" that appears earlier in the file name.
    csv_filename = os.path.splitext(filename)[0] + ".csv"
    csv_path = os.path.join(output_dir, csv_filename)

    # Read the .txt file as CSV (comma-separated), no header row assumed
    print(f"Reading {filename}...")
    try:
        df = pd.read_csv(txt_path, header=None)
    except pd.errors.EmptyDataError:
        # One empty recording should not abort the conversion of the others.
        print(f"Skipping {filename}: file is empty")
        continue

    # Save to .csv without adding an index column or a header row
    df.to_csv(csv_path, index=False, header=False)

    print(f"Converted {filename} to {csv_filename}")


Reading data low rpm.txt...
Converted data low rpm.txt to data low rpm.csv
Reading mal data at high rpm using job.txt...
Converted mal data at high rpm using job.txt to mal data at high rpm using job.csv
Reading mal data at high rpm using newer drill bit using wood.txt...
Converted mal data at high rpm using newer drill bit using wood.txt to mal data at high rpm using newer drill bit using wood.csv
Reading high rpm data.txt...
Converted high rpm data.txt to high rpm data.csv
Reading mal data using new drill bit with brick at low rpm.txt...
Converted mal data using new drill bit with brick at low rpm.txt to mal data using new drill bit with brick at low rpm.csv
Reading MAL DATA AT HIGH RPM.txt...
Converted MAL DATA AT HIGH RPM.txt to MAL DATA AT HIGH RPM.csv
Reading MAL DATA USING WOODEN JOB 125.txt...
Converted MAL DATA USING WOODEN JOB 125.txt to MAL DATA USING WOODEN JOB 125.csv
Reading GAOCHENG DATA USING WORN OUT DRILL AT LOW RPM WITH WOOD.txt...
Converted GAOCHENG DATA USING WORN 

data low rpm ---- safe

GAOCHENG DATA USING WORN OUT DRILL AT LOW RPM WITH WOOD  ---- unsafe

high rpm data ---- unsafe

mal data at high rpm using bricks  ---- unsafe

mal data at high rpm using job ---- unsafe

mal data at high rpm using newer drill bit using brick ---- unsafe

mal data at high rpm using newer drill bit using wood ---- unsafe

MAL DATA AT HIGH RPM ---- unsafe

mal data at low rpm using bricks ---- unsafe

mal data at low rpm using wood in new drill bit ---- safe

MAL DATA AT LOW RPM WITHOUT JOB  ---- unsafe

mal data using new drill bit with brick at low rpm ---- safe

MAL DATA USING WOODEN JOB 125 ---- safe

In [17]:
import pandas as pd
import os

# Attach a safe/unsafe label to every converted CSV and stack them into one
# DataFrame (`combined_df`), which is also written to 'combined_labeled_data.csv'.

# Folder containing your CSV files
data_dir = "./final_data"  # Change this to your folder

# Mapping from filename (no extension) to label
file_label_map = {
    "data low rpm": "safe",
    "GAOCHENG DATA USING WORN OUT DRILL AT LOW RPM WITH WOOD": "unsafe",
    "high rpm data": "unsafe",
    "mal data at high rpm using bricks": "unsafe",
    "mal data at high rpm using job": "unsafe",
    "mal data at high rpm using newer drill bit using brick": "unsafe",
    "mal data at high rpm using newer drill bit using wood": "unsafe",
    "MAL DATA AT HIGH RPM": "unsafe",
    "mal data at low rpm using bricks": "unsafe",
    "mal data at low rpm using wood in new drill bit": "safe",
    "MAL DATA AT LOW RPM WITHOUT JOB": "unsafe",
    "mal data using new drill bit with brick at low rpm": "safe",
    "MAL DATA USING WOODEN JOB 125": "safe"
}

# Container for all labeled data, plus the map keys that matched a file
labeled_dfs = []
matched_keys = set()

# sorted(): os.listdir order is arbitrary; sorting makes the row order of the
# combined CSV reproducible between runs and machines.
for filename in sorted(os.listdir(data_dir)):
    if not filename.endswith(".csv"):
        continue

    filepath = os.path.join(data_dir, filename)
    file_key = filename[:-4].strip()  # Remove '.csv'

    label = file_label_map.get(file_key)
    if label is None:
        print(f"⚠️  Skipping file: '{filename}' (label not found)")
        continue

    # Read file, assuming no header.
    # NOTE(review): assumes every file has exactly three comma-separated
    # columns — confirm against the raw recordings.
    df = pd.read_csv(filepath, header=None, names=["col1", "col2", "col3"])
    df["label"] = label
    labeled_dfs.append(df)
    matched_keys.add(file_key)

# Surface map entries that never matched a file (typo in a key, missing
# recording) instead of silently leaving that data out of the combined set.
for missing_key in sorted(set(file_label_map) - matched_keys):
    print(f"⚠️  No CSV found for labeled entry: '{missing_key}'")

# pd.concat([]) fails with an opaque "No objects to concatenate"; raise the
# same exception type with a message that points at the actual cause.
if not labeled_dfs:
    raise ValueError(f"No labeled CSV files found in '{data_dir}'")

# Combine all into one DataFrame
combined_df = pd.concat(labeled_dfs, ignore_index=True)

# Save to final CSV
combined_df.to_csv("combined_labeled_data.csv", index=False)
print("✅ File saved as 'combined_labeled_data.csv'")


✅ File saved as 'combined_labeled_data.csv'


In [18]:
# Convert label to numeric: safe → 0, unsafe → 1
label_codes = {"safe": 0, "unsafe": 1}

# Only map while the column still holds the string labels. Series.map turns
# every value missing from the dict into NaN, so re-running this cell on an
# already-converted column would wipe out all labels.
if not pd.api.types.is_numeric_dtype(combined_df["label"]):
    combined_df["label"] = combined_df["label"].map(label_codes)

# Fail loudly on anything that is not a known code (unknown label strings or
# NaNs left behind by an earlier run) rather than saving a corrupted file.
invalid_rows = ~combined_df["label"].isin(list(label_codes.values()))
if invalid_rows.any():
    raise ValueError(
        f"{int(invalid_rows.sum())} row(s) have a label outside {sorted(label_codes)}; "
        "re-run the cell that builds combined_df before converting labels"
    )
combined_df["label"] = combined_df["label"].astype("int64")

# Save to CSV again with numeric labels
combined_df.to_csv("combined_labeled_data_numeric.csv", index=False)
print("✅ Numeric-labeled file saved as 'combined_labeled_data_numeric.csv'")


✅ Numeric-labeled file saved as 'combined_labeled_data_numeric.csv'
