CSV Data Mapper & Concatenation 

In [13]:
import pandas as pd
import os

# Define the directory where your CSV files are stored
directory = "csv_letters"

# Initialize an empty list to store the DataFrames
dataframes = []

# Define the mappings for each categorical feature
mappings = {
    "Thumb Curl": {"No Curl": 0, "Half Curl": 1, "Full Curl": 2},
    "Thumb Direction": {
        "Vertical Up": 0,
        "Diagonal Up Right": 1,
        "Diagonal Up Left": 2,
        "Horizontal Right": 3,
        "Horizontal Left": 4,
    },
    "Index Curl": {"No Curl": 0, "Half Curl": 1, "Full Curl": 2},
    "Index Direction": {
        "Vertical Up": 0,
        "Diagonal Up Right": 1,
        "Diagonal Up Left": 2,
        "Horizontal Right": 3,
        "Horizontal Left": 4,
    },
    "Middle Curl": {"No Curl": 0, "Half Curl": 1, "Full Curl": 2},
    "Middle Direction": {
        "Vertical Up": 0,
        "Diagonal Up Right": 1,
        "Diagonal Up Left": 2,
        "Horizontal Right": 3,
        "Horizontal Left": 4,
    },
    "Ring Curl": {"No Curl": 0, "Half Curl": 1, "Full Curl": 2},
    "Ring Direction": {
        "Vertical Up": 0,
        "Diagonal Up Right": 1,
        "Diagonal Up Left": 2,
        "Horizontal Right": 3,
        "Horizontal Left": 4,
    },
    "Pinky Curl": {"No Curl": 0, "Half Curl": 1, "Full Curl": 2},
    "Pinky Direction": {
        "Vertical Up": 0,
        "Diagonal Up Right": 1,
        "Diagonal Up Left": 2,
        "Horizontal Right": 3,
        "Horizontal Left": 4,
    },
}

# Loop over each file in the directory
for filename in os.listdir(directory):
    
    if filename.endswith(".csv"):
        # Load the CSV file with the correct delimiter
        df = pd.read_csv(os.path.join(directory, filename), delimiter=";")

        # Print column names for debugging
        #print(f"Processing {filename} with columns: {df.columns}")

        # Map the categorical features to numerical values
        for col, mapping in mappings.items():
            if col in df.columns:
                df[col] = df[col].map(mapping)
            else:
                print(f"Warning: Column {col} not found in {filename}")

        # Append the DataFrame to the list
        dataframes.append(df)

# Concatenate all the DataFrames
concatenated_df = pd.concat(dataframes, ignore_index=True)

# Save the concatenated DataFrame to a new CSV file
concatenated_df.to_csv("all_letters.csv", index=False)

# Display the first few rows of the combined data
print(concatenated_df.head())


   Thumb Curl  Thumb Direction  Index Curl  Index Direction  Middle Curl  \
0           1              1.0           0                0            0   
1           1              1.0           0                0            0   
2           1              1.0           0                0            0   
3           1              1.0           0                0            0   
4           1              1.0           0                0            0   

   Middle Direction  Ring Curl  Ring Direction  Pinky Curl  Pinky Direction  \
0                 0          0               0           1                1   
1                 0          0               0           1                1   
2                 0          0               0           1                1   
3                 0          0               0           1                0   
4                 0          0               0           1                1   

  Label  
0     W  
1     W  
2     W  
3     W  
4     W  


In [16]:
import pandas as pd
import os

# Define the directory where your CSV files are stored
directory = "csv_letters"

# Initialize an empty list to store the DataFrames
dataframes = []

# Define the mappings for each categorical feature
mappings = {
    "Thumb Curl": {"No Curl": 0, "Half Curl": 1, "Full Curl": 2},
    "Thumb Direction": {
        "Vertical Up": 0,
        "Diagonal Up Right": 1,
        "Diagonal Up Left": 2,
        "Horizontal Right": 3,
        "Horizontal Left": 4,
        "Diagonal Down Right": 5,
    },
    "Index Curl": {"No Curl": 0, "Half Curl": 1, "Full Curl": 2},
    "Index Direction": {
        "Vertical Up": 0,
        "Diagonal Up Right": 1,
        "Diagonal Up Left": 2,
        "Horizontal Right": 3,
        "Horizontal Left": 4,
        "Diagonal Down Right": 5,
    },
    "Middle Curl": {"No Curl": 0, "Half Curl": 1, "Full Curl": 2},
    "Middle Direction": {
        "Vertical Up": 0,
        "Diagonal Up Right": 1,
        "Diagonal Up Left": 2,
        "Horizontal Right": 3,
        "Horizontal Left": 4,
        "Diagonal Down Right": 5,
    },
    "Ring Curl": {"No Curl": 0, "Half Curl": 1, "Full Curl": 2},
    "Ring Direction": {
        "Vertical Up": 0,
        "Diagonal Up Right": 1,
        "Diagonal Up Left": 2,
        "Horizontal Right": 3,
        "Horizontal Left": 4,
        "Diagonal Down Right": 5,
    },
    "Pinky Curl": {"No Curl": 0, "Half Curl": 1, "Full Curl": 2},
    "Pinky Direction": {
        "Vertical Up": 0,
        "Diagonal Up Right": 1,
        "Diagonal Up Left": 2,
        "Horizontal Right": 3,
        "Horizontal Left": 4,
        "Diagonal Down Right": 5,
    },
}

# Define the label mapping (indexing letters)
label_mapping = {letter: idx for idx, letter in enumerate("ABCDEFGHIKLMNOPQRSTUVWXY")}

# Loop over each file in the directory
for filename in os.listdir(directory):
    if filename.endswith(".csv"):
        # Load the CSV file with the correct delimiter
        df = pd.read_csv(os.path.join(directory, filename), delimiter=";")

        # Print column names for debugging
        #print(f"Processing {filename} with columns: {df.columns}")

        # Map the categorical features to numerical values
        for col, mapping in mappings.items():
            if col in df.columns:
                df[col] = df[col].map(mapping)
            else:
                print(f"Warning: Column {col} not found in {filename}")

        # Map the labels (letters) to their corresponding index
        if "Label" in df.columns:
            df["Label"] = df["Label"].map(label_mapping)
        else:
            print(f"Warning: Label column not found in {filename}")

        # Append the DataFrame to the list
        dataframes.append(df)

# Concatenate all the DataFrames
concatenated_df = pd.concat(dataframes, ignore_index=True)

# Save the concatenated DataFrame to a new CSV file
concatenated_df.to_csv("all_letters_with_label.csv", index=False)

# Display the first few rows of the combined data
print(concatenated_df.head())


   Thumb Curl  Thumb Direction  Index Curl  Index Direction  Middle Curl  \
0           1                1           0                0            0   
1           1                1           0                0            0   
2           1                1           0                0            0   
3           1                1           0                0            0   
4           1                1           0                0            0   

   Middle Direction  Ring Curl  Ring Direction  Pinky Curl  Pinky Direction  \
0                 0          0               0           1                1   
1                 0          0               0           1                1   
2                 0          0               0           1                1   
3                 0          0               0           1                0   
4                 0          0               0           1                1   

   Label  
0     21  
1     21  
2     21  
3     21  
4     21  


### Test Data \
0,2,2,2,2,0,2,0,2,0,0 -> A \
1,1,0,0,0,0,0,0,0,0,1 -> B \
0,2,1,2,1,2,1,2,1,2,2 -> C \
0,0,0,0,1,0,1,0,1,0,3 -> D \
1,1,2,0,2,0,2,0,2,1,4 -> E \
0,0,1,0,1,0,0,0,0,1,5 -> F \
1,3,1,1,2,1,2,1,2,1,6 -> G \
0,3,0,3,0,3,2,3,2,3,7 -> H \
1,0,2,0,2,0,2,0,0,0,8 -> I \
0,0,0,0,0,0,2,2,2,1,9 -> K \
0,2,0,2,2,2,2,0,2,0,10 -> L \
0,1,2,2,2,2,2,0,2,0,11 -> M \
0,0,2,2,2,2,2,0,2,0,12 -> N \
0,2,1,2,1,2,1,2,1,2,13 -> O \
0,3,0,3,1,3,2,3,2,3,14 -> P \
0,5,1,3,2,1,2,3,2,3,15 -> Q \
0,1,0,0,0,0,2,0,2,2,16 -> R \
1,0,2,0,2,0,2,0,2,0,17 -> S \
0,0,1,2,2,0,2,2,2,0,18 -> T \
1,1,0,0,0,0,2,0,2,0,19 -> U \
1,1,0,0,0,0,0,0,1,1,21 -> W \
0,3,0,1,2,1,2,3,1,3,22 -> X \
0,2,2,0,2,0,2,0,0,1,23 -> Y