# **Library Setup**

In [1]:
import pandas as pd
import numpy as np
import glob
import os
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import signal  
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import confusion_matrix, classification_report



# **Data Loading and Labeling**

In [9]:
import os

import numpy as np
import pandas as pd


# Folder that holds the per-subject "<id>_filtered.csv" recordings.
base_path = "/kaggle/input/datasets/mubashirjawad01/multi-channel-semg-signals-of-hand-gestures"

# an empty list to store all processed data
all_subjects_data = []

# matlab code logic from dataset file
fs = 2000             # samples per second (multiplies seconds into row indices below)
segment_duration = 6  # seconds of every gesture window that receive a label
# repetition number -> second at which that repetition's first gesture starts
rep_coeffs = {0: 4, 1: 138, 2: 272, 3: 406, 4: 540}
n_gestures = 10       # gestures performed in each repetition
gesture_period = 10   # seconds between the starts of two consecutive gestures


def build_segment_labels(n_samples, fs, segment_duration, rep_coeffs,
                         n_gestures=10, gesture_period=10):
    """Build per-sample gesture and repetition labels for one recording.

    Every sample starts as -1 (unlabeled). For each repetition ``rep`` and
    each gesture ``g`` the samples in
    ``[(rep_coeffs[rep] + g * gesture_period) * fs, ... + segment_duration * fs)``
    are set to ``g`` / ``rep``.

    Parameters
    ----------
    n_samples : int
        Number of rows in the recording.
    fs : int
        Samples per second.
    segment_duration : int
        Length in seconds of each labeled window.
    rep_coeffs : dict[int, int]
        Maps repetition number to the start second of its first gesture.
    n_gestures : int, default 10
        Gestures per repetition.
    gesture_period : int, default 10
        Seconds between consecutive gesture starts.

    Returns
    -------
    gesture_labels : numpy.ndarray of int64, shape (n_samples,)
    repetition_labels : numpy.ndarray of int64, shape (n_samples,)
    n_clipped : int
        Number of windows that extend past the end of the recording and are
        therefore shorter than ``segment_duration`` (or missing entirely).
    """
    gesture_labels = np.full(n_samples, -1, dtype=np.int64)
    repetition_labels = np.full(n_samples, -1, dtype=np.int64)
    n_clipped = 0

    for rep, rep_start_sec in rep_coeffs.items():
        for gesture in range(n_gestures):
            start_index = (rep_start_sec + gesture * gesture_period) * fs
            end_index = start_index + segment_duration * fs

            # Slices past the end are clipped silently by numpy, so count
            # them here to be able to report short recordings.
            if end_index > n_samples:
                n_clipped += 1

            gesture_labels[start_index:end_index] = gesture
            repetition_labels[start_index:end_index] = rep

    return gesture_labels, repetition_labels, n_clipped


print("Starting data processing...\n")

# Starting loop from 1 to 40
for subject_id in range(1, 41):
    # creating file name (like: 1_filtered.csv, 2_filtered.csv)
    file_name = f"{subject_id}_filtered.csv"
    file_path = os.path.join(base_path, file_name)

    # checking the file existence
    if not os.path.exists(file_path):
        print(f"warning: {file_name} not found! skipping...")
        continue

    print(f"Subject {subject_id} lebeling the data...")

    # data load (the files have no header row)
    df = pd.read_csv(file_path, header=None)
    df.columns = ['ch1', 'ch2', 'ch3', 'ch4']

    # gesture and repetition labels, computed once per recording
    gesture_labels, repetition_labels, n_clipped = build_segment_labels(
        len(df), fs, segment_duration, rep_coeffs, n_gestures, gesture_period
    )
    if n_clipped:
        # A short recording would otherwise yield shorter segments downstream
        # without any hint about the cause.
        print(f"warning: {file_name} has only {len(df)} samples; "
              f"{n_clipped} gesture segment(s) are truncated or missing")

    df['gesture'] = gesture_labels
    df['repetition'] = repetition_labels
    df['subject'] = subject_id  # dynamic subject id (1-40)

    # drop the unlabeled samples and keep only the gesture data
    labeled_df = df[df['gesture'] != -1].copy()

    # processing and adding to the data list
    all_subjects_data.append(labeled_df)

# pd.concat raises an unhelpful "No objects to concatenate" on an empty list,
# so fail early with a message that points at the likely cause.
if not all_subjects_data:
    raise FileNotFoundError(
        f"No '<id>_filtered.csv' files were found in {base_path!r}; check base_path."
    )

# adding all datasets together (Concatenate)
print("\nadding all datasets together...")
final_dataset = pd.concat(all_subjects_data, ignore_index=True)

# saving the final file
output_filename = "all_40_subjects_labeled_data.csv"
final_dataset.to_csv(output_filename, index=False)

print(f"\nprocessing done! total {len(all_subjects_data)}file merged")
print(f"new file name: '{output_filename}'")
print(f"new dataset size: {final_dataset.shape}")

Starting data processing...

Subject 1 lebeling the data...
Subject 2 lebeling the data...
Subject 3 lebeling the data...
Subject 4 lebeling the data...
Subject 5 lebeling the data...
Subject 6 lebeling the data...
Subject 7 lebeling the data...
Subject 8 lebeling the data...
Subject 9 lebeling the data...
Subject 10 lebeling the data...
Subject 11 lebeling the data...
Subject 12 lebeling the data...
Subject 13 lebeling the data...
Subject 14 lebeling the data...
Subject 15 lebeling the data...
Subject 16 lebeling the data...
Subject 17 lebeling the data...
Subject 18 lebeling the data...
Subject 19 lebeling the data...
Subject 20 lebeling the data...
Subject 21 lebeling the data...
Subject 22 lebeling the data...
Subject 23 lebeling the data...
Subject 24 lebeling the data...
Subject 25 lebeling the data...
Subject 26 lebeling the data...
Subject 27 lebeling the data...
Subject 28 lebeling the data...
Subject 29 lebeling the data...
Subject 30 lebeling the data...
Subject 31 lebeling 

In [10]:
# Preview the first rows of the merged dataset to sanity-check the label columns.
final_dataset.head()

Unnamed: 0,ch1,ch2,ch3,ch4,gesture,repetition,subject
0,-0.008538,-0.022824,0.00815,0.005416,0,0,1
1,-0.007186,-0.015051,-0.001013,0.004787,0,0,1
2,-0.01087,-0.01146,-0.011223,0.003596,0,0,1
3,-0.016778,-0.006743,-0.018296,0.001529,0,0,1
4,-0.022614,0.001419,-0.018439,-0.001502,0,0,1


# **Converting the sEMG data to spectrograms**

In [12]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import signal

# Load the labeled dataset. The labeling cell writes this file with a relative
# name, so read it the same way instead of through a Kaggle-only absolute path.
print("Loading data...")
df = pd.read_csv("all_40_subjects_labeled_data.csv")
fs = 2000 # Sampling rate

# Spectrogram window settings (samples per segment / overlapping samples).
nperseg = 256
noverlap = 128

# Create the main output directory and sub-directories for each gesture
output_dir = "sEMG_Images"
os.makedirs(output_dir, exist_ok=True)
for i in range(10):  # 10 folders for 10 distinct gestures
    os.makedirs(os.path.join(output_dir, f"gesture_{i}"), exist_ok=True)

print("Started generating images... (This might take a while)")

# Group the data by Subject, Repetition, and Gesture: one image per group
grouped = df.groupby(['subject', 'repetition', 'gesture'])

for (subj, rep, gest), group in grouped:
    # We are using Channel 1 (ch1) data to create the spectrogram.
    # (You can change it to ch2, ch3, or the average of all 4 channels if needed)
    sig = group['ch1'].values

    # A segment shorter than one window makes scipy shrink the window (or
    # raise once it is no longer than noverlap), which would give an image
    # that is not comparable with the others. Skip it and say so.
    if sig.size < nperseg:
        print(f"warning: subject {subj}, repetition {rep}, gesture {gest} "
              f"has only {sig.size} samples; skipping...")
        continue

    # Generate the spectrogram
    f, t, Sxx = signal.spectrogram(sig, fs=fs, nperseg=nperseg, noverlap=noverlap)

    # Plot as an image (without axes or borders to get a pure image)
    fig, ax = plt.subplots(figsize=(4, 4))

    # Power is shown in dB; the 1e-10 offset keeps log10 away from zero.
    # Using 'jet' colormap, which represents the signal intensity in blue-red-green colors
    ax.pcolormesh(t, f, 10 * np.log10(Sxx + 1e-10), shading='gouraud', cmap='jet')
    ax.axis('off') # Hide x and y axes

    # Save the image; the gesture is encoded by the folder, not the file name
    img_name = f"subj_{subj}_rep_{rep}.png"
    img_path = os.path.join(output_dir, f"gesture_{int(gest)}", img_name)

    fig.savefig(img_path, bbox_inches='tight', pad_inches=0)
    plt.close(fig) # Close the figure to save memory

print(f"All images successfully saved in the '{output_dir}' folder!")

Loading data...
Started generating images... (This might take a while)


# **converting the sEMG data to Signal Graph**

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Step 1: read the labeled samples produced earlier in the notebook.
print("Loading data...")
df = pd.read_csv("all_40_subjects_labeled_data.csv")

# Step 2: prepare the output tree, one sub-folder per gesture (10 gestures).
output_dir = "sEMG_Signal_Plots"
os.makedirs(output_dir, exist_ok=True)
for gesture_id in range(10):
    gesture_dir = os.path.join(output_dir, f"gesture_{gesture_id}")
    os.makedirs(gesture_dir, exist_ok=True)

print("Started generating signal plot images... (This might take a while)")

# Each channel is drawn in its own colour, in this order.
channel_colors = {
    'ch1': 'blue',
    'ch2': 'red',
    'ch3': 'green',
    'ch4': 'orange',
}

# Step 3: one image per (subject, repetition, gesture) combination.
grouped = df.groupby(['subject', 'repetition', 'gesture'])

for (subj, rep, gest), group in grouped:
    # 4x4 inch canvas holding all four channels overlaid.
    fig, ax = plt.subplots(figsize=(4, 4))

    for channel, color in channel_colors.items():
        ax.plot(group[channel].values, color=color, alpha=0.8, linewidth=0.5)

    # No axes or frame: only the traces end up in the picture.
    ax.axis('off')

    # The gesture is encoded by the folder; subject and repetition by the name.
    img_name = f"subj_{subj}_rep_{rep}.png"
    img_path = os.path.join(output_dir, f"gesture_{int(gest)}", img_name)

    # Tight bounding box without padding so only the drawing is stored.
    fig.savefig(img_path, bbox_inches='tight', pad_inches=0)
    plt.close(fig)  # release the figure so memory does not grow over the loop

print(f"All images successfully saved in the '{output_dir}' folder!")

Loading data...
Started generating signal plot images... (This might take a while)
All images successfully saved in the 'sEMG_Signal_Plots' folder!
