In [2]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
#loading data
import pandas as pd
import os

def load_emg_data(folder_path):
    data = []
    for filename in os.listdir(folder_path):
        if filename.endswith('.txt'):
            file_path = os.path.join(folder_path, filename)
            try:
                with open(file_path, 'r') as file:
                    lines = file.readlines()

                    # Find where the actual data starts
                    for i, line in enumerate(lines):
                        if line.startswith('-') or line[0].isdigit():
                            data_start_idx = i
                            break

                    # Load the data part into a DataFrame
                    df = pd.read_csv(file_path, delim_whitespace=True, header=None, skiprows=data_start_idx, on_bad_lines='skip')
                    data.append(df)
            except Exception as e:
                print(f"Error processing {file_path}: {e}")
    return data

# Load data from A_TXT and N_TXT folders
a_txt_data = load_emg_data('/content/drive/MyDrive/SEMG_DB1/A_TXT')
n_txt_data = load_emg_data('/content/drive/MyDrive/SEMG_DB1/N_TXT')

# Verify the loaded data
print(f"Loaded {len(a_txt_data)} files from A_TXT")
print(f"Loaded {len(n_txt_data)} files from N_TXT")
print(a_txt_data[0].head())  # Display the first few rows of the first file to verify
print(n_txt_data[0].head())  # Display the first few rows of the first file to verify





Loaded 33 files from A_TXT
Loaded 33 files from N_TXT
        0       1       2       3    4
0  0.0000  0.0052  0.0007 -0.0030 -4.6
1 -0.0015 -0.0068  0.0030 -0.0091 -4.5
2 -0.0053 -0.0458 -0.0008 -0.0098 -4.6
3 -0.0068 -0.0398 -0.0008 -0.0068 -4.6
4 -0.0030 -0.0368 -0.0038 -0.0046 -4.5
        0       1       2       3    4
0 -0.0030 -0.0030 -0.0060 -0.0015  5.7
1  0.0037 -0.0083 -0.0091 -0.0008  5.8
2  0.0045 -0.0120 -0.0098  0.0000  5.9
3  0.0015 -0.0113 -0.0105  0.0015  5.8
4  0.0015 -0.0075 -0.0098  0.0045  6.1


In [18]:
#analyse
def analyze_data(data):
    for i, df in enumerate(data):
        print(f"File {i+1}:")
        print(f"Shape: {df.shape}")
        print("Summary Statistics:")
        print(df.describe())
        print("\n")

# Analyze data from A_TXT and N_TXT folders
print("A_TXT Data Analysis:")
analyze_data(a_txt_data)
print("N_TXT Data Analysis:")
analyze_data(n_txt_data)


A_TXT Data Analysis:
File 1:
Shape: (11403, 5)
Summary Statistics:
                  0             1             2             3             4
count  11403.000000  11403.000000  11403.000000  11403.000000  11403.000000
mean      -0.000336     -0.000661     -0.000474      0.000591    -58.798693
std        0.006402      0.034430      0.012199      0.069221     44.092145
min       -0.082500     -0.334500     -0.115500     -0.563300   -113.300000
25%       -0.003000     -0.008300     -0.004600     -0.009800   -107.200000
50%        0.000000      0.000700     -0.000800      0.001500    -62.300000
75%        0.003000      0.009000      0.003700      0.016500     -8.900000
max        0.053200      0.202500      0.208500      0.445500     -2.900000


File 2:
Shape: (12973, 5)
Summary Statistics:
                  0             1             2             3             4
count  12973.000000  12973.000000  12973.000000  12973.000000  12973.000000
mean      -0.000139     -0.000626     -0.000609  

In [19]:
#Normalize the Data
from sklearn.preprocessing import MinMaxScaler

def normalize_data(data):
    scaler = MinMaxScaler()
    normalized_data = []
    for df in data:
        normalized_df = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)
        normalized_data.append(normalized_df)
    return normalized_data

# Normalize data from A_TXT and N_TXT folders
normalized_a_txt_data = normalize_data(a_txt_data)
normalized_n_txt_data = normalize_data(n_txt_data)

# Verify normalization
print("Normalized A_TXT Data Example:")
print(normalized_a_txt_data[0].head())
print("Normalized N_TXT Data Example:")
print(normalized_n_txt_data[0].head())


Normalized A_TXT Data Example:
          0         1         2         3         4
0  0.607959  0.632588  0.358642  0.555412  0.984601
1  0.596905  0.610242  0.365741  0.549366  0.985507
2  0.568902  0.537616  0.354012  0.548672  0.984601
3  0.557848  0.548790  0.354012  0.551646  0.984601
4  0.585851  0.554376  0.344753  0.553826  0.985507
Normalized N_TXT Data Example:
          0         1         2         3         4
0  0.438623  0.514107  0.405286  0.464623  0.032702
1  0.538922  0.503030  0.371145  0.465723  0.033563
2  0.550898  0.495298  0.363436  0.466981  0.034423
3  0.505988  0.496761  0.355727  0.469340  0.033563
4  0.505988  0.504702  0.363436  0.474057  0.036145


In [6]:
#Segment the Data
def segment_data(data, window_size=100, overlap=50):
    segments = []
    for df in data:
        num_segments = (len(df) - window_size) // overlap + 1
        for i in range(num_segments):
            start = i * overlap
            end = start + window_size
            segment = df.iloc[start:end].reset_index(drop=True)
            segments.append(segment)
    return segments

# Segment data from A_TXT and N_TXT folders
segmented_a_txt_data = segment_data(normalized_a_txt_data)
segmented_n_txt_data = segment_data(normalized_n_txt_data)

# Verify segmentation
print(f"Number of segments from A_TXT: {len(segmented_a_txt_data)}")
print(f"Number of segments from N_TXT: {len(segmented_n_txt_data)}")
print("Example segment from A_TXT:")
print(segmented_a_txt_data[0].head())
print("Example segment from N_TXT:")
print(segmented_n_txt_data[0].head())


Number of segments from A_TXT: 16961
Number of segments from N_TXT: 9448
Example segment from A_TXT:
          0         1         2         3         4
0  0.607959  0.632588  0.358642  0.555412  0.984601
1  0.596905  0.610242  0.365741  0.549366  0.985507
2  0.568902  0.537616  0.354012  0.548672  0.984601
3  0.557848  0.548790  0.354012  0.551646  0.984601
4  0.585851  0.554376  0.344753  0.553826  0.985507
Example segment from N_TXT:
          0         1         2         3         4
0  0.438623  0.514107  0.405286  0.464623  0.032702
1  0.538922  0.503030  0.371145  0.465723  0.033563
2  0.550898  0.495298  0.363436  0.466981  0.034423
3  0.505988  0.496761  0.355727  0.469340  0.033563
4  0.505988  0.504702  0.363436  0.474057  0.036145


In [7]:
#Extract Features
import numpy as np

def extract_features(segment):
    features = {}
    # Compute mean
    features['mean'] = segment.mean().values
    # Compute standard deviation
    features['std'] = segment.std().values
    # Compute root mean square (RMS)
    features['rms'] = np.sqrt(np.mean(segment**2, axis=0)).values
    # Compute number of zero crossings
    zero_crossings = ((segment.shift(1) * segment) < 0).sum().values
    features['zero_crossings'] = zero_crossings
    return features

def extract_features_from_segments(segments):
    all_features = []
    for segment in segments:
        features = extract_features(segment)
        all_features.append(features)
    return all_features

# Extract features from segmented data
features_a_txt = extract_features_from_segments(segmented_a_txt_data)
features_n_txt = extract_features_from_segments(segmented_n_txt_data)

# Convert the features to DataFrame for easier manipulation and analysis
features_a_txt_df = pd.DataFrame(features_a_txt)
features_n_txt_df = pd.DataFrame(features_n_txt)

# Verify feature extraction
print(f"Extracted features for A_TXT data: {features_a_txt_df.shape}")
print(f"Extracted features for N_TXT data: {features_n_txt_df.shape}")
print("Example features from A_TXT:")
print(features_a_txt_df.head())
print("Example features from N_TXT:")
print(features_n_txt_df.head())


Extracted features for A_TXT data: (16961, 4)
Extracted features for N_TXT data: (9448, 4)
Example features from A_TXT:
                                                mean  \
0  [0.6039646278555639, 0.6197169459962756, 0.355...   
1  [0.6083419307295507, 0.6198957169459963, 0.354...   
2  [0.6084377302873987, 0.621756052141527, 0.3547...   
3  [0.6089977892409729, 0.6223519553072626, 0.355...   
4  [0.6085556374355198, 0.6205735567970204, 0.355...   

                                                 std  \
0  [0.03790881751850861, 0.02397902634761795, 0.0...   
1  [0.03530625032816435, 0.018151916259237656, 0....   
2  [0.028020253675153754, 0.023624055642709935, 0...   
3  [0.02540834496595841, 0.019608098953528937, 0....   
4  [0.02860061941890742, 0.012278797352733158, 0....   

                                                 rms   zero_crossings  
0  [0.6051412887598274, 0.6201760531675541, 0.356...  [0, 0, 0, 0, 0]  
1  [0.6093553730647412, 0.6201587676000673, 0.354...  [0, 0, 0

In [17]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

def preprocess_data(data_list):
    combined_data = pd.concat(data_list, ignore_index=True)
    combined_data = combined_data.iloc[:, 1:]  # Skip metadata
    return combined_data

# Load and preprocess data
a_txt_data_preprocessed = preprocess_data(a_txt_data)
n_txt_data_preprocessed = preprocess_data(n_txt_data)

# Check the number of columns in each dataset
print("Columns in A_TXT dataset:", a_txt_data_preprocessed.shape[1])
print("Columns in N_TXT dataset:", n_txt_data_preprocessed.shape[1])

# Ensure both datasets have the same number of columns
if a_txt_data_preprocessed.shape[1] != n_txt_data_preprocessed.shape[1]:
    # Align column counts
    min_columns = min(a_txt_data_preprocessed.shape[1], n_txt_data_preprocessed.shape[1])
    a_txt_data_preprocessed = a_txt_data_preprocessed.iloc[:, :min_columns]
    n_txt_data_preprocessed = n_txt_data_preprocessed.iloc[:, :min_columns]

# Convert DataFrames to numpy arrays
features_a_txt_array = np.array(a_txt_data_preprocessed)
features_n_txt_array = np.array(n_txt_data_preprocessed)

# Combine data for normalization
combined_features = np.vstack([features_a_txt_array, features_n_txt_array])

# Normalize the features to be between -1 and 1
scaler = MinMaxScaler(feature_range=(-1, 1))
combined_features_normalized = scaler.fit_transform(combined_features)

# Convert to TensorFlow tensors
import tensorflow as tf
features_tensor = tf.convert_to_tensor(combined_features_normalized, dtype=tf.float32)

# Shuffle data and split into training and validation sets
train_data, val_data = train_test_split(features_tensor, test_size=0.2, shuffle=True)

# Define GAN models
def build_generator():
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(128, input_dim=train_data.shape[1]),
        tf.keras.layers.LeakyReLU(0.2),
        tf.keras.layers.Dense(256),
        tf.keras.layers.LeakyReLU(0.2),
        tf.keras.layers.Dense(512),
        tf.keras.layers.LeakyReLU(0.2),
        tf.keras.layers.Dense(train_data.shape[1], activation='tanh')
    ])
    return model

def build_discriminator():
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(512, input_dim=train_data.shape[1]),
        tf.keras.layers.LeakyReLU(0.2),
        tf.keras.layers.Dense(256),
        tf.keras.layers.LeakyReLU(0.2),
        tf.keras.layers.Dense(128),
        tf.keras.layers.LeakyReLU(0.2),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    return model

def build_gan(generator, discriminator):
    discriminator.trainable = False
    model = tf.keras.Sequential([generator, discriminator])
    return model

# Compile the models
def compile_models(generator, discriminator, gan):
    discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    gan.compile(loss='binary_crossentropy', optimizer='adam')

# Training function
def train_gan(gan, generator, discriminator, data, epochs=10000, batch_size=64):
    for epoch in range(epochs):
        # Train discriminator
        idx = np.random.randint(0, data.shape[0], batch_size)
        real_samples = data[idx]
        noise = np.random.randn(batch_size, train_data.shape[1])
        fake_samples = generator.predict(noise)

        real_labels = np.ones((batch_size, 1))
        fake_labels = np.zeros((batch_size, 1))

        d_loss_real = discriminator.train_on_batch(real_samples, real_labels)
        d_loss_fake = discriminator.train_on_batch(fake_samples, fake_labels)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # Train generator
        noise = np.random.randn(batch_size, train_data.shape[1])
        g_loss = gan.train_on_batch(noise, real_labels)

        # Print progress
        if (epoch + 1) % 1000 == 0:
            print(f"Epoch {epoch + 1}/{epochs} | D Loss: {d_loss[0]} | D Accuracy: {100 * d_loss[1]} | G Loss: {g_loss}")

# Build and compile models
generator = build_generator()
discriminator = build_discriminator()
gan = build_gan(generator, discriminator)
compile_models(generator, discriminator, gan)

# Train GAN
train_gan(gan, generator, discriminator, train_data.numpy())




Columns in A_TXT dataset: 5
Columns in N_TXT dataset: 4


TypeError: Only integers, slices (`:`), ellipsis (`...`), tf.newaxis (`None`) and scalar tf.int32/tf.int64 tensors are valid indices, got array([1130605,  238280, 1038317, ...,  618032,  410682,  987687])