In [2]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [23]:
#loading data
import pandas as pd
import os

def load_emg_data(folder_path):
    data = []
    for filename in os.listdir(folder_path):
        if filename.endswith('.txt'):
            file_path = os.path.join(folder_path, filename)
            try:
                with open(file_path, 'r') as file:
                    lines = file.readlines()

                    # Find where the actual data starts
                    for i, line in enumerate(lines):
                        if line.startswith('-') or line[0].isdigit():
                            data_start_idx = i
                            break

                    # Load the data part into a DataFrame
                    df = pd.read_csv(file_path, delim_whitespace=True, header=None, skiprows=data_start_idx, on_bad_lines='skip')
                    data.append(df)
            except Exception as e:
                print(f"Error processing {file_path}: {e}")
    return data

# Load data from A_TXT and N_TXT folders
a_txt_data = load_emg_data('/content/drive/MyDrive/SEMG_DB1/A_TXT')
n_txt_data = load_emg_data('/content/drive/MyDrive/SEMG_DB1/N_TXT')

# Verify the loaded data
print(f"Loaded {len(a_txt_data)} files from A_TXT")
print(f"Loaded {len(n_txt_data)} files from N_TXT")
print(a_txt_data[0].head())  # Display the first few rows of the first file to verify
print(n_txt_data[0].head())  # Display the first few rows of the first file to verify





Loaded 33 files from A_TXT
Loaded 33 files from N_TXT
        0       1       2       3    4
0  0.0000  0.0052  0.0007 -0.0030 -4.6
1 -0.0015 -0.0068  0.0030 -0.0091 -4.5
2 -0.0053 -0.0458 -0.0008 -0.0098 -4.6
3 -0.0068 -0.0398 -0.0008 -0.0068 -4.6
4 -0.0030 -0.0368 -0.0038 -0.0046 -4.5
        0       1       2       3    4
0 -0.0030 -0.0030 -0.0060 -0.0015  5.7
1  0.0037 -0.0083 -0.0091 -0.0008  5.8
2  0.0045 -0.0120 -0.0098  0.0000  5.9
3  0.0015 -0.0113 -0.0105  0.0015  5.8
4  0.0015 -0.0075 -0.0098  0.0045  6.1


In [24]:
#analyse
def analyze_data(data):
    for i, df in enumerate(data):
        print(f"File {i+1}:")
        print(f"Shape: {df.shape}")
        print("Summary Statistics:")
        print(df.describe())
        print("\n")

# Analyze data from A_TXT and N_TXT folders
print("A_TXT Data Analysis:")
analyze_data(a_txt_data)
print("N_TXT Data Analysis:")
analyze_data(n_txt_data)


A_TXT Data Analysis:
File 1:
Shape: (11403, 5)
Summary Statistics:
                  0             1             2             3             4
count  11403.000000  11403.000000  11403.000000  11403.000000  11403.000000
mean      -0.000336     -0.000661     -0.000474      0.000591    -58.798693
std        0.006402      0.034430      0.012199      0.069221     44.092145
min       -0.082500     -0.334500     -0.115500     -0.563300   -113.300000
25%       -0.003000     -0.008300     -0.004600     -0.009800   -107.200000
50%        0.000000      0.000700     -0.000800      0.001500    -62.300000
75%        0.003000      0.009000      0.003700      0.016500     -8.900000
max        0.053200      0.202500      0.208500      0.445500     -2.900000


File 2:
Shape: (12973, 5)
Summary Statistics:
                  0             1             2             3             4
count  12973.000000  12973.000000  12973.000000  12973.000000  12973.000000
mean      -0.000139     -0.000626     -0.000609  

In [20]:
#Normalize the Data
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

def normalize_data(data):
    scaler = MinMaxScaler()
    normalized_data = []
    for df in data:
        # Ensure that the DataFrame has the correct number of columns
        df = df.iloc[:, :5]  # Take only the first 5 columns
        normalized_df = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)
        normalized_data.append(normalized_df)
    return normalized_data

# Normalize data from A_TXT and N_TXT folders
normalized_a_txt_data = normalize_data(a_txt_data)
normalized_n_txt_data = normalize_data(n_txt_data)

# Verify normalization
print("Normalized A_TXT Data Example:")
print(normalized_a_txt_data[0].head())
print("Normalized N_TXT Data Example:")
print(normalized_n_txt_data[0].head())



Normalized A_TXT Data Example:
          0         1         2         3         4
0  0.607959  0.632588  0.358642  0.555412  0.984601
1  0.596905  0.610242  0.365741  0.549366  0.985507
2  0.568902  0.537616  0.354012  0.548672  0.984601
3  0.557848  0.548790  0.354012  0.551646  0.984601
4  0.585851  0.554376  0.344753  0.553826  0.985507
Normalized N_TXT Data Example:
          0         1         2         3         4
0  0.438623  0.514107  0.405286  0.464623  0.032702
1  0.538922  0.503030  0.371145  0.465723  0.033563
2  0.550898  0.495298  0.363436  0.466981  0.034423
3  0.505988  0.496761  0.355727  0.469340  0.033563
4  0.505988  0.504702  0.363436  0.474057  0.036145


In [21]:
#Segment the Data
def segment_data(data, window_size=100, overlap=50):
    segments = []
    for df in data:
        num_segments = (len(df) - window_size) // overlap + 1
        for i in range(num_segments):
            start = i * overlap
            end = start + window_size
            segment = df.iloc[start:end].reset_index(drop=True)
            segments.append(segment)
    return segments

# Segment data from A_TXT and N_TXT folders
segmented_a_txt_data = segment_data(normalized_a_txt_data)
segmented_n_txt_data = segment_data(normalized_n_txt_data)

# Verify segmentation
print(f"Number of segments from A_TXT: {len(segmented_a_txt_data)}")
print(f"Number of segments from N_TXT: {len(segmented_n_txt_data)}")
print("Example segment from A_TXT:")
print(segmented_a_txt_data[0].head())
print("Example segment from N_TXT:")
print(segmented_n_txt_data[0].head())



Number of segments from A_TXT: 16961
Number of segments from N_TXT: 9448
Example segment from A_TXT:
          0         1         2         3         4
0  0.607959  0.632588  0.358642  0.555412  0.984601
1  0.596905  0.610242  0.365741  0.549366  0.985507
2  0.568902  0.537616  0.354012  0.548672  0.984601
3  0.557848  0.548790  0.354012  0.551646  0.984601
4  0.585851  0.554376  0.344753  0.553826  0.985507
Example segment from N_TXT:
          0         1         2         3         4
0  0.438623  0.514107  0.405286  0.464623  0.032702
1  0.538922  0.503030  0.371145  0.465723  0.033563
2  0.550898  0.495298  0.363436  0.466981  0.034423
3  0.505988  0.496761  0.355727  0.469340  0.033563
4  0.505988  0.504702  0.363436  0.474057  0.036145


In [26]:
#Extract Features
import numpy as np

def extract_features(segment):
    features = {}
    # Compute mean (single value for each column)
    features['mean'] = segment.mean().values.tolist()
    # Compute standard deviation (single value for each column)
    features['std'] = segment.std().values.tolist()
    # Compute root mean square (RMS) (single value for each column)
    features['rms'] = np.sqrt(np.mean(segment**2, axis=0)).tolist()
    # Compute number of zero crossings (single value for each column)
    zero_crossings = ((segment.shift(1) * segment) < 0).sum().values.tolist()
    features['zero_crossings'] = zero_crossings
    return features

def extract_features_from_segments(segments):
    all_features = []
    for segment in segments:
        features = extract_features(segment)
        # Flatten features dictionary into a list
        flat_features = []
        for key in ['mean', 'std', 'rms', 'zero_crossings']:
            flat_features.extend(features[key])
        all_features.append(flat_features)
    return all_features

# Extract features from segmented data
features_a_txt = extract_features_from_segments(segmented_a_txt_data)
features_n_txt = extract_features_from_segments(segmented_n_txt_data)

# Convert the features to DataFrame for easier manipulation and analysis
features_a_txt_df = pd.DataFrame(features_a_txt, columns=[
    'mean_1', 'mean_2', 'mean_3', 'mean_4', 'mean_5',
    'std_1', 'std_2', 'std_3', 'std_4', 'std_5',
    'rms_1', 'rms_2', 'rms_3', 'rms_4', 'rms_5',
    'zero_crossings_1', 'zero_crossings_2', 'zero_crossings_3', 'zero_crossings_4', 'zero_crossings_5'
])
features_n_txt_df = pd.DataFrame(features_n_txt, columns=[
    'mean_1', 'mean_2', 'mean_3', 'mean_4', 'mean_5',
    'std_1', 'std_2', 'std_3', 'std_4', 'std_5',
    'rms_1', 'rms_2', 'rms_3', 'rms_4', 'rms_5',
    'zero_crossings_1', 'zero_crossings_2', 'zero_crossings_3', 'zero_crossings_4', 'zero_crossings_5'
])

# Verify feature extraction
print(f"Extracted features for A_TXT data: {features_a_txt_df.shape}")
print(f"Extracted features for N_TXT data: {features_n_txt_df.shape}")
print("Example features from A_TXT:")
print(features_a_txt_df.head())
print("Example features from N_TXT:")
print(features_n_txt_df.head())



Extracted features for A_TXT data: (16961, 20)
Extracted features for N_TXT data: (9448, 20)
Example features from A_TXT:
     mean_1    mean_2    mean_3    mean_4    mean_5     std_1     std_2  \
0  0.603965  0.619717  0.355886  0.559005  0.984212  0.037909  0.023979   
1  0.608342  0.619896  0.354083  0.558702  0.983524  0.035306  0.018152   
2  0.608438  0.621756  0.354759  0.559344  0.982953  0.028020  0.023624   
3  0.608998  0.622352  0.355028  0.559202  0.982264  0.025408  0.019608   
4  0.608556  0.620574  0.355673  0.558490  0.981513  0.028601  0.012279   

      std_3     std_4     std_5     rms_1     rms_2     rms_3     rms_4  \
0  0.010885  0.004872  0.000991  0.605141  0.620176  0.356051  0.559026   
1  0.011352  0.003549  0.000995  0.609355  0.620159  0.354263  0.558714   
2  0.011666  0.003518  0.001041  0.609076  0.622200  0.354949  0.559355   
3  0.012873  0.003565  0.001094  0.609522  0.622658  0.355259  0.559213   
4  0.010555  0.003452  0.000884  0.609221  0.620694 

In [27]:
#preparing data for GAN
from sklearn.model_selection import train_test_split
import tensorflow as tf

# Combine A_TXT and N_TXT features
combined_features = pd.concat([features_a_txt_df, features_n_txt_df], ignore_index=True)

# Shuffle data
combined_features = combined_features.sample(frac=1).reset_index(drop=True)

# Split into training and validation sets
train_data, val_data = train_test_split(combined_features, test_size=0.2, shuffle=True)

# Convert to TensorFlow tensors
train_data_tensor = tf.convert_to_tensor(train_data, dtype=tf.float32)
val_data_tensor = tf.convert_to_tensor(val_data, dtype=tf.float32)

# Verify data shapes
print(f"Training data shape: {train_data_tensor.shape}")
print(f"Validation data shape: {val_data_tensor.shape}")





Training data shape: (21127, 20)
Validation data shape: (5282, 20)


In [29]:
#Train the GAN Model
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LeakyReLU, Reshape, Flatten
from tensorflow.keras.optimizers import Adam

# Define the generator model
def build_generator():
    model = Sequential()
    model.add(Dense(128, input_dim=combined_features.shape[1]))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(256))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(512))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(combined_features.shape[1], activation='tanh'))
    model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0002, beta_1=0.5))
    return model

# Define the discriminator model
def build_discriminator():
    model = Sequential()
    model.add(Dense(512, input_dim=combined_features.shape[1]))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(256))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(128))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0002, beta_1=0.5))
    return model

# Define the GAN model
def build_gan(generator, discriminator):
    model = Sequential()
    model.add(generator)
    model.add(discriminator)
    model.compile(loss='binary_crossentropy', optimizer=Adam(learning_rate=0.0002, beta_1=0.5))
    return model

# Build and compile the models
generator = build_generator()
discriminator = build_discriminator()
gan = build_gan(generator, discriminator)

# Train the GAN
def train_gan(gan, generator, discriminator, train_data, epochs=10000, batch_size=64):
    for epoch in range(epochs):
        # Generate fake data
        noise = tf.random.normal(shape=(batch_size, combined_features.shape[1]))
        generated_data = generator.predict(noise)

        # Sample real data
        real_data_indices = np.random.randint(0, train_data.shape[0], size=batch_size)
        real_data = train_data.numpy()[real_data_indices]

        # Concatenate fake and real data
        combined_data = np.vstack([real_data, generated_data])

        # Labels for real and fake data
        labels_real = np.ones((batch_size, 1))
        labels_fake = np.zeros((batch_size, 1))
        labels = np.vstack([labels_real, labels_fake])

        # Train the discriminator
        discriminator_loss = discriminator.train_on_batch(combined_data, labels)

        # Train the generator
        noise = tf.random.normal(shape=(batch_size, combined_features.shape[1]))
        generator_loss = gan.train_on_batch(noise, labels_real)

        if epoch % 1000 == 0:
            print(f"Epoch {epoch} | Discriminator Loss: {discriminator_loss} | Generator Loss: {generator_loss}")

# Start training
train_gan(gan, generator, discriminator, train_data_tensor)



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 6000 | Discriminator Loss: 1.4165273904800415 | Generator Loss: 0.12153173983097076
Epoch 7000 | Discriminator Loss: 1.5165448188781738 | Generator Loss: 0.11007039994001389
Epoch 8000 | Discriminator Loss: 1.488002061843872 | Generator Loss: 0.11368215084075928
Epoch 9000 | Discriminator Loss: 1.427176833152771 | Generator Loss: 0.11495114117860794


In [30]:
from tensorflow.keras.models import save_model

# Save the generator and discriminator models
def save_gan(generator, discriminator, generator_path='generator_model.h5', discriminator_path='discriminator_model.h5'):
    generator.save(generator_path)
    discriminator.save(discriminator_path)
    print("Models saved successfully.")

# Assuming 'generator' and 'discriminator' are your trained models
save_gan(generator, discriminator)


Models saved successfully.


  saving_api.save_model(


In [31]:
from tensorflow.keras.models import load_model

# Load the generator and discriminator models
def load_gan(generator_path='generator_model.h5', discriminator_path='discriminator_model.h5'):
    generator = load_model(generator_path)
    discriminator = load_model(discriminator_path)
    return generator, discriminator

# Load the models
generator, discriminator = load_gan()

# You can now use 'generator' and 'discriminator' for further tasks


In [32]:
# Generate new data using the loaded generator
def generate_data(generator, noise_dim, num_samples):
    noise = np.random.normal(0, 1, (num_samples, noise_dim))
    generated_data = generator.predict(noise)
    return generated_data

# Example usage
noise_dim = combined_features.shape[1]  # Same as the input dimension used during training
num_samples = 10  # Number of samples to generate
generated_samples = generate_data(generator, noise_dim, num_samples)


