<a href="https://colab.research.google.com/github/KeerthanaNarayan/Contrastive_Learning_for_Fall_detection/blob/main/SimCLR_Training_Phase_Week_6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import zipfile
import glob
import numpy as np
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Load and preprocess the accelerometer data
def load_and_preprocess_data(zip_path='fall-dataset-all.zip'):
    """Load every CSV in a zip archive and return the standardized sensor matrix.

    The ``.csv`` members are extracted relative to the current working
    directory, concatenated row-wise, reduced to the ten columns listed in
    ``feature_columns`` and standardized column-wise with ``StandardScaler``.

    Args:
        zip_path: Path of the archive to read. Defaults to the file name that
            was previously hard-coded, so existing zero-argument calls keep
            working.

    Returns:
        A 2-D NumPy array with one row per CSV row and one column per entry
        of ``feature_columns``.

    Raises:
        ValueError: If the archive contains no ``.csv`` members.
    """
    feature_columns = [
        "Acc(X)", "Acc(Y)", "Acc(Z)",
        "Rot(X)", "Rot(Y)", "Rot(Z)",
        "Pitch", "Roll", "Yaw", "Timestamp",
    ]

    # Extract the CSV files from the zip file
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        csv_files = [name for name in zip_ref.namelist() if name.endswith('.csv')]
        if not csv_files:
            # Fail here with a message naming the archive instead of letting
            # pd.concat raise its generic "No objects to concatenate".
            raise ValueError(f"No CSV files found in archive {zip_path!r}")
        zip_ref.extractall(members=csv_files)

    # Read and concatenate the extracted CSV files into a DataFrame
    frames = [pd.read_csv(path, encoding='latin-1') for path in csv_files]
    data = pd.concat(frames, ignore_index=True)

    # NOTE(review): "Timestamp" is standardized and returned as a feature
    # alongside the sensor channels -- confirm that this is intended.
    accelerometer_data = data[feature_columns].values

    # Standardize the data
    scaler = StandardScaler()
    standardized_data = scaler.fit_transform(accelerometer_data)

    return standardized_data

# Define the augmentation function
def augment_function(sample):
    """Return one randomly augmented view of ``sample``.

    Thin entry point used by the dataset classes; all of the work is
    delegated to ``apply_augmentation``.
    """
    return apply_augmentation(sample)

# Define augmentation functions
def apply_augmentation(sample):
    """Return a randomly perturbed copy of ``sample``; the input is not modified.

    Three perturbations are applied in order:

    1. additive Gaussian noise (mean 0, standard deviation 0.1),
    2. a circular shift along axis 0 by a random offset in ``[1, len - 1]``,
    3. multiplication of the whole array by one factor drawn from
       ``[0.8, 1.2)``.

    Args:
        sample: NumPy array (or anything ``np.array`` accepts). Integer and
            boolean inputs are promoted to float64 so the noise can be added.

    Returns:
        A floating-point array with the same shape as ``sample``.
    """
    augmented_sample = np.array(sample, copy=True)
    if not np.issubdtype(augmented_sample.dtype, np.floating):
        # In-place "+=" of float noise into an integer array raises a casting
        # error, so promote first.
        augmented_sample = augmented_sample.astype(np.float64)

    # Noise Injection
    noise = np.random.normal(loc=0, scale=0.1, size=augmented_sample.shape)
    augmented_sample += noise

    # Time Shifting
    # randint(low=1, high=n) needs n > 1; with zero or one element along
    # axis 0 there is nothing to shift, so the step is skipped.
    length = augmented_sample.shape[0] if augmented_sample.ndim else 0
    if length > 1:
        shift_amount = np.random.randint(low=1, high=length)
        # NOTE(review): when ``sample`` is a single row of features rather
        # than a window of time steps, this roll moves values between feature
        # positions instead of shifting in time -- confirm intent.
        augmented_sample = np.roll(augmented_sample, shift_amount, axis=0)

    # Magnitude Scaling
    scaling_factor = np.random.uniform(low=0.8, high=1.2)
    augmented_sample *= scaling_factor

    return augmented_sample

# Define dataset class for accelerometer data
class AccelerometerDataset(torch.utils.data.Dataset):
    """Dataset that yields two independently augmented views of each sample."""

    def __init__(self, data):
        # Indexable collection of samples; stored by reference, not copied.
        self.data = data

    def __len__(self):
        """Return the number of stored samples."""
        return len(self.data)

    def __getitem__(self, index):
        """Return a ``(view_1, view_2)`` pair built from the sample at ``index``.

        ``augment_function`` is called once per view, so the two views are
        separate random augmentations of the same underlying sample.
        """
        base = self.data[index]
        first_view = augment_function(base)
        second_view = augment_function(base)
        return first_view, second_view

# Load the standardized sensor matrix; each row becomes one dataset sample
data = load_and_preprocess_data()

# Wrap the rows so that indexing yields two augmented views of the same row
dataset = AccelerometerDataset(data)

# Batch the view pairs, reshuffling the row order on every pass over the data
batch_size = 64
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Define the SimCLR model architecture
class SimCLRModel(nn.Module):
    """1-D convolutional encoder that maps a signal to a 128-d embedding.

    The backbone is two Conv1d -> ReLU -> MaxPool1d stages (64 then 128
    channels); each pooling stage halves the length, so the linear head
    expects ``128 * (num_steps // 4)`` flattened features.

    Args:
        num_steps: Length of the last input dimension. Inputs are expected as
            ``(batch, 1, num_steps)`` because the first convolution takes a
            single input channel.
    """

    def __init__(self, num_steps):
        super().__init__()
        self.embedding_size = 128
        self.num_steps = num_steps

        # Layer order is kept so state_dict keys stay backbone.0 / backbone.3.
        first_stage = [
            nn.Conv1d(1, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),
        ]
        second_stage = [
            nn.Conv1d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2, stride=2),
        ]
        self.backbone = nn.Sequential(*first_stage, *second_stage)

        flattened_features = 128 * (num_steps // 4)
        self.fc = nn.Linear(flattened_features, self.embedding_size)

    def forward(self, x):
        """Encode ``x`` and return a ``(batch, embedding_size)`` tensor."""
        features = self.backbone(x)
        flat = features.view(features.size(0), -1)
        return self.fc(flat)

# Define the contrastive loss function
class ContrastiveLoss(nn.Module):
    """Cross-entropy contrastive loss between two batches of embeddings.

    Row ``i`` of ``embeddings_1`` is treated as a query whose positive is row
    ``i`` of ``embeddings_2``; every other row of ``embeddings_2`` acts as a
    negative. Only this one direction (view 1 -> view 2) is scored. For
    uninformative embeddings the loss sits near ``ln(batch_size)``.

    Args:
        temperature: Positive divisor applied to the cosine similarities
            before the softmax.

    Raises:
        ValueError: If ``temperature`` is not strictly positive.
    """

    def __init__(self, temperature=1.0):
        super().__init__()
        if temperature <= 0:
            # Zero yields inf/nan logits; a negative value would reward
            # pushing positive pairs apart.
            raise ValueError(f"temperature must be positive, got {temperature}")
        self.temperature = temperature

    def forward(self, embeddings_1, embeddings_2):
        """Return the mean loss for two ``(batch, dim)`` embedding tensors."""
        # L2-normalize so the dot products below are cosine similarities
        embeddings_1 = nn.functional.normalize(embeddings_1, dim=1)
        embeddings_2 = nn.functional.normalize(embeddings_2, dim=1)

        # (batch, batch) matrix of temperature-scaled cosine similarities
        similarities = torch.matmul(embeddings_1, embeddings_2.T) / self.temperature

        # Target for row i is column i: the matching view sits on the diagonal
        labels = torch.arange(embeddings_1.size(0), device=embeddings_1.device)

        # Softmax cross-entropy over each row of the similarity matrix
        return nn.functional.cross_entropy(similarities, labels)

# Initialize the SimCLR model, contrastive loss, and optimizer
# dataset[0][0] is the first augmented view of row 0; only its length is used
num_steps = dataset[0][0].shape[0]
# .double() casts the weights to float64; the training loop feeds .double() inputs to match
model = SimCLRModel(num_steps).double()
criterion = ContrastiveLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10

# Prefer the GPU when one is visible, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    model.train()
    # Sample-weighted running totals, so the epoch report covers every batch
    # rather than only the final (usually smaller) one.
    epoch_loss_sum = 0.0
    samples_seen = 0

    for batch in data_loader:
        # Clear the gradients
        optimizer.zero_grad()

        # Get the batch of augmented samples
        augmented_samples_1, augmented_samples_2 = batch

        # Reshape the input data to include the num_channels dimension
        augmented_samples_1 = augmented_samples_1.unsqueeze(1)
        augmented_samples_2 = augmented_samples_2.unsqueeze(1)

        # Forward pass (inputs are cast to float64 to match the model weights)
        embeddings_1 = model(augmented_samples_1.to(device).double())
        embeddings_2 = model(augmented_samples_2.to(device).double())

        # Calculate the contrastive loss
        loss = criterion(embeddings_1, embeddings_2)

        # Backward pass
        loss.backward()
        optimizer.step()

        # The criterion returns a per-batch mean, so weight it by batch size
        batch_len = embeddings_1.size(0)
        epoch_loss_sum += loss.item() * batch_len
        samples_seen += batch_len

    # An empty loader reports nan instead of failing on an undefined loss
    mean_loss = epoch_loss_sum / samples_seen if samples_seen else float("nan")
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss}")


Epoch [1/10], Loss: 3.242416078900435
Epoch [2/10], Loss: 3.22059810826573
Epoch [3/10], Loss: 3.20605821241998
Epoch [4/10], Loss: 3.2142134552177897
Epoch [5/10], Loss: 3.2006392361755283
Epoch [6/10], Loss: 3.2003081892176124
Epoch [7/10], Loss: 3.20280687717565
Epoch [8/10], Loss: 3.1952038601111146
Epoch [9/10], Loss: 3.19566827932253
Epoch [10/10], Loss: 3.21330200153859


In [None]:
# Save the trained SimCLR model
# Only the state_dict (the parameter tensors) is written, to a path relative
# to the current working directory; reloading requires a SimCLRModel instance.
torch.save(model.state_dict(), 'simclr_model.pth')

# Define the fall detection model
class FallDetectionModel(nn.Module):
    """Two-layer perceptron classifier: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of features in each input vector.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        hidden_units = 64
        self.fc1 = nn.Linear(input_size, hidden_units)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Return unnormalized class scores for ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Create an instance of the fall detection model
num_classes = 2  # Specify the number of classes for fall detection
# input_size matches the 128-dimensional embedding produced by SimCLRModel
fall_model = FallDetectionModel(input_size=128, num_classes=num_classes)

# Define the fall detection loss function
fall_loss_function = nn.CrossEntropyLoss()

# Define the fall detection optimizer
fall_optimizer = optim.SGD(fall_model.parameters(), lr=0.001)

# Load the saved SimCLR model and extract learned embeddings
simclr_model = SimCLRModel(num_steps)
# map_location='cpu' lets a checkpoint saved from a CUDA model load on a
# CPU-only runtime; load_state_dict copies the values into this model's own
# parameters, which are moved to the target device separately.
simclr_model.load_state_dict(torch.load('simclr_model.pth', map_location='cpu'))
# eval() returns the module, so as the last expression of the cell it also
# echoes the architecture.
simclr_model.eval()




SimCLRModel(
  (backbone): Sequential(
    (0): Conv1d(1, 64, kernel_size=(3,), stride=(1,), padding=(1,))
    (1): ReLU()
    (2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=(1,))
    (4): ReLU()
    (5): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Linear(in_features=256, out_features=128, bias=True)
)

In [None]:
# To be incorporated into the previous load function
# Load and preprocess the accelerometer data
def load_and_preprocess(zip_path):
    """Load every CSV in ``zip_path`` and return standardized features plus labels.

    The ``.csv`` members are extracted relative to the current working
    directory and concatenated row-wise. The ten columns listed in
    ``feature_columns`` are standardized column-wise with ``StandardScaler``;
    the ``"Fall"`` column is returned untouched as the label list.

    Args:
        zip_path: Path of the zip archive to read.

    Returns:
        A ``(standardized_data, labels)`` tuple: a 2-D NumPy array with one
        row per CSV row, and a Python list holding the ``"Fall"`` value of
        each row in the same order.

    Raises:
        ValueError: If the archive contains no ``.csv`` members.
    """
    feature_columns = [
        "Acc(X)", "Acc(Y)", "Acc(Z)",
        "Rot(X)", "Rot(Y)", "Rot(Z)",
        "Pitch", "Roll", "Yaw", "Timestamp",
    ]

    # Extract the CSV files from the zip file
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        csv_files = [name for name in zip_ref.namelist() if name.endswith('.csv')]
        if not csv_files:
            # Fail here with a message naming the archive instead of letting
            # pd.concat raise its generic "No objects to concatenate".
            raise ValueError(f"No CSV files found in archive {zip_path!r}")
        zip_ref.extractall(members=csv_files)

    # Read and concatenate the extracted CSV files into a DataFrame
    frames = [pd.read_csv(path, encoding='latin-1') for path in csv_files]
    data = pd.concat(frames, ignore_index=True)

    # NOTE(review): "Timestamp" is standardized and returned as a feature
    # alongside the sensor channels -- confirm that this is intended.
    accelerometer_data = data[feature_columns].values
    labels = list(data["Fall"])

    # Standardize the data
    # NOTE(review): the scaler is fitted on all rows, before any train/test
    # split performed by the caller.
    scaler = StandardScaler()
    standardized_data = scaler.fit_transform(accelerometer_data)

    return standardized_data, labels

In [None]:
class AccelerometerDataset(torch.utils.data.Dataset):
    """Labelled dataset yielding two augmented views of a sample plus its label."""

    def __init__(self, data, labels):
        # Both collections are stored by reference and indexed in parallel.
        self.data = data
        self.labels = labels

    def __len__(self):
        """Return the number of stored samples (the length of ``data``)."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(view_1, view_2, label)`` for the sample at ``index``.

        The two views come from separate ``augment_function`` calls on the
        same sample; the label is passed through unchanged.
        """
        base = self.data[index]
        first_view = augment_function(base)
        second_view = augment_function(base)
        return first_view, second_view, self.labels[index]




In [None]:

# # Load the fall-dataset-features csv files
# feature_files = glob.glob('fall-dataset-features/*.csv')
# features = []
# for feature_file in feature_files:
#     df = pd.read_csv(feature_file)
#     features.append(df.to_numpy())

# # Split the features into train and test sets
# train_features, test_features = np.split(features, [int(0.8 * len(features))])

# Read the raw recordings (features + "Fall" labels) from the zip archive
raw_files = "fall-dataset-raw.zip"
raw_data, labels = load_and_preprocess(raw_files)

# Work on the first 10,000 rows only
raw_data, labels = raw_data[:10000], labels[:10000]

# Sequential 80/20 split: the leading rows train, the trailing rows test
split_at = int(0.8 * len(raw_data))
train_raw_data = raw_data[:split_at]
test_raw_data = raw_data[split_at:]
train_labels = labels[:split_at]
test_labels = labels[split_at:]

# Wrap each partition in a labelled dataset
train_dataset = AccelerometerDataset(train_raw_data, train_labels)
test_dataset = AccelerometerDataset(test_raw_data, test_labels)


In [None]:
len(train_dataset.data)

8000

In [None]:
# Define the batch size
batch_size = 64
# (a batch size of 10 was also tried here)
# Create the data loaders: training batches are reshuffled on every pass,
# test batches keep the dataset order
train_data_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_data_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [None]:
# batch_size = 10 #2

In [None]:
# Move the reloaded encoder to the device used for the embedding passes;
# as a bare expression it also echoes the module structure.
simclr_model.to(device)

SimCLRModel(
  (backbone): Sequential(
    (0): Conv1d(1, 64, kernel_size=(3,), stride=(1,), padding=(1,))
    (1): ReLU()
    (2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv1d(64, 128, kernel_size=(3,), stride=(1,), padding=(1,))
    (4): ReLU()
    (5): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Linear(in_features=256, out_features=128, bias=True)
)

In [None]:
len(train_dataset.data)

8000

In [None]:
len(train_labels)

8000

In [None]:
def extract_embeddings(encoder, loader, device):
    """Embed both augmented views of every batch in ``loader``.

    Gradients are disabled, so the returned embeddings carry no autograd
    graph. For each batch the view-1 embeddings are appended first, then the
    view-2 embeddings, and the batch labels are repeated once per view so
    that row ``i`` of the embeddings matches entry ``i`` of the labels.

    Args:
        encoder: Module mapping ``(batch, 1, length)`` float inputs to
            embeddings.
        loader: Iterable yielding ``(view_1, view_2, labels)`` batches.
        device: Device on which the encoder runs.

    Returns:
        ``(embeddings, labels)``: a tensor with two rows per sample, and a
        1-D NumPy array of the matching labels.
    """
    embedding_chunks = []
    label_chunks = []

    with torch.no_grad():
        for view_1, view_2, batch_labels in loader:
            for view in (view_1, view_2):
                # Add the channel dimension expected by the Conv1d backbone
                inputs = view.unsqueeze(1).to(device).float()
                embedding_chunks.append(encoder(inputs))
            # One copy of the labels per view, in the same order as above
            label_chunks.extend([batch_labels, batch_labels])

    embeddings = torch.cat(embedding_chunks, dim=0)
    labels_array = np.concatenate([np.asarray(chunk) for chunk in label_chunks])
    return embeddings, labels_array


# Embeddings and labels for the classifier's training and test partitions
train_embeddings, train_labels = extract_embeddings(simclr_model, train_data_loader, device)
test_embeddings, test_labels = extract_embeddings(simclr_model, test_data_loader, device)



In [None]:
# Sanity check that embeddings and labels have the same length.
# Only the value of the last expression in the cell is displayed.
len(train_embeddings)
len(train_labels)

16000

In [None]:
# Convert the label arrays to tensors so the classifier loop can index them
# and move them to the device alongside the embeddings.
train_labels = torch.tensor(train_labels)
test_labels = torch.tensor(test_labels)

In [None]:
len(test_labels)

4000

In [None]:
# Move the fall detection model to the same device as the embeddings
fall_model = fall_model.to(device)

# The encoder is not trained here: detach the cached embeddings so backward()
# stops at the classifier and no encoder graph has to be retained. Dtype and
# device conversions are done once instead of once per sample.
train_inputs = train_embeddings.detach().to(device).float()
train_targets = torch.as_tensor(train_labels).to(device).long()
test_inputs = test_embeddings.detach().to(device).float()
test_targets = torch.as_tensor(test_labels).to(device).long()

# Training loop for the fall detection model
num_epochs = 10

for epoch in range(num_epochs):
    fall_model.train()
    total_loss = 0.0  # Track the total loss for the epoch

    # One optimizer step per sample, visited in stored order
    for embedding, target in zip(train_inputs, train_targets):
        # Forward pass through the fall detection model (batch of one)
        outputs = fall_model(embedding.unsqueeze(0))

        # Calculate the fall detection loss
        fall_loss = fall_loss_function(outputs, target.unsqueeze(0))
        total_loss += fall_loss.item()

        # Zero the gradients
        fall_optimizer.zero_grad()

        # Backward pass and optimization
        fall_loss.backward()
        fall_optimizer.step()

    # Evaluate the fall detection model
    fall_model.eval()
    with torch.no_grad():
        # The whole test set is scored in one batched forward pass
        predicted = torch.argmax(fall_model(test_inputs), 1)
        total_correct = (predicted == test_targets).sum().item()
        total_samples = test_targets.size(0)

        accuracy = total_correct / total_samples
        # This line reports the summed loss; the average follows below
        print(f"Epoch [{epoch+1}/{num_epochs}], Fall Detection Loss: {total_loss}, Test Accuracy: {accuracy:.4f}")

    # Calculate the average loss for the epoch
    avg_loss = total_loss / len(train_inputs)

    print(f"Epoch [{epoch+1}/{num_epochs}], Fall Detection Loss: {avg_loss}")


Epoch [1/10], Fall Detection Loss: 4002.4648423382428, Test Accuracy: 0.5467
Epoch [1/10], Fall Detection Loss: 0.25015405264614016
Epoch [2/10], Fall Detection Loss: 3180.034875508394, Test Accuracy: 0.5480
Epoch [2/10], Fall Detection Loss: 0.19875217971927464
Epoch [3/10], Fall Detection Loss: 3109.569534753456, Test Accuracy: 0.5633
Epoch [3/10], Fall Detection Loss: 0.194348095922091
Epoch [4/10], Fall Detection Loss: 3039.7669946499077, Test Accuracy: 0.5690
Epoch [4/10], Fall Detection Loss: 0.18998543716561922
Epoch [5/10], Fall Detection Loss: 3000.4687299801517, Test Accuracy: 0.6080
Epoch [5/10], Fall Detection Loss: 0.18752929562375947
Epoch [6/10], Fall Detection Loss: 2927.3422497412757, Test Accuracy: 0.5470
Epoch [6/10], Fall Detection Loss: 0.18295889060882972
Epoch [7/10], Fall Detection Loss: 2916.1927434811805, Test Accuracy: 0.5517
Epoch [7/10], Fall Detection Loss: 0.18226204646757377
Epoch [8/10], Fall Detection Loss: 2822.719527293017, Test Accuracy: 0.5530
Epoc