In [1]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split 
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [2]:
Patient = '2'
data = pd.read_csv(f'p{Patient}_tpm.csv')
gene_symbols = data['Gene Name'].values.reshape(-1, 1)
data.drop(data.columns[0], axis=1, inplace=True)
data

Unnamed: 0,P2_d.3_Severe,P2_d.2_Severe,P2_d.1_Severe,P2_d0_Severe,P2_d1_Severe,P2_d2_Severe,P2_d4_Severe,P2_d5_Severe,P2_d6_Severe,P2_d7_Severe
0,1.075520,1.026133,0.915501,1.073553,1.071498,1.180263,1.134210,1.023740,0.862322,1.216322
1,2.698694,2.946050,3.070210,2.749033,2.474782,2.521224,3.118423,2.899919,2.228785,2.582810
2,22.215248,16.305909,20.653113,23.851245,19.229484,12.259623,15.147942,17.194324,18.757502,14.604062
3,14.742809,16.794703,16.702408,15.571379,12.912417,11.305714,11.666400,13.257035,12.938406,11.183513
4,6.056643,5.196954,5.927200,4.929898,4.396903,4.778500,4.836971,5.920507,4.977870,5.289075
...,...,...,...,...,...,...,...,...,...,...
22544,27.733099,25.671589,23.300585,26.766472,28.679802,23.457545,23.662283,25.612790,28.814464,27.821749
22545,42.585535,40.830882,43.438380,41.474756,39.943498,40.780043,45.020848,37.240145,35.611592,37.554267
22546,34.148099,32.304270,38.698699,40.060142,44.919794,40.766481,33.225917,43.585063,51.328681,44.301609
22547,19.105599,23.988768,31.755251,18.732984,24.067450,24.702550,22.369356,34.429848,17.829453,29.880144


In [12]:
# Define Dataset
class GeneExpressionDataset(Dataset):
    def __init__(self, features, targets):
        self.features = features
        self.targets = targets
        
    def __len__(self):
        return len(self.features)
        
    def __getitem__(self, idx):
        return self.features[idx], self.targets[idx]

class CNN1D(nn.Module):
    def __init__(self):
        super(CNN1D, self).__init__()
        
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=64, kernel_size=3)
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3)
        self.relu = nn.ReLU()
        
        # Adding an RNN layer
        self.rnn = nn.RNN(input_size=128, hidden_size=32, num_layers=1, batch_first=True)
        
        self.fc = nn.Linear(32, 1)  # Adjusted input dimension

    def forward(self, x):
        x = x.unsqueeze(1)  # Adding a dimension for the single channel at each time step
        
        # Pass input through CNN
        cnn_out = self.relu(self.conv1(x))
        cnn_out = self.relu(self.conv2(cnn_out))
        
        # Prepare CNN output for RNN input
        rnn_input = cnn_out.permute(0, 2, 1)  # Reorder dimensions to [batch_size, seq_len, num_features]
        
        # Pass CNN output through RNN
        rnn_out, _ = self.rnn(rnn_input)
        rnn_out = rnn_out[:, -1, :]  # Take the output from the last time step of the RNN
        
        # Pass RNN output through FC layer
        out = self.fc(rnn_out)
        
        return out


class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        
        # Define the layers
        self.fc1 = nn.Linear(in_features=6, out_features=128)  # Assume input size is 6 for simplicity
        self.fc2 = nn.Linear(in_features=128, out_features=64)
        self.fc3 = nn.Linear(in_features=64, out_features=32)
        self.fc4 = nn.Linear(in_features=32, out_features=1)
        self.relu = nn.ReLU()
        
    def forward(self, x):

        x = self.relu(self.fc1(x))

        x = self.relu(self.fc2(x))

        x = self.relu(self.fc3(x))

        out = self.fc4(x)
        
        return out

class RNN1D(nn.Module):
    def __init__(self):
        super(RNN1D, self).__init__()
        
        self.rnn = nn.RNN(input_size=1, hidden_size=256, num_layers=1, batch_first=True)  # Single RNN layer
        self.fc = nn.Linear(256, 1)  # Adjusted input dimension

    def forward(self, x):
        x = x.unsqueeze(2)  # Adding a dimension for the single feature at each time step
        
        # Pass input through RNN
        rnn_out, _ = self.rnn(x)
        rnn_out = rnn_out[:, -1, :]  # Take the output from the last time step of the RNN
        
        # Pass RNN output through FC layer
        out = self.fc(rnn_out)
        
        return out

    
def sliding_window(data, window_size):
    features = []
    targets = []
    for i in range(data.shape[1]-window_size):
        features.append(data[:,i:i+window_size])
        targets.append(data[:,i+window_size])
    return np.array(features), np.array(targets)

# Extract features and targets using sliding window
window_size = 6
all_data = data.values[:,0:7]
X, y = sliding_window(all_data, window_size)
X = X.astype(float).reshape(-1, window_size)
y = y.astype(float).reshape(-1, 1)

X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

train_dataset = GeneExpressionDataset(X_train_tensor, y_train_tensor)
val_dataset = GeneExpressionDataset(X_val_tensor, y_val_tensor)
test_dataset = GeneExpressionDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNN1D().to(device)

criterion = nn.L1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

def direction_loss(pred, target):
    pred_diff = pred[1:] - pred[:-1]
    target_diff = target[1:] - target[:-1]
    mask = (pred_diff * target_diff) < 0
    return mask.float().mean()

direction_weight = 0.5

num_epochs = 30
for epoch in range(num_epochs):
    model.train()
    for batch_features, batch_targets in train_loader:
        batch_features, batch_targets = batch_features.to(device), batch_targets.to(device)
        outputs = model(batch_features)
        loss_mae = criterion(outputs, batch_targets)
        d_loss = direction_loss(outputs, batch_targets)
        loss = loss_mae + direction_weight * d_loss
#         loss = loss_mae
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch_features, batch_targets in val_loader:
            batch_features, batch_targets = batch_features.to(device), batch_targets.to(device)
            outputs = model(batch_features)
            loss = criterion(outputs, batch_targets)
            val_loss += loss.item()
    val_loss /= len(val_loader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {loss.item():.4f}, Validation Loss: {val_loss:.4f}')

model.eval()
total_test_loss = 0
with torch.no_grad():
    for batch_features, batch_targets in test_loader:
        batch_features, batch_targets = batch_features.to(device), batch_targets.to(device)
        outputs = model(batch_features)
        loss = criterion(outputs, batch_targets)
        total_test_loss += loss.item()
        
average_test_loss = total_test_loss / len(test_loader)
print(f"Average Test Loss: {average_test_loss:.4f}")

Epoch [1/30], Training Loss: 11.2538, Validation Loss: 32.6546
Epoch [2/30], Training Loss: 9.8539, Validation Loss: 30.6137
Epoch [3/30], Training Loss: 8.9694, Validation Loss: 29.3592
Epoch [4/30], Training Loss: 8.2827, Validation Loss: 28.4910
Epoch [5/30], Training Loss: 7.9790, Validation Loss: 27.7930
Epoch [6/30], Training Loss: 7.6939, Validation Loss: 27.2066
Epoch [7/30], Training Loss: 7.4113, Validation Loss: 26.7152
Epoch [8/30], Training Loss: 7.0727, Validation Loss: 26.2874
Epoch [9/30], Training Loss: 6.9292, Validation Loss: 25.8896
Epoch [10/30], Training Loss: 6.6751, Validation Loss: 25.5138
Epoch [11/30], Training Loss: 6.4811, Validation Loss: 25.2254
Epoch [12/30], Training Loss: 6.5186, Validation Loss: 24.8967
Epoch [13/30], Training Loss: 6.4402, Validation Loss: 24.5987
Epoch [14/30], Training Loss: 6.4408, Validation Loss: 24.3496
Epoch [15/30], Training Loss: 6.3709, Validation Loss: 24.1092
Epoch [16/30], Training Loss: 6.2879, Validation Loss: 23.8824


In [44]:
input_data = data.iloc[:,1:7].values
input_data = input_data.astype(float).reshape(-1, window_size)
input_tensor = torch.tensor(input_data, dtype=torch.float32).to(device)

with torch.no_grad():
    predicted_d7 = model(input_tensor).cpu().numpy().reshape(-1, 1)

    
data = pd.read_csv(f'p{Patient}_tpm.csv')
ordered_data = data[['Gene Name', f'P{Patient}_d.3_Severe',f'P{Patient}_d.2_Severe',f'P{Patient}_d.1_Severe', f'P{Patient}_d0_Severe', f'P{Patient}_d1_Severe', f'P{Patient}_d2_Severe', f'P{Patient}_d3_Severe']].values

# Concatenate everything: gene symbols, ordered_data, predicted_6th, and predicted_7th
extended_data = np.concatenate((ordered_data, predicted_d7), axis=1)

# Define columns
columns = ['Gene Name', 'd-3','d-2' ,'d-1', 'd0', 'd1', 'd2', 'd3','d4']

# Create the DataFrame
extended_df = pd.DataFrame(extended_data, columns=columns)
    
extended_df.to_csv(f'p{Patient}_pred.csv')

In [33]:
Patient = '5'
data = pd.read_csv(f'p{Patient}_tpm.csv')
gene_symbols = data['Gene Name'].values.reshape(-1, 1)

data = data.iloc[:, 1:]
data.drop(data.columns[1], axis=1, inplace=True)

# Define Dataset
class GeneExpressionDataset(Dataset):
    def __init__(self, features, targets):
        self.features = features
        self.targets = targets
        
    def __len__(self):
        return len(self.features)
        
    def __getitem__(self, idx):
        return self.features[idx], self.targets[idx]

class CNN1D(nn.Module):
    def __init__(self):
        super(CNN1D, self).__init__()
        
        # CNN Layers
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=16, kernel_size=3)
        self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3)
        self.conv3 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=2)
        self.relu = nn.ReLU()
        
        # RNN Layer
        self.rnn = nn.RNN(input_size=1, hidden_size=32, num_layers=1, batch_first=True)
        
        # Fully Connected Layer
        self.fc1 = nn.Linear(64 + 32, 1) # Combined output from CNN and RNN

    def forward(self, x):
        # Pass input through CNN
        cnn_out = x.unsqueeze(1)  # Add channel dimension
        cnn_out = self.relu(self.conv1(cnn_out))
        cnn_out = self.relu(self.conv2(cnn_out))
        cnn_out = self.relu(self.conv3(cnn_out))
        cnn_out = cnn_out.view(cnn_out.size(0), -1)  # Flatten
        
        # Pass input through RNN
        rnn_out, _ = self.rnn(x.unsqueeze(2))
        rnn_out = rnn_out[:, -1, :]  # Take the output from the last RNN unit
        
        # Concatenate outputs and pass through FC layers
        combined_out = torch.cat((cnn_out, rnn_out), dim=1)
        out = self.fc1(combined_out)
        
        return out

def sliding_window(data, window_size):
    features = []
    targets = []
    for i in range(data.shape[1]-window_size):
        features.append(data[:,i:i+window_size])
        targets.append(data[:,i+window_size])
    return np.array(features), np.array(targets)

# Extract features and targets using sliding window
window_size = 6
all_data = data.values[:,1:9]
X, y = sliding_window(all_data, window_size)
X = X.astype(float).reshape(-1, window_size)
y = y.astype(float).reshape(-1, 1)

X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.2, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

train_dataset = GeneExpressionDataset(X_train_tensor, y_train_tensor)
val_dataset = GeneExpressionDataset(X_val_tensor, y_val_tensor)
test_dataset = GeneExpressionDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CNN1D().to(device)
criterion = nn.L1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

def direction_loss(pred, target):
    pred_diff = pred[1:] - pred[:-1]
    target_diff = target[1:] - target[:-1]
    mask = (pred_diff * target_diff) < 0
    return mask.float().mean()

direction_weight = 0.5

num_epochs = 30
for epoch in range(num_epochs):
    model.train()
    for batch_features, batch_targets in train_loader:
        batch_features, batch_targets = batch_features.to(device), batch_targets.to(device)
        outputs = model(batch_features)
        loss_mae = criterion(outputs, batch_targets)
        d_loss = direction_loss(outputs, batch_targets)
        loss = loss_mae + direction_weight * d_loss
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch_features, batch_targets in val_loader:
            batch_features, batch_targets = batch_features.to(device), batch_targets.to(device)
            outputs = model(batch_features)
            loss = criterion(outputs, batch_targets)
            val_loss += loss.item()
    val_loss /= len(val_loader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {loss.item():.4f}, Validation Loss: {val_loss:.4f}')

model.eval()
total_test_loss = 0
with torch.no_grad():
    for batch_features, batch_targets in test_loader:
        batch_features, batch_targets = batch_features.to(device), batch_targets.to(device)
        outputs = model(batch_features)
        loss = criterion(outputs, batch_targets)
        total_test_loss += loss.item()
        
average_test_loss = total_test_loss / len(test_loader)
print(f"Average Test Loss: {average_test_loss:.4f}")



ValueError: With n_samples=0, test_size=0.1 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [31]:
import torch

criterion = nn.L1Loss()

# Convert NumPy arrays to PyTorch tensors
predicted_d6_tensor = torch.tensor(predicted_d6, dtype=torch.float32)
predicted_d7_tensor = torch.tensor(predicted_d7, dtype=torch.float32)
actual_values1_tensor = torch.tensor(data.iloc[:, -2].values.astype(float).reshape(-1, 1), dtype=torch.float32)
actual_values_tensor = torch.tensor(data.iloc[:, -1].values.astype(float).reshape(-1, 1), dtype=torch.float32)

# Compute MAE using PyTorch
mae_loss1 = criterion(predicted_d6_tensor, actual_values1_tensor).item()
mae_loss = criterion(predicted_d7_tensor, actual_values_tensor).item()

print(f'Mean absolute error for p{Patient} d6: {mae_loss1:.4f}')
print(f'Mean absolute error for p{Patient} d7: {mae_loss:.4f}')


Mean absolute error for p2 d6: 6.8126
Mean absolute error for p2 d7: 5.2301
