In [47]:
import torch
import pandas as pd

df_testing = pd.read_parquet("loaded_data/a_patient_data_processed_cluster.parquet", engine='pyarrow')
df_testing.head()
print(df_testing.columns)
timestamp_count = df_testing.groupby('RecordID')['Time'].nunique()
print(timestamp_count)
# check if ICUType is a column
columns_to_check = ['ICUType', 'In-hospital_death']
missing_columns = [col for col in columns_to_check if col not in df_testing.columns]
missing_columns
df_testing_with_label = df_testing
print(len(df_testing_with_label.columns))
df_testing = df_testing.drop(columns=['ICUType', 'In-hospital_death'])
print(len(df_testing.columns))

# Load the full object (not just weights) from the .pth file
embeddings_tensor = torch.load('train_pat_embeddings.pth', map_location=torch.device('cpu'), weights_only=False)

print(len(embeddings_tensor.items()))

Index(['ALP', 'ALT', 'AST', 'Age', 'Albumin', 'BUN', 'Bilirubin',
       'Cholesterol', 'Creatinine', 'DiasABP', 'FiO2', 'GCS', 'Gender',
       'Glucose', 'HCO3', 'HCT', 'HR', 'Height', 'ICUType',
       'In-hospital_death', 'K', 'Lactate', 'MAP', 'MechVent', 'Mg',
       'NIDiasABP', 'NIMAP', 'NISysABP', 'Na', 'PaCO2', 'PaO2', 'Platelets',
       'RecordID', 'RespRate', 'SaO2', 'SysABP', 'Temp', 'Time', 'TroponinI',
       'TroponinT', 'Urine', 'WBC', 'Weight', 'pH'],
      dtype='object')
RecordID
132539.0    47
132540.0    48
132541.0    47
132543.0    48
132545.0    46
            ..
142665.0    48
142667.0    26
142670.0    44
142671.0    45
142673.0    47
Name: Time, Length: 3997, dtype: int64
44
42
736


In [37]:
import pandas as pd
import torch
from chronos import ChronosPipeline

pipeline = ChronosPipeline.from_pretrained(
    "amazon/chronos-t5-small",
    device_map="cuda",
    torch_dtype=torch.bfloat16,
)

df = pd.read_csv("https://raw.githubusercontent.com/AileenNielsen/TimeSeriesAnalysisWithPython/master/data/AirPassengers.csv")
print(df.shape[0])

# context must be either a 1D tensor, a list of 1D tensors,
# or a left-padded 2D tensor with batch as the first dimension
context = torch.tensor(df["#Passengers"])
embeddings, tokenizer_state = pipeline.embed(context)
embeddings.size()

144


torch.Size([1, 145, 512])

In [39]:

for patient_id, embedding in embeddings_tensor.items():
    if isinstance(embedding, torch.Tensor):
        #continue
        print(f"Patient ID: {patient_id}, Embedding size: {embedding.size()}")
    else:
        print(f"Patient ID: {patient_id}, Embedding is not a tensor")

Patient ID: 132539.0, Embedding size: torch.Size([1, 50, 512])
Patient ID: 132540.0, Embedding size: torch.Size([1, 50, 512])
Patient ID: 132541.0, Embedding size: torch.Size([1, 50, 512])
Patient ID: 132543.0, Embedding size: torch.Size([1, 50, 512])
Patient ID: 132545.0, Embedding size: torch.Size([1, 50, 512])
Patient ID: 132547.0, Embedding size: torch.Size([1, 50, 512])
Patient ID: 132548.0, Embedding size: torch.Size([1, 50, 512])
Patient ID: 132551.0, Embedding size: torch.Size([1, 50, 512])
Patient ID: 132554.0, Embedding size: torch.Size([1, 50, 512])
Patient ID: 132555.0, Embedding size: torch.Size([1, 50, 512])
Patient ID: 132556.0, Embedding size: torch.Size([1, 50, 512])
Patient ID: 132567.0, Embedding size: torch.Size([1, 50, 512])
Patient ID: 132568.0, Embedding size: torch.Size([1, 50, 512])
Patient ID: 132570.0, Embedding size: torch.Size([1, 50, 512])
Patient ID: 132573.0, Embedding size: torch.Size([1, 50, 512])
Patient ID: 132575.0, Embedding size: torch.Size([1, 50

In [44]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, f1_score
import pandas as pd

# Define the Linear Probe Model
class LinearProbeModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearProbeModel, self).__init__()
        self.fc = nn.Linear(input_dim, output_dim)
    
    def forward(self, x):
        return self.fc(x)

# Load the embeddings for each patient (you should have these from your previous code)
# Assuming all_patient_embeddings is already populated with the embeddings (from the Chronos pipeline)
# Convert the embeddings dictionary to a tensor format suitable for training
X_train = torch.stack(list(embeddings_tensor.values()))  # Stack all patient embeddings into a tensor
print(X_train)
print(X_train.size())
# Ensure the input data (X_train) is of type float32
X_train = X_train.float()


df_testing_with_label = df_testing_with_label.head(572)
# Assuming the target labels are stored in df_testing['In-hospital_death']
y_train = df_testing_with_label['In-hospital_death'].values  # Binary target (0 or 1)
y_train = torch.tensor(y_train, dtype=torch.long)  # Convert labels to tensor
print(y_train)



tensor([[[[-0.0059, -0.0200, -0.0216,  ...,  0.0121,  0.0020,  0.0141],
          [-0.0113, -0.0193, -0.0165,  ...,  0.0160, -0.0015,  0.0153],
          [-0.0109, -0.0154, -0.0202,  ...,  0.0191, -0.0033,  0.0122],
          ...,
          [ 0.0284,  0.0431, -0.0497,  ..., -0.0237, -0.0193,  0.0089],
          [ 0.0164,  0.0170, -0.0251,  ..., -0.0261, -0.0056,  0.0098],
          [ 0.0021,  0.0164,  0.0192,  ...,  0.0473, -0.0106, -0.0018]]],


        [[[ 0.0136, -0.0121, -0.0297,  ...,  0.0069,  0.0046,  0.0115],
          [ 0.0003, -0.0246, -0.0142,  ...,  0.0147,  0.0099,  0.0094],
          [ 0.0019, -0.0160, -0.0198,  ...,  0.0182,  0.0070,  0.0073],
          ...,
          [-0.0017,  0.0012, -0.0335,  ..., -0.0051,  0.0045,  0.0069],
          [ 0.0263,  0.0320, -0.0589,  ..., -0.0245, -0.0052,  0.0098],
          [ 0.0051,  0.0088,  0.0117,  ...,  0.0408,  0.0107, -0.0005]]],


        [[[ 0.0132, -0.0122, -0.0267,  ...,  0.0139, -0.0015,  0.0131],
          [-0.0043, -0.011

In [45]:

# Split the data into train and test sets (assuming you have separate training and test data)
# Here we just use the full data for demonstration purposes
X_test = X_train  # Use the same data for testing (adjust as per your actual data)
y_test = y_train  # Same target labels for testing

# Initialize the linear probe model
input_dim = 512  # Size of the embeddings (D)
print(input_dim)
output_dim = 2  # Output classes: In-hospital death (0 or 1)
model = LinearProbeModel(input_dim, output_dim)
# Ensure the model is using float32 (if needed)
model = model.float()

# Define the loss function and optimizer
loss_fn = nn.CrossEntropyLoss()  # Use CrossEntropyLoss for binary classification (two classes)
optimizer = optim.Adam(model.parameters(), lr=0.001)


512


In [46]:

# Function to train the Linear Probe Model
def train_model(model, X_train, y_train, loss_fn, optimizer, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        X_train_flat = X_train.view(X_train.size(0), -1)
        print(X_train_flat.size())
        # Ensure X_train has shape (batch_size, input_dim)
        print(f"X_train shape: {X_train.shape}")
        # Ensure y_train has shape (batch_size,) and values are class indices (0 or 1)
        print(f"y_train shape: {y_train.shape}")
        
        X_train_flat = X_train.view(X_train.size(0), -1)
        output = model(X_train_flat)
        print(f"Output shape: {output.shape}")
        
        loss = loss_fn(output, y_train)
        loss.backward()
        optimizer.step()
        
        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

# Train the model
train_model(model, X_train, y_train, loss_fn, optimizer, num_epochs=50)

# Function to evaluate the Linear Probe Model
def evaluate_model(model, X_test, y_test):
    model.eval()
    with torch.no_grad():
        y_pred = model(X_test)
        # For binary classification, apply softmax and choose the class with the highest probability
        _, predicted = torch.max(y_pred, 1)

    # Compute classification metrics
    accuracy = accuracy_score(y_test, predicted)
    f1 = f1_score(y_test, predicted, average='weighted')
    
    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 Score: {f1:.4f}")

# Evaluate the model on the test set
evaluate_model(model, X_test, y_test)


torch.Size([572, 25600])
X_train shape: torch.Size([572, 1, 50, 512])
y_train shape: torch.Size([12])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (572x25600 and 512x2)

In [22]:
y_test.shape[0]


12

In [None]:
import torch
import torch.nn as nn

class ChannelAggregationNN(nn.Module):
    def __init__(self, N, D, hidden_size=512, dropout_rate=0.3):
        super(ChannelAggregationNN, self).__init__()
        
        # Define the fully connected layers
        self.fc1 = nn.Linear(N * D, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size // 2)
        self.fc3 = nn.Linear(hidden_size // 2, 1)  # Output layer
        
        # Dropout layer for regularization
        self.dropout = nn.Dropout(dropout_rate)
        
        # Activation function
        self.relu = nn.ReLU()
    
    def forward(self, x):
        # Concatenate embeddings of all channels
        x = x.view(x.size(0), -1)  # Flatten the input
        
        # Pass through the network layers
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        x = self.fc2(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        x = self.fc3(x)  # Final output
        return x

# Example usage:
# Assuming we have N channels with D-dimensional embeddings
N = 5  # Number of channels
D = 128  # Embedding dimension
model = ChannelAggregationNN(N, D)
