### image features

In [1]:
import h5py
import numpy as np

# Open the HDF5 file read-only and pull the 'image_features' dataset fully
# into memory ([:] reads the data out while the file is still open).
with h5py.File('image_features.h5', mode='r') as hf:
    image_features = hf['image_features'][:]

# image_features is now an ordinary NumPy array and can be used like any other.
print(image_features.shape)

In [2]:
import torch
import torch.nn.functional as F


print("Shape of image_features:", image_features.shape)

# max_pool1d pools over the last dimension, so swap the last two axes first.
# NOTE(review): assumes image_features is 3-D with the token axis at index 1
# (samples, tokens, channels) — confirm against the .h5 file.
image_features_transposed = np.transpose(image_features, (0, 2, 1))

# Convert the transposed array to a torch tensor (torch.tensor copies the data).
image_features_tensor = torch.tensor(image_features_transposed)

# Global max pooling over the token axis. The window size is read from the
# tensor rather than hard-coded to 197: a fixed 197 raises for shorter
# sequences and silently ignores trailing tokens for longer ones.
# The pooled axis has length 1 afterwards, so squeeze it away in the same step.
max_pooled_features = F.max_pool1d(
    image_features_tensor,
    kernel_size=image_features_tensor.shape[-1],
).squeeze(-1)
print("Shape of max_pooled_features:", max_pooled_features.shape)

### string features

In [3]:
import pandas as pd
# Load the string-derived features; every CSV column is kept as a feature.
string_features = pd.read_csv('string_features.csv')
string_features_np = string_features.to_numpy()
# Cast to float32 on conversion to a tensor.
string_features_tensor = torch.tensor(string_features_np, dtype=torch.float32)
print(string_features_tensor.shape)

### nlp and gene features

In [4]:
# Load the NLP/gene features and skip the first CSV column.
# NOTE(review): the first column is presumably an index/ID — confirm.
nlp_gene_features = pd.read_csv('Xtrain_nlp_gene_mean.csv')
nlp_gene_features_np = nlp_gene_features.iloc[:, 1:].to_numpy()
nlp_gene_features_tensor = torch.tensor(nlp_gene_features_np, dtype=torch.float32)
print(nlp_gene_features_tensor.shape)

### attention network

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicAttention(nn.Module):
    """Single-head dot-product attention that fuses three feature matrices.

    Queries are projected from the first input, keys from the second and
    values from the third. Attention is computed between rows, so for inputs
    with N rows the weight matrix is N x N and every output row is a weighted
    mix of the value rows.
    """

    def __init__(self, feature_dim1, feature_dim2, attention_dim):
        """Create the three linear projections.

        Args:
            feature_dim1: width of the first input; also used for the second
                input, which is assumed to have the same width.
            feature_dim2: width of the third input.
            attention_dim: width of the projected query/key/value vectors and
                therefore of the output.
        """
        super().__init__()
        # Creation order (query, key, value) is kept so seeded initialisation
        # assigns the same weights to the same layers.
        self.query = nn.Linear(feature_dim1, attention_dim)
        self.key = nn.Linear(feature_dim1, attention_dim)
        self.value = nn.Linear(feature_dim2, attention_dim)

    def forward(self, features1, features2, features3):
        """Return the attended features, one row per row of ``features1``.

        Args:
            features1: tensor whose last dimension is ``feature_dim1``.
            features2: tensor whose last dimension is ``feature_dim1``.
            features3: tensor whose last dimension is ``feature_dim2``.

        Returns:
            Tensor whose last dimension is ``attention_dim``.
        """
        q = self.query(features1)
        k = self.key(features2)
        v = self.value(features3)  # values come from the third input

        # Row-to-row similarity scores, normalised so each row sums to 1.
        scores = torch.matmul(q, k.transpose(-1, -2))
        weights = torch.softmax(scores, dim=-1)

        # Weighted combination of the value rows.
        return torch.matmul(weights, v)
# Build the attention module and fuse the three training feature sets.
# No training step is visible in this notebook, so the module's random
# initial weights fully determine the output; seed the RNG so that
# Restart & Run All reproduces the same features.
torch.manual_seed(42)

attention_dim = 250  # width of the fused output; can be tuned

max_pooled_features = max_pooled_features.to(torch.float32)
string_features_tensor = string_features_tensor.to(torch.float32)
nlp_gene_features_tensor = nlp_gene_features_tensor.to(torch.float32)

# Take the input widths from the tensors instead of hard-coding 768 / 1471,
# so the module always matches the data that was actually loaded.
attention_module = BasicAttention(
    max_pooled_features.shape[1],
    nlp_gene_features_tensor.shape[1],
    attention_dim,
)

# Inference only: skip building an autograd graph for the forward pass.
with torch.no_grad():
    fused_features = attention_module(
        max_pooled_features, string_features_tensor, nlp_gene_features_tensor
    )

In [6]:
# Display the shape of the fused training features.
fused_features.size()

In [7]:
# Detach from autograd and view the fused features as a NumPy array.
fused_features_np = fused_features.detach().numpy()

# Wrap the array in a DataFrame and write it out as CSV without an index column.
fused_features_df = pd.DataFrame(data=fused_features_np)
fused_features_df.to_csv('Att_fea_train_250.csv', index=False)

import torch
import torch.nn as nn
import torch.nn.functional as F

class PairAttention(nn.Module):
    """Single-head dot-product attention over two feature matrices.

    Queries are projected from the first input; keys and values are both
    projected from the second. Attention is computed between rows, so for
    inputs with N rows the weight matrix is N x N.

    This class is deliberately NOT named ``BasicAttention``: the notebook
    already defines a three-input ``BasicAttention`` whose instance
    ``attention_module`` is later called with three arguments to build the
    test features. Reusing those names here would overwrite that module and
    make the later call fail with a TypeError.
    """

    def __init__(self, feature_dim, attention_dim):
        """Create the query/key/value projections.

        Args:
            feature_dim: width of both inputs.
            attention_dim: width of the projected vectors and of the output.
        """
        super().__init__()
        self.query = nn.Linear(feature_dim, attention_dim)
        self.key = nn.Linear(feature_dim, attention_dim)
        self.value = nn.Linear(feature_dim, attention_dim)

    def forward(self, features1, features2):
        """Return the attended features, one row per row of ``features1``."""
        query = self.query(features1)
        key = self.key(features2)
        value = self.value(features2)

        # Row-to-row attention weights; each row sums to 1.
        attention_weights = F.softmax(torch.matmul(query, key.transpose(-2, -1)), dim=-1)

        # Weighted combination of the value rows.
        attended_features = torch.matmul(attention_weights, value)

        return attended_features


# Instantiate the two-input module under its own names so that
# attention_dim / attention_module / fused_features from the three-input
# pipeline stay intact for the cells that follow.
# The weights are random (no training step is visible), so seed for reproducibility.
torch.manual_seed(42)
pair_attention_dim = 768  # width of the fused output; can be tuned
pair_attention_module = PairAttention(max_pooled_features.shape[1], pair_attention_dim)

max_pooled_features = max_pooled_features.to(torch.float32)
string_features_tensor = string_features_tensor.to(torch.float32)

# Inference only: no autograd graph is needed.
with torch.no_grad():
    pair_fused_features = pair_attention_module(max_pooled_features, string_features_tensor)

# Persist as CSV without an index column.
pair_fused_features_np = pair_fused_features.detach().numpy()
pair_fused_features_df = pd.DataFrame(pair_fused_features_np)
pair_fused_features_df.to_csv('attention_train_features.csv', index=False)

### test data features


In [8]:
import h5py
import numpy as np

# Open the HDF5 file read-only and pull the 'test_image_features' dataset
# fully into memory ([:] reads the data out while the file is still open).
with h5py.File('test_image_features.h5', mode='r') as hf:
    test_image_features = hf['test_image_features'][:]

# test_image_features is now an ordinary NumPy array.
print(test_image_features.shape)
import torch
import torch.nn.functional as F

# max_pool1d pools over the last dimension, so swap the last two axes first.
# NOTE(review): assumes test_image_features is 3-D with the token axis at
# index 1 (samples, tokens, channels) — confirm against the .h5 file.
test_image_features_transposed = np.transpose(test_image_features, (0, 2, 1))

# Convert the transposed array to a torch tensor (torch.tensor copies the data).
test_image_features_tensor = torch.tensor(test_image_features_transposed)

# Global max pooling over the token axis. The window size is read from the
# tensor rather than hard-coded to 197: a fixed 197 raises for shorter
# sequences and silently ignores trailing tokens for longer ones.
# The pooled axis has length 1 afterwards, so squeeze it away in the same step.
test_max_pooled_features = F.max_pool1d(
    test_image_features_tensor,
    kernel_size=test_image_features_tensor.shape[-1],
).squeeze(-1)
test_max_pooled_features.shape

In [9]:
import pandas as pd
# Load the test string-derived features; every CSV column is kept as a feature.
test_string_features = pd.read_csv('test_string_features.csv')
test_string_features_np = test_string_features.values
test_string_features_tensor = torch.tensor(test_string_features_np, dtype=torch.float32)

# A bare expression in the middle of a cell is discarded, so print the shape
# explicitly (this also matches how the training cell reports it).
print(test_string_features_tensor.shape)

# Load the test NLP/gene features and skip the first CSV column.
# NOTE(review): the first column is presumably an index/ID — confirm.
test_nlp_gene_features = pd.read_csv('Xtest_nlp_gene_mean.csv')
test_nlp_gene_features_np = test_nlp_gene_features.iloc[:,1:].values
test_nlp_gene_features_tensor = torch.tensor(test_nlp_gene_features_np, dtype=torch.float32)
print(test_nlp_gene_features_tensor.shape)

In [10]:
# Report the shape of the test NLP/gene feature tensor.
print(test_nlp_gene_features_tensor.size())

In [11]:
# Make sure all three test inputs are float32 before the forward pass.
test_max_pooled_features = test_max_pooled_features.float()
test_string_features_tensor = test_string_features_tensor.float()
test_nlp_gene_features_tensor = test_nlp_gene_features_tensor.float()

# NOTE(review): expects `attention_module` to be the same three-input module
# instance that produced the training features, so train and test features
# share one set of weights.
test_fused_features = attention_module(
    test_max_pooled_features,
    test_string_features_tensor,
    test_nlp_gene_features_tensor,
)
test_fused_features.size()

In [12]:
# Detach from autograd, wrap in a DataFrame and write as CSV without an index column.
test_fused_features_np = test_fused_features.detach().numpy()
test_fused_features_df = pd.DataFrame(data=test_fused_features_np)
test_fused_features_df.to_csv('Att_fea_test_250.csv', index=False)

# Write the test fused features a second time, under the 'attention_*' file name.
# NOTE(review): test_fused_features comes from the three-input attention_module
# call, whereas 'attention_train_features.csv' is written from a two-input
# module's output — confirm these two files are really meant to be paired.
test_fused_features_np = test_fused_features.detach().numpy()
test_fused_features_df = pd.DataFrame(data=test_fused_features_np)
test_fused_features_df.to_csv('attention_test_features.csv', index=False)