In [5]:
# DomainNet source task embedding
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Source task data: one row per pre-trained model, laid out as
# [architecture family, model name, <three numeric descriptors>].
# NOTE(review): the numeric columns look like parameter count, top-1 accuracy (%)
# and GFLOPs -- confirm against the table this was copied from.
source_data = [
    ["convnext", "convnext_small", 50223688, 83.616, 8.68],
    ["convnext", "convnext_tiny", 28589128, 82.52, 4.46],
    ["densenet", "densenet121", 7978856, 74.434, 2.83],
    ["efficientnet", "efficientnet_b0", 5288548, 77.692, 0.39],
    ["efficientnet", "efficientnet_b3", 12233232, 82.008, 1.83],
    ["mobilenet", "mobilenet_v2", 3504872, 72.154, 0.3],
    ["resnet", "resnet101", 68883240, 78.468, 11.4],
    ["resnet", "resnet50", 25557032, 80.858, 4.09],
    ["resnet", "resnet18", 11689512, 69.758, 1.81],
    ["resnet", "wide_resnet50_2", 44549160, 81.886, 7.8],
]

# Feature engineering for the source task: transpose the table so the two
# categorical fields and the numeric tail can be handled separately.
families, model_names, *scalar_columns = zip(*source_data)
source_architecture_family = list(families)
source_model_name = list(model_names)
source_scalar_features = [list(values) for values in zip(*scalar_columns)]

# One-hot encoding. The same encoder object is refit for each field, so after
# this cell `enc` remains fitted on the model names only.
enc = OneHotEncoder(handle_unknown='ignore')
family_column = np.asarray(source_architecture_family)[:, np.newaxis]
model_name_column = np.asarray(source_model_name)[:, np.newaxis]
source_architecture_family_one_hot = enc.fit_transform(family_column).toarray()
source_model_name_one_hot = enc.fit_transform(model_name_column).toarray()

# Standardise the numeric descriptors (zero mean, unit variance per column).
scaler = StandardScaler()
scaler.fit(source_scalar_features)
source_scalar_features_normalized = scaler.transform(source_scalar_features)

# Final embedding: [family one-hot | model-name one-hot | scaled numerics].
source_embedding = np.hstack(
    [
        source_architecture_family_one_hot,
        source_model_name_one_hot,
        source_scalar_features_normalized,
    ]
)
np.save("DomainNet_source_embedding.npy", source_embedding)


In [19]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler
# Path to the spreadsheet that describes the target tasks.
file_path = "./target_embedding.xlsx"

# Load the target-task table from Excel.
df = pd.read_excel(file_path)

# Encoder / scaler shared by the columns below. Every `fit_transform` call
# refits them, so after this cell they stay fitted on the LAST column processed.
#
# `OneHotEncoder(sparse=False)` is deliberately not used: the `sparse` keyword
# was deprecated in scikit-learn 1.2 and removed in 1.4 (TypeError). Keeping the
# default sparse output and calling `.toarray()` yields the same dense matrix on
# every scikit-learn version, and keeps `enc` compatible with cells that call
# `enc.fit_transform(...).toarray()`.
enc = OneHotEncoder()
scaler = StandardScaler()

# One-hot encode the categorical columns.
datasets_name = enc.fit_transform(df[['dataset name']].values).toarray()
domain_one_hot = enc.fit_transform(df[['domain']].values).toarray()
categories_one_hot = enc.fit_transform(df[['categories']].values).toarray()

# Standardise the numeric columns. `df[[col]].values` is already shaped
# (n_rows, 1), so no extra reshape is needed.
train_sample_size_normalized = scaler.fit_transform(df[['Train_Sample_Size']].values)
num_pre_class_normalized = scaler.fit_transform(df[['num_pre_class']].values)

# Assemble the processed features column-wise:
# [dataset one-hot | domain one-hot | num_pre_class | categories one-hot | train sample size]
processed_data = np.concatenate(
    [
        datasets_name,
        domain_one_hot,
        num_pre_class_normalized,
        categories_one_hot,
        train_sample_size_normalized,
    ],
    axis=1,
)

# Sanity check: exactly one embedding row per spreadsheet row.
assert processed_data.shape[0] == len(df), "row count changed during feature processing"




In [33]:
# Append the columns of `FM_embedding` to the tabular target features (column-wise concat).
# NOTE(review): `FM_embedding` is not assigned in any visible cell -- presumably a 2-D array
# with one row per row of `processed_data`; confirm its origin so this cell survives
# Restart & Run All.
target_embedding = np.concatenate((processed_data, FM_embedding), axis=1)


In [39]:
import numpy as np

# Precondition: `s_embedding` and `target_embedding` already exist as 2-D arrays.
# NOTE(review): `s_embedding` is not assigned in any visible cell (the original
# comment spoke of `source_embedding`) -- confirm which array is meant before
# relying on Restart & Run All.


def _place_in_joint_space(block, first_column, total_columns):
    """Return a zero matrix with `total_columns` columns that holds `block`
    starting at column `first_column`; every other entry stays 0."""
    padded = np.zeros((block.shape[0], total_columns))
    padded[:, first_column:first_column + block.shape[1]] = block
    return padded


# Widths of the two embeddings and of the joint space that stacks them side by side.
# (The original author recorded 13, 33 and 46 for these -- TODO confirm.)
source_dim = s_embedding.shape[1]
target_dim = target_embedding.shape[1]
new_dim = source_dim + target_dim

# Source rows occupy the leading `source_dim` columns, target rows the trailing
# `target_dim` columns; the remaining columns are zero-filled.
new_source_embedding = _place_in_joint_space(s_embedding, 0, new_dim)
new_target_embedding = _place_in_joint_space(target_embedding, source_dim, new_dim)

# Persist both padded embeddings as .npy files.
np.save('DomainNet_new_source_embedding.npy', new_source_embedding)
np.save('DomainNet_new_target_embedding.npy', new_target_embedding)

In [21]:
# Save the processed target features as a NumPy file.
np.save("target_domain_embedding.npy", processed_data)

# # Convert back to a DataFrame and save as Excel
# processed_df = pd.DataFrame(processed_data)
# processed_df.to_excel("processed_data.xlsx", index=False)


In [None]:
import pandas as pd

# Path to the target-embedding spreadsheet.
# NOTE(review): nothing later in this cell reads `file_path` -- confirm whether it is still needed.
file_path = "./target_embedding.xlsx"




# DomainNet target task embedding
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Target task data: one row per target task, laid out as
# [dataset name, domain, <three numeric descriptors>].
# NOTE(review): the meaning of the three numeric columns is not stated here -- confirm.
target_data = [
    ["Domain Net_subset_1", "sketch", 2, 121, 110],
    ["Domain Net_subset_2", "sketch", 4, 400, 349],
    ["Domain Net_subset_3", "sketch", 6, 600, 600],
]

# Planning note (translated from the original comment): 270; (10, 30, 50); 6 domains;
# tasks 1-15 with labels 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 32.
# NOTE(review): what these figures refer to is not evident from this cell -- confirm.

target_dataset_name = [row[0] for row in target_data]
target_domain_name = [row[1] for row in target_data]
target_scalar_features = [[row[2], row[3], row[4]] for row in target_data]

# Create the transformers in this cell instead of relying on whatever `enc` /
# `scaler` an earlier cell happened to leave in the kernel: on a fresh kernel
# they would be undefined, and an encoder configured for dense output would make
# the `.toarray()` calls below fail. The default OneHotEncoder returns a sparse
# matrix, which is what `.toarray()` expects.
enc = OneHotEncoder()
scaler = StandardScaler()

# Standardise the numeric descriptors and one-hot encode the categorical fields.
target_scalar_features_normalized = scaler.fit_transform(target_scalar_features)
target_dataset_name_one_hot = enc.fit_transform(np.array(target_dataset_name).reshape(-1, 1)).toarray()
target_domain_name_one_hot = enc.fit_transform(np.array(target_domain_name).reshape(-1, 1)).toarray()

# Final embedding: [dataset-name one-hot | domain one-hot | scaled numerics].
# Note that this rebinds the notebook-level name `target_embedding`.
target_embedding = np.concatenate(
    (target_dataset_name_one_hot, target_domain_name_one_hot, target_scalar_features_normalized),
    axis=1,
)

np.save("target_embedding.npy", target_embedding)

In [None]:
import numpy as np

# Precondition: `s_embedding` and `target_embedding` already exist as 2-D arrays.
# NOTE(review): `s_embedding` is not assigned in any visible cell (the original
# comment spoke of `source_embedding`) -- confirm which array is meant.
# NOTE(review): these are the same output file names an earlier cell of this
# notebook writes, so running this cell overwrites those files -- confirm intended.

# Widths of the two embeddings and of the joint space that stacks them side by side.
# (The original author recorded 13, 33 and 46 for these -- TODO confirm.)
source_dim = s_embedding.shape[1]
target_dim = target_embedding.shape[1]
new_dim = source_dim + target_dim

# Column windows each embedding occupies inside the joint space.
source_columns = slice(0, source_dim)
target_columns = slice(source_dim, new_dim)

# Source rows: values in the leading window, zeros elsewhere.
new_source_embedding = np.zeros((len(s_embedding), new_dim))
new_source_embedding[:, source_columns] = s_embedding

# Target rows: values in the trailing window, zeros elsewhere.
new_target_embedding = np.zeros((len(target_embedding), new_dim))
new_target_embedding[:, target_columns] = target_embedding

# Persist both padded embeddings as .npy files.
np.save('DomainNet_new_source_embedding.npy', new_source_embedding)
np.save('DomainNet_new_target_embedding.npy', new_target_embedding)