# 加载必要库

In [1]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from data.loaders import *
from methods.DomainAda import *
from utils.PLOT import PCA_plot,Domain_plot

loaders.py load successfully!


## 加载数据

In [2]:
dataset = 'default'

In [3]:
if dataset == 'number':
    mat_number = load_number()
    data = mat_number.data
    label = mat_number.target
    sample_domain = np.array([1]*2000 +[-1]*1800)

elif dataset == 'drug':
    mat_bulk = load_bulk_drug_response() 
    mat_sc = load_sc_drug_response()
    con_genes = list(set(mat_bulk.data.columns).intersection(mat_sc.data.columns))
    data = np.concatenate((mat_bulk.data.loc[:,con_genes], mat_sc.data.loc[:,con_genes]), axis=0)
    label = np.concatenate((mat_bulk.target, mat_sc.target), axis=0)
    sample_domain = np.array([1]*mat_bulk.data.shape[0] +[-1]*mat_sc.data.shape[0])
    
else:
    mat_amazon = load_amazon_fc6()
    mat_dslr = load_dslr_fc6()
    data = np.concatenate((mat_amazon.data, mat_dslr.data), axis=0)
    label = np.concatenate((mat_amazon.target, mat_dslr.target), axis=0)
    sample_domain = np.array([1]*mat_amazon.data.shape[0] +[-1]*mat_dslr.data.shape[0])

In [4]:
X = data
y = label

## 归一化到相同尺度

In [5]:
scaler = StandardScaler()
scaler.fit(X)
X[sample_domain == 1] = scaler.fit_transform(X[sample_domain == 1])
X[sample_domain == -1] = scaler.transform(X[sample_domain == -1])
# PCA_plot(X, y, sample_domain)

## 域适应对齐

In [6]:
method = 'CORAL'
if method == "CORAL":
    coral = CORAL(0.01)
    Xs_ada = coral.fit_transform(X, sample_domain) 
elif method == "OT_Exact":
    ot_exact = OT_Exact()
    Xs_ada = ot_exact.fit_transform(X, sample_domain) 
elif method == "OT_IT":
    ot_it = OT_IT()
    Xs_ada = ot_it.fit_transform(X, sample_domain) 
elif method == "OT_GL":
    ot_gl = OT_GL()
    Xs_ada = ot_gl.fit_transform(X, y ,sample_domain) 
elif method == "OT_Laplace":
    ot_lpl = OT_Laplace()
    Xs_ada = ot_lpl.fit_transform(X, y, sample_domain) 
elif method == "OT_Unbalance":
    ot_ubl = OT_Unbalance()
    Xs_ada = ot_ubl.fit_transform(X, sample_domain) 

print(f'域适应后源域均值：{Xs_ada.mean()}\t域适应前源域均值：{X[sample_domain == 1].mean()}\t目标域均值：{X[sample_domain == -1].mean()}')
print(f'域适应后源域方差：{Xs_ada.var()}\t域适应前源域方差{X[sample_domain == 1].var()}\t目标域方差：{X[sample_domain == -1].var()}')

域适应后源域均值：2.5409241306675587e-10	域适应前源域均值：-1.957199069479998e-09	目标域均值：-0.1330464631319046
域适应后源域方差：1.1105135311915948	域适应前源域方差1.0000003576278687	目标域方差：1.24617338180542


In [7]:
X_ada = np.concatenate((Xs_ada, X[sample_domain==-1]), axis=0)
# PCA_plot(X_ada, y, sample_domain)

# 训练
## 训练分类器

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

def param_search(model,param_grid,X,y,test_size):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
    grid_search = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        cv=5,                # 5 折交叉验证
        scoring='roc_auc_ovr',  # 评价指标
        n_jobs=-1            # 并行加速
    )

    grid_search.fit(X_train, y_train)
    print("最佳参数组合：", grid_search.best_params_)
    print("训练集最佳得分：", grid_search.best_score_)
    print("测试集得分：", grid_search.best_estimator_.score(X_test, y_test))
        
    return grid_search.best_params_

In [9]:
from sklearn.svm import SVC
SVM = SVC(kernel='rbf', probability=True, random_state=42)
param_grid = {
    'C': np.logspace(-5,4,10) ,
    'gamma':np.logspace(-5,4,10) ,
}

opt_params = param_search(SVM,param_grid,X[sample_domain == 1],y[sample_domain == 1],0.3)

最佳参数组合： {'C': np.float64(100.0), 'gamma': np.float64(0.0001)}
训练集最佳得分： 0.9813136770766056
测试集得分： 0.8073286052009456


In [10]:
opt_params = param_search(SVM,param_grid,X_ada[sample_domain == 1],y[sample_domain == 1],0.3)
SVM = SVC(kernel='rbf', C=opt_params['C'], gamma=opt_params['gamma'], probability=True, random_state=42)

最佳参数组合： {'C': np.float64(10.0), 'gamma': np.float64(0.0001)}
训练集最佳得分： 0.967398897543578
测试集得分： 0.735224586288416


## 预测目标域

In [12]:
SVM.fit(X_ada[sample_domain == 1],y=y[sample_domain == 1])
y_prob = SVM.predict_proba(X_ada[sample_domain == -1])

In [None]:
import sys
sys.path.append('/home/kazundo/WorkSpace/')
from MyModule.Mystatic import get_benchmark

get_benchmark(y[sample_domain == 0], y_prob)