In [12]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression
# Transfer Stacking
def Ffold_cross_val(Xtrain, Ytrain, F, estimator):
# KFold
    row_Xtrain = Xtrain.shape[0]
    # 创建一个 KFold 对象，设置折数为 5
    kf = KFold(n_splits=F)
    # 使用 KFold 对象划分数据集，并进行交叉验证
    output = np.zeros(row_Xtrain, 1)
    for train_index, val_index in kf.split(Xtrain):
        x_train, x_val = [Xtrain[i] for i in train_index], [Xtrain[i] for i in val_index]
        y_train, _ = [Ytrain[i] for i in train_index], [Ytrain[i] for i in val_index]
        estimator.fit(x_train, y_train)
        output[val_index] = estimator.predict(x_val)
    return output
def TransferStacking(Xmsource, Xtarget, Ymsource, Ytarget, Xtest, *kargs):
    """
    Transfer Stacking
    parameter
    _________
    Xmsource = dict{source1: A,
                    source2: B,
                    ...}
    Xtarget - matrix
    Ymsource = dict{source1: A,
                    source2: B.
                    ...}
    Ytarget - vector
    Xtest

    Atrributes
    ----------
    
    return
    ------
    """
    Xmsource = list(Xmsource.values())
    Xsource = np.concatenate(Xmsource)
    Xtrain = np.concatenate([Xsource, Xtarget], axis=0)
    Ymsource = list(Ymsource.value())
    Ysource = np.concatenate(Ymsource)
    Ytrain = np.concatenate([Ysource, Ytarget], axis=0)
    
    num_source = len(Xmsource)
    row_Xtrain = Xtrain.shape[0]
    output = np.zeros(row_Xtrain, num_source)
    estimators = []
    for i in range(num_source):
        estimator = DecisionTreeClassifier(criterion='gini',max_depth=3, random_state=42)
        estimator.fit(Xmsource[i], Ymsource[i])
        output[:,i] = estimator.predict(Xtrain)
        estimators.append(estimator)
    
    reg = DecisionTreeClassifier(max_depth=2,splitter='random',max_features="log2",random_state=0)
    estimators.append(reg)

    output_cv = Ffold_cross_val(Xtrain, Ytrain, 5, reg)
    meta_feature = np.concatenate([output, output_cv], axis=1)
    

    linearR = LinearRegression()
    linearR.fit(meta_feature, Ytrain)
    print('The linear combination of hypothesis is founded:')
    print('coef:', linearR.coef_ ,'|| intercept :', linearR.intercept_)
    hypothesis = np.zeros(row_Xtrain, len(estimators))
    for j in range(len(estimators)):
        hypothesis[:,j] = estimators[j].predict(Xtest)

    coef = linearR.coef_
    intercept = linearR.intercept_
    predict = np.ones(row_Xtrain)*intercept
    for i in range(len(coef)):
        predict += coef[j]*hypothesis[:,j]
    return predict
        


In [24]:
my_dict = {
    'key1': [[1, 2, 3],[10,11,12]],
    'key2': [[4, 5, 6], [7, 8, 9]],
}
x = np.array([[1,1,1], [2, 2, 2]])
TransferStacking(my_dict, x)


TypeError: TransferStacking() missing 2 required positional arguments: 'Ymsource' and 'Ytarget'

In [13]:
import numpy as np
np.concatenate(list(my_dict.values()))

array([[ 1,  2,  3],
       [10, 11, 12],
       [ 4,  5,  6],
       [ 7,  8,  9]])

In [22]:
from sklearn.model_selection import KFold

# 假设有一个数据集 X 和对应的标签 y
X = [11, 21, 31, 41, 13, 6, 7, 8, 9, 10]
y = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1]

# 创建一个 KFold 对象，设置折数为 5
kf = KFold(n_splits=5)

# 使用 KFold 对象划分数据集，并进行交叉验证
for train_index, test_index in kf.split(X):
    X_train, X_test = [X[i] for i in train_index], [X[i] for i in test_index]
    y_train, y_test = [y[i] for i in train_index], [y[i] for i in test_index]

    # 输出训练集和测试集的索引
    print("Train:", train_index)
    print("Test:", test_index)
    print()



Train: [2 3 4 5 6 7 8 9]
Test: [0 1]

Train: [0 1 4 5 6 7 8 9]
Test: [2 3]

Train: [0 1 2 3 6 7 8 9]
Test: [4 5]

Train: [0 1 2 3 4 5 8 9]
Test: [6 7]

Train: [0 1 2 3 4 5 6 7]
Test: [8 9]



In [None]:
def Transfer_Stacking(trans_S, Multi_trans_A, response_S, Multi_response_A, test,):
    """Boosting for Regression Transfer

    Please feel free to open issues in the Github : https://github.com/Bin-Cao/TrAdaboost
    or 
    contact Bin Cao (bcao@shu.edu.cn)
    in case of any problems/comments/suggestions in using the code. 

    Parameters
    ----------
    trans_S : feature matrix of same-distribution training data

    Multi_trans_A : dict, feature matrix of diff-distribution training data
    e.g.,
    Multi_trans_A = {
    'trans_A_1' :  data_1 , 
    'trans_A_2' : data_2 ,
    ......
    }

    response_S : responses of same-distribution training data, real number

    Multi_response_A : dict, responses of diff-distribution training data, real number
    e.g.,
    Multi_response_A = {
    'response_A_1' :  response_1 , 
    'response_A_2' : response_2 ,
    ......
    }

    test : feature matrix of test data

    Examples
    --------
    # same-distribution training data
    tarin_data = pd.read_csv('M_Sdata.csv')
    # two diff-distribution training data
    A1_tarin_data = pd.read_csv('M_Adata1.csv')
    A2_tarin_data = pd.read_csv('M_Adata2.csv')
    # test data
    test_data = pd.read_csv('M_Tdata.csv')

    Multi_trans_A = {
    'trans_A_1' : A1_tarin_data.iloc[:,:-1],
    'trans_A_2' : A2_tarin_data.iloc[:,:-1]
    }
    Multi_response_A = {
    'response_A_1' :  A1_tarin_data.iloc[:,-1] , 
    'response_A_2' :  A2_tarin_data.iloc[:,-1] ,
    }
    trans_S = tarin_data.iloc[:,:-1]
    response_S = tarin_data.iloc[:, -1]
    test = test_data.iloc[:,:-1]
 
    Transfer_Stacking(trans_S, Multi_trans_A, response_S, Multi_response_A, test,)

    References
    ----------
    .. [1] Pardoe, D., & Stone, P. (2010, June). 
    Boosting for regression transfer. 
    In Proceedings of the 27th International Conference 
    on International Conference on Machine Learning (pp. 863-870).

    """
    # generate a pool of experts according the diff-dis datasets
    weak_classifiers_set = []
    reg = DecisionTreeRegressor(max_depth=2,splitter='random',max_features="log2",random_state=0)
    for source in range(len(Multi_trans_A)):
        trans_A = list(Multi_trans_A.values())[source]
        response_A = list(Multi_response_A.values())[source]

        trans_A = np.asarray(trans_A, order='C')
        response_A = np.asarray(response_A, order='C')

        weak_classifier = reg.fit(trans_A, response_A, )
        weak_classifiers_set.append(weak_classifier)
    print('A set of experts is initilized and contains {} classifier'.format(len(weak_classifiers_set)))
    print('='*60)

    row_S = trans_S.shape[0]
    row_T = test.shape[0]
    print ('params initial finished.')

    X = np.array(trans_S)
    Y = np.array(response_S)
    LOOCV_LS_matrix = np.ones([row_S, len(weak_classifiers_set)+1])
    LOOCV_LS_matrix[:,-1] = LOOCV_output(X,Y)
    for j in range(len(weak_classifiers_set)):
        LOOCV_LS_matrix[:,j] = weak_classifiers_set[j].predict(X)
    
    # find the linear combination of hypotheses that minimizes squared error.
    reg = LinearRegression().fit(LOOCV_LS_matrix, Y)
    print('The linear combination of hypotheses is founded:')
    print('coef:', reg.coef_ ,'|| intercept :', reg.intercept_)
    coef = reg.coef_
    intercept = reg.intercept_
    # add the newly clf into the set
    weak_classifiers_set.append(reg.fit(X, Y))

    # save the prediction results of weak classifiers
    result_response = np.ones([row_T, len(weak_classifiers_set)])
    for item in range(len(weak_classifiers_set)):
        result_response[:,item] = weak_classifiers_set[item].predict(np.array(test))
    predict = np.ones(row_T) * intercept
    for j in range(len(coef)):
        predict += coef[j] * result_response[:,j]
    print('Transfer_Stacking is done')
    print('='*60)
    print('The prediction responses of test data are :')
    print(predict)
    return predict


def LOOCV_output(X,Y):
    loo = LeaveOneOut()
    reg = DecisionTreeRegressor(max_depth=2,splitter='random',max_features="log2",random_state=0)
    y_pre_loocv = []
    for train_index, test_index in loo.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, _ = Y[train_index], Y[test_index]
        weak_classifier_new = reg.fit(X_train, y_train)
        y_pre = weak_classifier_new.predict(X_test)
        y_pre_loocv.append(y_pre)
    return y_pre_loocv

In [4]:
my_dict = {'A': 1, 'B': 2, 'C': 3}
for i in range(2):
    first_key = next(iter(my_dict))
    first_value = my_dict[first_key]

    print(first_value)



1
1


In [5]:
from sklearn.linear_model import LinearRegression
import numpy as np

# 创建训练数据
X = np.array([[1], [2], [3], [4]])
y = np.array([3, 5, 7, 9])

# 创建线性回归模型并进行训练
model = LinearRegression()
model.fit(X, y)

# 进行预测
X_test = np.array([[5], [6]])
y_pred = model.predict(X_test)

print(y_pred)

[11. 13.]


In [6]:
model.coef_

array([2.])

In [7]:
model.intercept_

0.9999999999999982

In [8]:
from sklearn.linear_model import LinearRegression
import numpy as np

# 创建训练数据
X = np.array([[1, 2], [2, 4], [3, 6], [4, 8]])  # 多维特征
y = np.array([3, 5, 7, 9])

# 创建线性回归模型并进行训练
model = LinearRegression()
model.fit(X, y)

# 进行预测
X_test = np.array([[5, 10], [6, 12]])  # 新的测试数据
y_pred = model.predict(X_test)

print(y_pred)

[11. 13.]


In [10]:
model.coef_

array([0.4, 0.8])

In [11]:
model.intercept_

0.9999999999999991

In [12]:
from mobo import Mobo4mat

import pandas as pd



TypeError: non_dominated_sorting() takes 1 positional argument but 2 were given