In [6]:
import numpy as np
import pandas as pd
import torch
from sklearn.cluster import KMeans


class CIFCF():
    def __init__(self, input_size, output_size, hidden_layers_num):
        self.input_size = input_size
        self.output_size = output_size
        self.structure = [self.input_size] + hidden_layers_num * [10] + [self.output_size]
        self.W = []
        self.B = []
        self.All_Layers = []
        self.input = None
        self.output = None
        for i in range(len(self.structure)-1):
            self.W.append(torch.randn(self.structure[i+1], self.structure[i], requires_grad = True))
            self.B.append(torch.randn(self.structure[i+1], requires_grad = True))
    
    def forward(self, x):
        self.All_Layers = []
        self.input = x
        self.All_Layers.append(x)
        for i in range(len(self.structure)-2):
            self.All_Layers.append(torch.relu(self.W[i] @ self.All_Layers[i] + self.B[i]))
        self.output = torch.sigmoid(self.W[-1] @ self.All_Layers[-1] + self.B[-1])
        return self.output
    
    def global_intervention(self, batch_X, batch_y):
        global_intervention_W = []
        global_intervention_B = []
        for w in self.W:
            global_intervention_W.append(torch.zeros_like(w))
        for b in self.B:
            global_intervention_B.append(torch.zeros_like(b))
        
        for i in range(batch_y.size(dim = 0)):
            x = batch_X[i]
            y = self.forward(x)
            l = torch.nn.functional.binary_cross_entropy_with_logits(y ,batch_y[i])
            l.backward(retain_graph = True)
            for i in range(len(self.W)):
                global_intervention_W[i] += self.W[i].grad
                global_intervention_B[i] += self.B[i].grad
        
        return global_intervention_W, global_intervention_B
    
    def forward_with_global_intervention(self, x, global_intervention, alpha):
        layers = []
        layers.append(x)
        for i in range(len(self.structure)-2):
            layers.append(torch.relu((self.W[i] + alpha * global_intervention[0][i]) 
                                    @ layers[i] + self.B[i] + alpha * global_intervention[1][i]))
        self.output = torch.sigmoid((self.W[-1] + alpha * global_intervention[0][-1]) 
                                    @ self.All_Layers[-1] + self.B[-1] + alpha * global_intervention[1][-1])
        return self.output
    
    def update_model(self, batch_X, batch_y, alpha, beta, X, y, epoch):

        copy_W = self.W
        copy_B = self.B
        M = batch_y.size(dim = 0)

        global_intervention = self.global_intervention(batch_X, batch_y)
        y_predicted = torch.empty_like(y)
        for i in range(y.size(dim = 0)):
            y_predicted[i] = self.forward_with_global_intervention(X[i], global_intervention, alpha/M)
        
        loss = torch.nn.functional.binary_cross_entropy_with_logits(y_predicted, y)
        for i in range(len(self.W)):
            self.W[i].grad = None
            self.B[i].grad = None
        loss.backward()
        for i in range(len(self.W)):
            tmp1 = self.W[i].grad
            tmp2 = self.B[i].grad
            with torch.no_grad():
                self.W[i] = copy_W[i] - beta * tmp1
                self.B[i] = copy_B[i] - beta * tmp2
        print('Model updated, loss:' + str(loss.item()))
        for i in range(len(self.W)):
            self.W[i].requires_grad_(True)
            self.B[i].requires_grad_(True)


# model = CIFCF(4, 2, 1)
# print(model.structure)
# print(model.forward(torch.ones(4)).data)
# print(model.W[0][0].size(dim=0))
# print(model.global_intervention((torch.ones(4), torch.ones(4))))






In [2]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
def load_dataset():
    train_data = pd.read_csv('CN_data.csv')
    feature_names = ['Value', 'UNRATE']
    X = np.array(train_data[feature_names])
    # X = preprocessing.normalize(X)
    # 5000e8, 180000e8 
    y = np.array(train_data['GDP'])
    y_tmp = []
    for GDP in y:
        if GDP <= 1e12:
            y_tmp.append([1] + [0]*17)
        elif GDP <= 2e12:
            y_tmp.append([0]*1 + [1] + [0]*16)
        elif GDP <= 3e12:
            y_tmp.append([0]*2 + [1] + [0]*15)
        elif GDP <= 4e12:
            y_tmp.append([0]*3 + [1] + [0]*14)
        elif GDP <= 5e12:
            y_tmp.append([0]*4 + [1] + [0]*13)
        elif GDP <= 6e12:
            y_tmp.append([0]*5 + [1] + [0]*12)
        elif GDP <= 7e12:
            y_tmp.append([0]*6 + [1] + [0]*11)
        elif GDP <= 8e12:
            y_tmp.append([0]*7 + [1] + [0]*10)
        elif GDP <= 9e12:
            y_tmp.append([0]*8 + [1] + [0]*9)
        elif GDP <= 10e12:
            y_tmp.append([0]*9 + [1] + [0]*8)
        elif GDP <= 11e12:
            y_tmp.append([0]*10 + [1] + [0]*7)
        elif GDP <= 12e12:
            y_tmp.append([0]*11 + [1] + [0]*6)
        elif GDP <= 13e12:
            y_tmp.append([0]*12 + [1] + [0]*5)
        elif GDP <= 14e12:
            y_tmp.append([0]*13 + [1] + [0]*4)
        elif GDP <= 15e12:
            y_tmp.append([0]*14 + [1] + [0]*3)
        elif GDP <= 16e12:
            y_tmp.append([0]*15 + [1] + [0]*2)
        elif GDP <= 17e12:
            y_tmp.append([0]*16 + [1] + [0]*1)
        else:
            y_tmp.append([0]*17 + [1])
    y = np.array(y_tmp)
    return X, y



In [3]:
def main(X, y, alpha, beta, K, batch_size, num_epochs):
    # build a model
    model = CIFCF(X.size(dim=1), y.size(dim=1), 2)
    
    # do cluster
    kmeans = KMeans(n_clusters=K, random_state=0)
    cluster_labels = kmeans.fit_predict(X)

    for epoch in range(num_epochs):
        cluster_indices = [torch.where(torch.Tensor(cluster_labels == k))[0] for k in range(K)]
        batch_X = []
        batch_y = []
        for k in range(K):
            batch_indices = torch.randint(low=0, high=len(cluster_indices[k]), size=(batch_size,))
            batch_X.append(X[cluster_indices[k][batch_indices]].numpy())
            batch_y.append(y[cluster_indices[k][batch_indices]].numpy())
        batch_X = np.array(batch_X)
        batch_X = np.reshape(batch_X, (batch_X.shape[0] * batch_X.shape[1], batch_X.shape[2]))
        batch_X = torch.from_numpy(batch_X)
        batch_y = np.array(batch_y)
        batch_y = np.reshape(batch_y, (batch_y.shape[0] * batch_y.shape[1], batch_y.shape[2]))
        batch_y = torch.from_numpy(batch_y)
        
        model.update_model(batch_X, batch_y, alpha, beta, X, y, epoch)
        
    

In [10]:
X, y = load_dataset()
X = torch.Tensor(X)
y = torch.Tensor(y)
alpha = 0.01
beta = 0.01
K = 3
batch_size = 5
num_epochs = 20
main(X, y, alpha, beta, K, batch_size, num_epochs)

Model updated, loss:0.9299614429473877
Model updated, loss:0.9256110191345215
Model updated, loss:0.9792195558547974
Model updated, loss:0.9260925054550171
Model updated, loss:0.9257679581642151
Model updated, loss:1.0241601467132568
Model updated, loss:0.9262093901634216
Model updated, loss:0.9253810048103333
Model updated, loss:0.9978370666503906
Model updated, loss:0.9294636249542236
Model updated, loss:0.9152834415435791
Model updated, loss:0.9313403964042664
Model updated, loss:0.9763050675392151
Model updated, loss:0.922038733959198
Model updated, loss:0.9714675545692444
Model updated, loss:0.9230933785438538
Model updated, loss:0.922995924949646
Model updated, loss:0.9379460215568542
Model updated, loss:1.025076150894165
Model updated, loss:0.9154303669929504
