<a href="https://colab.research.google.com/github/Jesson-Wei/Bayesian-inference-based-GNN/blob/main/Neurips2022_Understanding_Non_linearity_in_Graph_Neural_Networks_from_the_Bayesian_Inference_Perspective_Experiments.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **NeurIPS 2022** "Understanding Non-linearity in Graph Neural Networks from the Bayesian-Inference Perspective" by Rongzhe Wei, Haoteng Yin, Junteng Jia, Austin R. Benson, Pan Li. **Link**: [Paper](https://arxiv.org/abs/2207.11311)

## First, we recap the main conclusions of this paper:
* When the node attributes are less informative compared to the structural information, non-linear propagation and linear propagation have almost the same mis-classification error.
* When the node attributes are more informative, non-linear propagation shows
advantages. The mis-classification error of non-linear propagation can be significantly smaller than that of linear propagation with sufficiently informative node attributes.
* When there is a distribution shift of the node attributes between the training and testing datasets, non-linearity provides better transferability in the regime of informative node attributes.

## Second, we demonstrate the real-data experiments on the PubMed, Cora, and CiteSeer datasets.

### Setting up experiments

In [None]:
# Add this in a Google Colab cell to install the correct version of Pytorch Geometric.
import torch

def format_pytorch_version(version):
  """Return the part of a PyTorch version string that precedes the first '+'."""
  release, _, _ = version.partition('+')
  return release

# Drop everything from the first '+' onwards; the result is interpolated into
# the wheel index URLs used by the pip commands in this cell.
TORCH_version = torch.__version__
TORCH = format_pytorch_version(TORCH_version)
print(f"PyTorch version: {TORCH}")

def format_cuda_version(version):
  """Turn a CUDA version string into the tag used in the wheel index URL.

  Args:
    version: CUDA version such as '11.3', or None when the installed
      PyTorch build has no CUDA support (torch.version.cuda is None then).

  Returns:
    'cu' followed by the version without dots (e.g. 'cu113'), or 'cpu'
    when version is None.
  """
  if version is None:
    # CPU-only builds report no CUDA version; their wheels use the 'cpu' tag.
    return 'cpu'
  return 'cu' + version.replace('.', '')

# CUDA version string reported by the installed PyTorch build, reformatted
# into the tag that is interpolated into the wheel index URLs below.
CUDA_version = torch.version.cuda
CUDA = format_cuda_version(CUDA_version)
print(f"CUDA Version: {CUDA}")

!pip install torch-scatter     -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-sparse      -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-cluster     -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-geometric 

!pip install SciencePlots
!pip install git+https://github.com/garrettj403/SciencePlots.git

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
import random
import math

import torch
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.datasets import Planetoid
import torch_geometric.utils as utils
import torch.nn as nn
from torch import linalg as LA

In [None]:
# Check whether the "science" matplotlib style (used by the plotting cell) is available.
# NOTE(review): recent SciencePlots releases reportedly need `import scienceplots`
# before their styles are registered -- confirm against the installed version.
'science' in plt.style.available

### Import datasets

In [None]:
# NOTE: `paths` is not referenced again in this cell; the download root below
# is rebuilt from `dataset` instead.
paths = ['/tmp/PubMed', '/tmp/Cora', '/tmp/Citeseer']
names = ['PubMed', 'Cora', 'Citeseer']
### Select dataset: 0 for PubMed, 1 for Cora, 2 for Citeseer
dataset = names[2]
# Load the Planetoid dataset under /tmp/<dataset> and take its first graph object.
graph = Planetoid(root=f'/tmp/{dataset}', name=dataset)
data = graph[0]
print(data)
print(f'#class: {graph.num_classes}, #feature_dim: {graph.num_node_features}')

### Check whether GPU is available

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

### Calculate adjacency matrix of the graph

In [None]:
# Dense adjacency matrix of the graph, moved to the compute device.
# max_num_nodes pins the size to data.num_nodes: without it the size is inferred
# from the largest node index that appears in edge_index, so isolated nodes with
# the highest indices would make Adj smaller than the per-node feature matrices.
Adj = utils.to_dense_adj(data.edge_index, max_num_nodes=data.num_nodes).squeeze(dim=0).to(device)
Adj

### Define the parameters of generated distributions and generating functions

In [None]:
# Experiment hyper-parameters; the same values are used for all three supported datasets.
if dataset in ['PubMed', 'Cora', 'Citeseer']:
    #training epochs
    epochs = 100
    #dimension of node attributes
    d = 10
    #Single mean (location) vector; the training functions below iterate over `mus` instead
    mu = 0.03 * np.ones(d)
    # mus = np.array([i * np.ones(d) for i in np.array([0.05, 0.1, 0.2, 0.3, 0.4, 0.5])])
    # mus = np.array([i * np.ones(d) for i in np.array([0.01, 0.025, 0.05, 0.075, 0.1, 0.125, 0.15, 0.175, 0.2])])
    #One constant mean vector of length d per scale; each is a separate experiment setting
    mus = np.array([i * np.ones(d) for i in np.array([0.1, 0.2, 0.3, 0.4, 0.5])])
else:
    raise NotImplementedError

In [None]:
#Generate two-class Gaussian node attributes
def high_dim_Gaussian(Node_labels, d, mu):
    """Sample one d-dimensional Gaussian attribute vector per node, signed by its label.

    Rows are drawn from N(mu, I/d) and row i is then multiplied by
    Node_labels[i].

    Args:
        Node_labels: NumPy array with one label per node (used as a multiplier).
        d: attribute dimension.
        mu: mean vector of length d.

    Returns:
        Array of shape (len(Node_labels), d).
    """
    num_nodes = len(Node_labels)
    covariance = np.identity(d) / d
    samples = np.random.multivariate_normal(mu, covariance, num_nodes, 'raise')
    signs = Node_labels.reshape(-1, 1)
    return samples * signs

#Generate two-class Laplacian node attributes
def high_dim_Laplace(Node_labels, d, mu):
    """Sample one d-dimensional Laplace attribute vector per node, signed by its label.

    Every coordinate is drawn independently from Laplace(loc=mu, scale=1.0)
    and row i is then multiplied by Node_labels[i].

    Args:
        Node_labels: NumPy array with one label per node (used as a multiplier).
        d: attribute dimension.
        mu: location vector of length d.

    Returns:
        Array of shape (len(Node_labels), d).
    """
    shape = (len(Node_labels), d)
    draws = np.random.laplace(loc=mu, scale=1.0, size=shape)
    return draws * Node_labels.reshape(-1, 1)

#The cosine of the angle between two vectors is used to evaluate whether the
#learned classification direction is well aligned with the optimal one
def angle(vec1, vec2):
    """Return the cosine of the angle between vec1 and vec2 (not the angle itself)."""
    norm_product = LA.norm(vec1) * LA.norm(vec2)
    return (vec1 @ vec2) / norm_product

### Model Definition

In [None]:
#Define the class for linear model
class LinearAgg(torch.nn.Module):
    """Linear propagation model: sigmoid(h + Adj @ h) with h = linear(x).

    When ``weighted`` is true, the aggregated term is scaled by a learnable
    scalar initialised to 1.0; otherwise ``self.weight`` is None.
    """

    def __init__(self, d, weighted=False):
        super().__init__()
        self.linear = nn.Linear(d, 1, bias=False)
        if weighted:
            self.weight = nn.Parameter(torch.tensor(1.0), requires_grad=True)
        else:
            self.weight = None

    def forward(self, Adj, x):
        """Return (per-node probabilities, weight of the linear layer)."""
        scores = self.linear(x.float())
        neighbour_sum = Adj @ scores
        if self.weight is not None:
            neighbour_sum = self.weight * neighbour_sum

        return torch.sigmoid(scores + neighbour_sum), self.linear.weight

#Define the class for optimal non-linear model (Gaussian)
class OptimalMP(torch.nn.Module):
    """Non-linear propagation model: sigmoid(h + Adj @ clip(h)) with h = linear(x).

    ``clip`` caps values above ``thres`` at ``thres`` and then values below
    ``-thres`` at ``-thres``; ``thres`` is a learnable scalar initialised to 0.2.
    """

    def __init__(self, d):
        super().__init__()
        self.linear = nn.Linear(d, 1, bias=False)
        self.thres = nn.Parameter(torch.tensor(0.2), requires_grad=True)

    def forward(self, Adj, x):
        """Return (per-node probabilities, weight of the linear layer, threshold)."""
        scores = self.linear(x.float())
        # Clip a copy so the unclipped scores stay available for the self term.
        messages = scores.clone()
        messages[messages > self.thres] = self.thres
        messages[messages < -self.thres] = -self.thres
        logits = scores + Adj @ messages

        return torch.sigmoid(logits), self.linear.weight, self.thres

#Define the class for optimal non-linear model (Laplacian)
class OptimalMP_Laplacian(torch.nn.Module):
    """Non-linear model for Laplacian attributes.

    Computes sigmoid(h + Adj @ clip(h, thres_2)) with h = linear(clip(x, thres_1)).
    Both thresholds are learnable scalars initialised to 2.0.

    Args:
        d: attribute dimension.
        weighted: accepted for signature compatibility; not used by this class.
    """

    def __init__(self, d, weighted=False):
        super().__init__()
        self.linear = nn.Linear(d, 1, bias=False)
        self.thres_1 = nn.Parameter(torch.tensor(2.0), requires_grad=True)
        self.thres_2 = nn.Parameter(torch.tensor(2.0), requires_grad=True)

    @staticmethod
    def _clip(values, thres):
        """Cap values above thres at thres, then values below -thres at -thres.

        Works out of place: `x.float()` returns the very same tensor when the
        input is already float32, so masked in-place assignment would overwrite
        the caller's feature matrix.
        """
        values = torch.where(values > thres, thres, values)
        return torch.where(values < -thres, -thres, values)

    def forward(self, Adj, x):
        """Return (per-node probabilities, linear weight, thres_1, thres_2)."""
        x = self._clip(x.float(), self.thres_1)
        scores = self.linear(x)
        messages = self._clip(scores, self.thres_2)
        logits = scores + Adj @ messages

        return torch.sigmoid(logits), self.linear.weight, self.thres_1, self.thres_2


#Define the class for non-linear model with only feature transformation (Laplacian)
class OptimalMP_Laplacian_Psi(torch.nn.Module):
    """Model with only the attribute clipping: sigmoid(h + Adj @ h), h = linear(clip(x, thres_1)).

    ``thres_1`` is a learnable scalar initialised to 2.0; propagation itself is linear.
    """

    def __init__(self, d):
        super().__init__()
        self.linear = nn.Linear(d, 1, bias=False)
        self.thres_1 = nn.Parameter(torch.tensor(2.0), requires_grad=True)

    def forward(self, Adj, x):
        """Return (per-node probabilities, linear weight, thres_1)."""
        x = x.float()
        # Clip out of place: `x.float()` returns the very same tensor when the
        # input is already float32, so masked in-place assignment would
        # overwrite the caller's feature matrix.
        x = torch.where(x > self.thres_1, self.thres_1, x)
        x = torch.where(x < -self.thres_1, -self.thres_1, x)
        scores = self.linear(x)
        logits = scores + Adj @ scores

        return torch.sigmoid(logits), self.linear.weight, self.thres_1


#Define the class for non-linear model with only non-linear propagation (Laplacian)
class OptimalMP_Laplacian_Phi(torch.nn.Module):
    """Model with only the propagation clipping: sigmoid(h + Adj @ clip(h, thres_2)), h = linear(x).

    ``thres_2`` is a learnable scalar initialised to 2.0; the raw attributes are not clipped.
    """

    def __init__(self, d):
        super().__init__()
        self.linear = nn.Linear(d, 1, bias=False)
        self.thres_2 = nn.Parameter(torch.tensor(2.0), requires_grad=True)

    def forward(self, Adj, x):
        """Return (per-node probabilities, linear weight, thres_2)."""
        scores = self.linear(x.float())
        # Clip a copy so the unclipped scores stay available for the self term.
        messages = scores.clone()
        messages[messages > self.thres_2] = self.thres_2
        messages[messages < -self.thres_2] = -self.thres_2
        logits = scores + Adj @ messages

        return torch.sigmoid(logits), self.linear.weight, self.thres_2


### Model Training

In [None]:
#Number of independent repetitions of each experiment setting
#(the number of training epochs is set separately by `epochs`)
repeat = 1

### Gaussian distribution assumption


In [None]:
def train_gaussian(mus, d, repeat, data, Adj, device, epochs, num_classes, dataset):
    """Train the linear and clipped (OptimalMP) models on Gaussian attributes.

    For every class index (one-vs-all) and every mean vector in ``mus``,
    fresh train and test attributes are sampled with ``high_dim_Gaussian``,
    both models are trained full-batch for ``epochs`` Adam steps with BCE
    loss, and accuracies are aggregated over ``repeat`` runs.

    Args:
        mus: iterable of mean vectors (NumPy arrays of length ``d``).
        d: attribute dimension.
        repeat: number of independent runs per mean vector.
        data: graph data object; ``data.y`` and ``data.num_nodes`` are read.
        Adj: dense adjacency matrix passed to the models.
        device: torch device for models and tensors.
        epochs: number of optimisation steps per run.
        num_classes: number of one-vs-all tasks.
        dataset: dataset name; only referenced by the commented-out save path.

    Returns:
        A list with one entry per class; each entry is a list of rows
        ``[mu_norm, original_avg, mp_avg, linear_avg, original_avg_eval,
        mp_avg_eval, linear_avg_eval, original_std, original_std_eval,
        mp_std, mp_std_eval, linear_std, linear_std_eval]``, one per mean vector.
    """
    # Must be created here: otherwise the append below raises NameError, or
    # silently extends a list left over from an earlier notebook cell.
    total_results = []
    for lidx in range(num_classes):
        # One-vs-all labels: -1 for the current class, +1 for every other class.
        Node_labels = np.where((data.y == lidx), -1, 1)
        label = torch.from_numpy(Node_labels).unsqueeze(dim=1).to(device).float()
        # BCE needs targets in {0, 1}.
        label_loss = torch.from_numpy(np.where((data.y == lidx), 0, 1)).unsqueeze(dim=1).to(device).float()

        results = []
        for mu in mus:
            # Columns: acc_org, acc_org_eval, acc_mp, acc_mp_eval, acc_linear, acc_linear_eval.
            record = np.zeros([repeat, 6])
            mu_torch = torch.from_numpy(mu).float()
            for i in range(repeat):
                print('=' * 20)
                print(f'mu = {mu}')
                X_feature = high_dim_Gaussian(Node_labels, d, mu)
                X_feature_torch = torch.from_numpy(X_feature).to(device)
                X_feature_test = high_dim_Gaussian(Node_labels, d, mu)
                X_feature_torch_test = torch.from_numpy(X_feature_test).to(device)
                # train original acc (sign of <x, mu>, no graph propagation)
                acc_org = (np.sign(X_feature @ mu) == Node_labels).sum() / data.num_nodes
                print(f'acc_org={acc_org}')
                # test original acc
                acc_org_eval = (np.sign(X_feature_test @ mu) == Node_labels).sum() / data.num_nodes

                model_linear = LinearAgg(d).to(device)
                optimizer1 = torch.optim.Adam(model_linear.parameters(), lr=0.01, weight_decay=5e-4)
                criterion1 = nn.BCELoss()

                model_mp = OptimalMP(d).to(device)
                optimizer2 = torch.optim.Adam(model_mp.parameters(), lr=0.01, weight_decay=5e-4)
                criterion2 = nn.BCELoss()

                model_linear.train()
                model_mp.train()
                for epoch in range(epochs):
                    optimizer1.zero_grad()
                    optimizer2.zero_grad()
                    out_linear, out_linear_weight = model_linear(Adj, X_feature_torch)
                    out_mp, out_mp_weight, out_thres = model_mp(Adj, X_feature_torch)
                    # Map probabilities to {-1, +1} predictions.
                    predicted_label_linear = torch.sign(out_linear - 0.5)
                    predicted_label_mp = torch.sign(out_mp - 0.5)
                    loss_linear = criterion1(out_linear, label_loss)
                    loss_mp = criterion2(out_mp, label_loss)
                    acc_linear = (predicted_label_linear == label).sum() / data.num_nodes
                    acc_mp = (predicted_label_mp == label).sum() / data.num_nodes
                    loss_linear.backward()
                    loss_mp.backward()
                    optimizer1.step()
                    optimizer2.step()
                    if epoch % 50 == 0:
                        print(f'Epoch:{epoch}')
                        print(
                            f'Linear weight angle:{angle(out_linear_weight.detach().cpu().squeeze(), mu_torch).item()}')
                        print(f'MP weight angle:{angle(out_mp_weight.detach().cpu().squeeze(), mu_torch).item()}')
                        print(f'MP Threshold:{out_thres}')
                        print(f'Linear training loss:{loss_linear}')
                        print(f'MP training loss:{loss_mp}')
                        print(f'acc_linear:{acc_linear}')
                        print(f'acc_mp:{acc_mp}')

                model_linear.eval()
                model_mp.eval()

                # Evaluation only needs forward passes; skip building autograd graphs.
                with torch.no_grad():
                    out_linear_eval, out_linear_weight_eval = model_linear(Adj, X_feature_torch_test)
                    out_mp_eval, out_mp_weight_eval, out_thres_eval = model_mp(Adj, X_feature_torch_test)

                predicted_label_linear_eval = torch.sign(out_linear_eval - 0.5)
                predicted_label_mp_eval = torch.sign(out_mp_eval - 0.5)

                acc_linear_eval = (predicted_label_linear_eval == label).sum() / data.num_nodes
                acc_mp_eval = (predicted_label_mp_eval == label).sum() / data.num_nodes
                print('*' * 20)
                print(f'acc_linear:{acc_linear_eval}')
                print(f'acc_mp:{acc_mp_eval}')
                # Training accuracies are those measured at the last training step.
                record[i] = [acc_org, acc_org_eval, acc_mp.item(), acc_mp_eval.item(), acc_linear.item(),
                             acc_linear_eval.item()]
            original_avg, original_avg_eval, mp_avg, mp_avg_eval, linear_avg, linear_avg_eval = record.mean(axis=0)
            original_std, original_std_eval, mp_std, mp_std_eval, linear_std, linear_std_eval = record.std(axis=0)
            results.append([LA.norm(mu_torch).item(),
                            original_avg, mp_avg, linear_avg,
                            original_avg_eval, mp_avg_eval, linear_avg_eval,
                            original_std, original_std_eval, mp_std, mp_std_eval, linear_std, linear_std_eval])
        # np.save(f'./results/semi_synthetic_Gau_{dataset}_l{lidx}.npy', results)
        total_results.append(results)
    return total_results

### Laplacian distribution assumption

In [None]:
def train_laplacian(mus, d, repeat, data, Adj, device, epochs, num_classes, dataset):
    """Train the linear, full non-linear, Phi-only and Psi-only models on Laplacian attributes.

    For every class index (one-vs-all) and every location vector in ``mus``,
    fresh train and test attributes are sampled with ``high_dim_Laplace``,
    the four models are trained full-batch for ``epochs`` Adam steps with BCE
    loss, and accuracies are aggregated over ``repeat`` runs.

    Args:
        mus: iterable of location vectors (NumPy arrays of length ``d``).
        d: attribute dimension.
        repeat: number of independent runs per location vector.
        data: graph data object; ``data.y`` and ``data.num_nodes`` are read.
        Adj: dense adjacency matrix passed to the models.
        device: torch device for models and tensors.
        epochs: number of optimisation steps per run.
        num_classes: number of one-vs-all tasks.
        dataset: dataset name; only referenced by the commented-out save path.

    Returns:
        A list with one entry per class; each entry is a list of rows
        ``[mu_norm, original_avg, mp_avg, linear_avg, phi_avg, psi_avg,
        original_avg_eval, mp_avg_eval, linear_avg_eval, phi_avg_eval,
        psi_avg_eval, original_std, original_std_eval, mp_std, mp_std_eval,
        linear_std, linear_std_eval, phi_std, phi_std_eval, psi_std,
        psi_std_eval]``, one per location vector.
    """
    total_results = []
    for lidx in range(num_classes):
        # One-vs-all labels: -1 for the current class, +1 for every other class.
        Node_labels = np.where((data.y == lidx), -1, 1)
        label = torch.from_numpy(Node_labels).unsqueeze(dim=1).to(device).float()
        # BCE needs targets in {0, 1}.
        label_loss = torch.from_numpy(np.where((data.y == lidx), 0, 1)).unsqueeze(dim=1).to(device).float()

        results = []
        for mu in mus:
            # Columns: acc_org, acc_org_eval, then (train, eval) pairs for mp, linear, phi, psi.
            record = np.zeros([repeat, 10])
            mu_torch = torch.from_numpy(mu).float()
            for i in range(repeat):
                print('=' * 20)
                print(f'mu = {mu}')
                X_feature = high_dim_Laplace(Node_labels, d, mu)
                X_feature_torch = torch.from_numpy(X_feature).to(device)
                X_feature_test = high_dim_Laplace(Node_labels, d, mu)
                X_feature_torch_test = torch.from_numpy(X_feature_test).to(device)
                # train original acc (sign of <x, mu>, no graph propagation)
                acc_org = (np.sign(X_feature @ mu) == Node_labels).sum() / data.num_nodes
                print(f'acc_org={acc_org}')
                # test original acc
                acc_org_eval = (np.sign(X_feature_test @ mu) == Node_labels).sum() / data.num_nodes

                model_linear = LinearAgg(d).to(device)
                optimizer1 = torch.optim.Adam(model_linear.parameters(), lr=0.01, weight_decay=5e-4)
                criterion1 = nn.BCELoss()

                model_mp = OptimalMP_Laplacian(d).to(device)
                optimizer2 = torch.optim.Adam(model_mp.parameters(), lr=0.01, weight_decay=5e-4)
                criterion2 = nn.BCELoss()

                model_phi = OptimalMP_Laplacian_Phi(d).to(device)
                optimizer3 = torch.optim.Adam(model_phi.parameters(), lr=0.01, weight_decay=5e-4)
                criterion3 = nn.BCELoss()

                model_psi = OptimalMP_Laplacian_Psi(d).to(device)
                optimizer4 = torch.optim.Adam(model_psi.parameters(), lr=0.01, weight_decay=5e-4)
                criterion4 = nn.BCELoss()

                # Put all four models in training mode, mirroring the eval() calls below.
                model_linear.train()
                model_mp.train()
                model_phi.train()
                model_psi.train()
                for epoch in range(epochs):
                    optimizer1.zero_grad()
                    optimizer2.zero_grad()
                    optimizer3.zero_grad()
                    optimizer4.zero_grad()
                    out_linear, out_linear_weight = model_linear(Adj, X_feature_torch)
                    out_mp, out_mp_weight, out_thres_1, out_thres_2 = model_mp(Adj, X_feature_torch)
                    out_phi, out_phi_weight, out_thres_phi = model_phi(Adj, X_feature_torch)
                    out_psi, out_psi_weight, out_thres_psi = model_psi(Adj, X_feature_torch)
                    # Map probabilities to {-1, +1} predictions.
                    predicted_label_linear = torch.sign(out_linear - 0.5)
                    predicted_label_mp = torch.sign(out_mp - 0.5)
                    predicted_label_phi = torch.sign(out_phi - 0.5)
                    predicted_label_psi = torch.sign(out_psi - 0.5)
                    loss_linear = criterion1(out_linear, label_loss)
                    loss_mp = criterion2(out_mp, label_loss)
                    loss_phi = criterion3(out_phi, label_loss)
                    loss_psi = criterion4(out_psi, label_loss)
                    acc_linear = (predicted_label_linear == label).sum() / data.num_nodes
                    acc_mp = (predicted_label_mp == label).sum() / data.num_nodes
                    acc_phi = (predicted_label_phi == label).sum() / data.num_nodes
                    acc_psi = (predicted_label_psi == label).sum() / data.num_nodes
                    loss_linear.backward()
                    loss_mp.backward()
                    loss_phi.backward()
                    loss_psi.backward()
                    optimizer1.step()
                    optimizer2.step()
                    optimizer3.step()
                    optimizer4.step()
                    if epoch % 100 == 0:
                        print(f'Epoch:{epoch}')
                        print(
                            f'Linear weight angle:{angle(out_linear_weight.detach().cpu().squeeze(), mu_torch).item()}')
                        # print(f'MP Factor:{model_mp.weight.item()}')
                        print(f'MP weight angle:{angle(out_mp_weight.detach().cpu().squeeze(), mu_torch).item()}')
                        print(f'Phi weight angle:{angle(out_phi_weight.detach().cpu().squeeze(), mu_torch).item()}')
                        print(f'Psi weight angle:{angle(out_psi_weight.detach().cpu().squeeze(), mu_torch).item()}')
                        print(f'MP Threshold 1:{out_thres_1}')
                        print(f'MP Threshold 2:{out_thres_2}')
                        print(f'Threshold Phi:{out_thres_phi}')
                        print(f'Threshold Psi:{out_thres_psi}')
                        print(f'Linear training loss:{loss_linear}')
                        print(f'MP training loss:{loss_mp}')
                        print(f'Phi training loss:{loss_phi}')
                        print(f'Psi training loss:{loss_psi}')
                        print(f'acc_linear:{acc_linear}')
                        print(f'acc_mp:{acc_mp}')
                        print(f'acc_phi:{acc_phi}')
                        print(f'acc_psi:{acc_psi}')

                model_linear.eval()
                model_mp.eval()
                model_phi.eval()
                model_psi.eval()

                # Evaluation only needs forward passes; skip building autograd graphs.
                with torch.no_grad():
                    out_linear_eval, out_linear_weight_eval = model_linear(Adj, X_feature_torch_test)
                    out_mp_eval, out_mp_weight_eval, out_thres_eval_1, out_thres_eval_2 = model_mp(
                        Adj, X_feature_torch_test)
                    out_phi_eval, out_phi_weight_eval, out_thres_eval_phi = model_phi(Adj, X_feature_torch_test)
                    out_psi_eval, out_psi_weight_eval, out_thres_eval_psi = model_psi(Adj, X_feature_torch_test)

                predicted_label_linear_eval = torch.sign(out_linear_eval - 0.5)
                predicted_label_mp_eval = torch.sign(out_mp_eval - 0.5)
                predicted_label_phi_eval = torch.sign(out_phi_eval - 0.5)
                predicted_label_psi_eval = torch.sign(out_psi_eval - 0.5)

                acc_linear_eval = (predicted_label_linear_eval == label).sum() / data.num_nodes
                acc_mp_eval = (predicted_label_mp_eval == label).sum() / data.num_nodes
                acc_phi_eval = (predicted_label_phi_eval == label).sum() / data.num_nodes
                acc_psi_eval = (predicted_label_psi_eval == label).sum() / data.num_nodes
                print('*' * 20)
                print(f'acc_linear:{acc_linear_eval}')
                print(f'acc_mp:{acc_mp_eval}')
                print(f'acc_phi:{acc_phi_eval}')
                print(f'acc_psi:{acc_psi_eval}')

                # Training accuracies are those measured at the last training step.
                record[i] = [acc_org, acc_org_eval, acc_mp.item(), acc_mp_eval.item(), acc_linear.item(),
                             acc_linear_eval.item(), acc_phi.item(), acc_phi_eval.item(), acc_psi.item(),
                             acc_psi_eval.item()]
            (original_avg, original_avg_eval, mp_avg, mp_avg_eval, linear_avg, linear_avg_eval,
             phi_avg, phi_avg_eval, psi_avg, psi_avg_eval) = record.mean(axis=0)
            (original_std, original_std_eval, mp_std, mp_std_eval, linear_std, linear_std_eval,
             phi_std, phi_std_eval, psi_std, psi_std_eval) = record.std(axis=0)
            # Store the norm as a plain float (as train_gaussian does) rather than a
            # 0-d NumPy array, so every entry of the row is an ordinary number.
            results.append(
                [LA.norm(mu_torch).item(), original_avg, mp_avg, linear_avg, phi_avg, psi_avg,
                 original_avg_eval,
                 mp_avg_eval, linear_avg_eval, phi_avg_eval, psi_avg_eval, original_std, original_std_eval, mp_std,
                 mp_std_eval, linear_std,
                 linear_std_eval, phi_std, phi_std_eval, psi_std, psi_std_eval])
        # np.save(f'./results/semi_synthetic_LP_{dataset}_l{lidx}.npy', results)
        total_results.append(results)
    return total_results

In [None]:
#Train either Gaussian or Laplacian
# NOTE: the visualization cells below use the 21-column row layout returned by
# train_laplacian; train_gaussian returns 13-column rows, so those cells need
# their column lists adjusted if the Gaussian run is enabled instead.
# total_results = train_gaussian(mus, d, repeat, data, Adj, device, epochs, graph.num_classes, dataset)
total_results = train_laplacian(mus, d, repeat, data, Adj, device, epochs, graph.num_classes, dataset)

### Visualization

In [None]:
#view one example: the result table of the first one-vs-all task (class index 0),
#one row per entry of `mus`
pd.DataFrame(total_results[0], columns = ['mu_norm', 'original_avg', 'mp_avg', 'linear_avg', 'phi_avg', 'psi_avg', 'original_avg_eval', 'mp_avg_eval', 'linear_avg_eval', 'phi_avg_eval', 'psi_avg_eval', 'original_std', 'original_std_eval', 'mp_std', 'mp_std_eval', 'linear_std', 'linear_std_eval', 'phi_std', 'phi_std_eval', 'psi_std', 'psi_std_eval'])

### Calculate Average Performance

In [None]:
# #Avg Pubmed
# df_pubmed_OVA = pd.DataFrame(np.load('./results_New/semi_synthetic_LP_PubMed_l0.npy'), columns = ['mu_norm', 'original_avg', 'mp_avg', 'linear_avg', 'phi_avg', 'psi_avg', 'original_avg_eval', 'mp_avg_eval', 'linear_avg_eval', 'phi_avg_eval', 'psi_avg_eval', 'original_std', 'original_std_eval', 'mp_std', 'mp_std_eval', 'linear_std', 'linear_std_eval', 'phi_std', 'phi_std_eval', 'psi_std', 'psi_std_eval'])
# for i in range(1, 3):
#     df_pubmed_OVA = df_pubmed_OVA + pd.DataFrame(np.load(f'./results_New/semi_synthetic_LP_PubMed_l{i}.npy'), columns = ['mu_norm', 'original_avg', 'mp_avg', 'linear_avg', 'phi_avg', 'psi_avg', 'original_avg_eval', 'mp_avg_eval', 'linear_avg_eval', 'phi_avg_eval', 'psi_avg_eval', 'original_std', 'original_std_eval', 'mp_std', 'mp_std_eval', 'linear_std', 'linear_std_eval', 'phi_std', 'phi_std_eval', 'psi_std', 'psi_std_eval'])
# df_pubmed_OVA = df_pubmed_OVA / 3
# #Avg Cora
# df_cora_OVA = pd.DataFrame(np.load('./results_New/semi_synthetic_LP_Cora_l0.npy'), columns = ['mu_norm', 'original_avg', 'mp_avg', 'linear_avg', 'phi_avg', 'psi_avg', 'original_avg_eval', 'mp_avg_eval', 'linear_avg_eval', 'phi_avg_eval', 'psi_avg_eval', 'original_std', 'original_std_eval', 'mp_std', 'mp_std_eval', 'linear_std', 'linear_std_eval', 'phi_std', 'phi_std_eval', 'psi_std', 'psi_std_eval'])
# for i in range(1, 7):
#     df_cora_OVA = df_cora_OVA + pd.DataFrame(np.load(f'./results_New/semi_synthetic_LP_Cora_l{i}.npy'), columns = ['mu_norm', 'original_avg', 'mp_avg', 'linear_avg', 'phi_avg', 'psi_avg', 'original_avg_eval', 'mp_avg_eval', 'linear_avg_eval', 'phi_avg_eval', 'psi_avg_eval', 'original_std', 'original_std_eval', 'mp_std', 'mp_std_eval', 'linear_std', 'linear_std_eval', 'phi_std', 'phi_std_eval', 'psi_std', 'psi_std_eval'])
# df_cora_OVA = df_cora_OVA / 7
#Avg Citeseer
# Average the per-class (one-vs-all) result tables element-wise. The number of
# classes is taken from total_results instead of being hard-coded to Citeseer's
# six, so the cell stays correct when another dataset is selected. The variable
# keeps its name because the cells below read it.
result_columns = ['mu_norm', 'original_avg', 'mp_avg', 'linear_avg', 'phi_avg', 'psi_avg',
                  'original_avg_eval', 'mp_avg_eval', 'linear_avg_eval', 'phi_avg_eval', 'psi_avg_eval',
                  'original_std', 'original_std_eval', 'mp_std', 'mp_std_eval',
                  'linear_std', 'linear_std_eval', 'phi_std', 'phi_std_eval', 'psi_std', 'psi_std_eval']
class_frames = [pd.DataFrame(class_result, columns=result_columns) for class_result in total_results]
df_citeseer_OVA = sum(class_frames[1:], class_frames[0]) / len(class_frames)

In [None]:
# Test-time mean accuracies of each method, taken from the averaged table.
(acc_org_citeseer_eval,
 acc_mp_citeseer_eval,
 acc_linear_citeseer_eval,
 acc_mp_phi_citeseer_eval,
 acc_mp_psi_citeseer_eval) = (
    df_citeseer_OVA[column]
    for column in ('original_avg_eval', 'mp_avg_eval', 'linear_avg_eval', 'phi_avg_eval', 'psi_avg_eval'))

# Matching test-time standard deviations, used as error bars in the plot.
(acc_org_citeseer_std_eval,
 acc_mp_citeseer_std_eval,
 acc_linear_citeseer_std_eval,
 acc_mp_phi_citeseer_std_eval,
 acc_mp_psi_citeseer_std_eval) = (
    df_citeseer_OVA[column]
    for column in ('original_std_eval', 'mp_std_eval', 'linear_std_eval', 'phi_std_eval', 'psi_std_eval'))

In [None]:
#print 1/2*||\mu - \nu||_2, i.e. the 'mu_norm' column of the averaged table
#(the plot below uses 2*mu_norm as its x-axis)
mu_norm = df_citeseer_OVA['mu_norm']
mu_norm

In [None]:
# Plot test accuracy (with error bars) of every method against 2*mu_norm.
with plt.style.context(['science','no-latex']):
    fig, ax = plt.subplots(figsize=(9, 6))
    ax.errorbar(2*mu_norm, acc_mp_citeseer_eval, acc_mp_citeseer_std_eval, linestyle = '-', linewidth = 3, marker = 'o', label='Optimal Non-linear Propagation')
    ax.errorbar(2*mu_norm, acc_linear_citeseer_eval, acc_linear_citeseer_std_eval, linestyle = 'dashdot', linewidth = 3, marker = 'v', label='Linear Model')
    ax.errorbar(2*mu_norm, acc_mp_phi_citeseer_eval, acc_mp_phi_citeseer_std_eval, linestyle = 'dashdot', linewidth = 3, marker = '1', label='Only Phi Non-linear Propagation')
    # Legend label spelling corrected ("Tansformation" -> "Transformation").
    ax.errorbar(2*mu_norm, acc_mp_psi_citeseer_eval, acc_mp_psi_citeseer_std_eval, linestyle = 'dashdot', linewidth = 3, alpha = 0.6, marker = '2', label='Only Psi Attribute Transformation')
    ax.errorbar(2*mu_norm, acc_org_citeseer_eval, acc_org_citeseer_std_eval, linestyle = 'dotted', linewidth = 3, marker = 's', label='No Propagation')

    #     ax.set_title('Classification Accuracy on PubMed Dataset (Training Phase)', fontsize = 15)
    ax.set_title('CiteSeer - Laplacian', fontsize = 24)
    ax.legend(loc = 'lower right', fontsize = 18)
    ax.autoscale(tight=True)
    plt.ylim([0.4, 1])
    ax.tick_params(axis='both',
                   labelsize=18,
                   color='black',
                   labelcolor='black',
                   direction='in'
                   )
    # Raw string: in a normal string "\n" inside "\nu" is a newline escape (and
    # "\m" an invalid escape), so the mathtext label would be corrupted.
    plt.xlabel(r'Attributed Information ($||\mu - \nu||_2$)', fontsize = 24)
    plt.ylabel('Accuracy', fontsize = 24)
    # plt.savefig('OVA_CiteSeer_Testing_Laplacian_sufficient.pdf')
    plt.show()