In [2]:
import os
import torch
import wandb
import random
import numpy as np
import pandas as pd

import torch_geometric as pyg
from torch_geometric.data import Data
from torch_geometric.utils import homophily

import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
wandb.login()

In [3]:
run = 1
wandb.init( project = 'fedgdrop',
        name=f'homophily-analysis-v{run}' 
    )

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33memirceyani[0m ([33mfedgraphlearn[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [4]:
datapath = "../../../datasets/"

In [5]:
def torch_load(base_dir, filename):
    fpath = os.path.join(base_dir, filename)    
    return torch.load(fpath, map_location=torch.device('cpu'))

def get_homophily(Dset):
    edge_hom= homophily(edge_index = Dset.edge_index, y =  Dset.y)
    node_hom =homophily(edge_index = Dset.edge_index, y =  Dset.y, method = 'node')
    class_in_hom = homophily(edge_index = Dset.edge_index, y =  Dset.y, method = 'edge_insensitive')
    return edge_hom, node_hom, class_in_hom

## Cora Dataset

### Homophily Across Subgraphs
#### 5 Clients

In [6]:
n_clients = 5

cora_5_df= pd.DataFrame(columns = ['Tr Homophily v1','Tr Homophily v2', 'Tr Homophily v3',
                             'Val Homophily v1','Val Homophily v2', 'Val Homophily v3',
                             'Tst Homophily v1','Tst Homophily v2', 'Tst Homophily v3'])

for client_id in range(n_clients):
        ds = f'{client_id}-Cora'
        partition = torch_load(datapath, f'Cora_disjoint/{n_clients}/partition_{client_id}.pt')
        #Data objects
        tr, val, tst = partition['client_tr'], partition['client_val'] , partition['client_tst']
        tr_hom1, tr_hom2, tr_hom3 = get_homophily(tr)
        val_hom1, val_hom2, val_hom3 = get_homophily(val)
        tst_hom1, tst_hom2, tst_hom3 = get_homophily(tst)
        cora_5_df.loc[ds, :] = [ tr_hom1, tr_hom2, tr_hom3, val_hom1, val_hom2, val_hom3,tst_hom1, tst_hom2, tst_hom3 ]

wandb_cora_cli5_df= wandb.Table(dataframe=cora_5_df)
wandb.log({"cora_5_homophily" : wandb_cora_cli5_df})
                

        


       

In [7]:
n_clients = 10

cora_10_df= pd.DataFrame(columns = ['Tr Homophily v1','Tr Homophily v2', 'Tr Homophily v3',
                             'Val Homophily v1','Val Homophily v2', 'Val Homophily v3',
                             'Tst Homophily v1','Tst Homophily v2', 'Tst Homophily v3'])

for client_id in range(n_clients):
        ds = f'{client_id}-Cora'
        partition = torch_load(datapath, f'Cora_disjoint/{n_clients}/partition_{client_id}.pt')
        #Data objects
        tr, val, tst = partition['client_tr'], partition['client_val'] , partition['client_tst']
        tr_hom1, tr_hom2, tr_hom3 = get_homophily(tr)
        val_hom1, val_hom2, val_hom3 = get_homophily(val)
        tst_hom1, tst_hom2, tst_hom3 = get_homophily(tst)
        cora_10_df.loc[ds, :] = [ tr_hom1, tr_hom2, tr_hom3, val_hom1, val_hom2, val_hom3,tst_hom1, tst_hom2, tst_hom3 ]

wandb_cora_cli10_df= wandb.Table(dataframe=cora_10_df)
wandb.log({"cora_10_homophily" : wandb_cora_cli10_df})
                

        


       

In [8]:
n_clients = 20

cora_20_df= pd.DataFrame(columns = ['Tr Homophily v1','Tr Homophily v2', 'Tr Homophily v3',
                             'Val Homophily v1','Val Homophily v2', 'Val Homophily v3',
                             'Tst Homophily v1','Tst Homophily v2', 'Tst Homophily v3'])

for client_id in range(n_clients):
        ds = f'{client_id}-Cora'
        partition = torch_load(datapath, f'Cora_disjoint/{n_clients}/partition_{client_id}.pt')
        #Data objects
        tr, val, tst = partition['client_tr'], partition['client_val'] , partition['client_tst']
        tr_hom1, tr_hom2, tr_hom3 = get_homophily(tr)
        val_hom1, val_hom2, val_hom3 = get_homophily(val)
        tst_hom1, tst_hom2, tst_hom3 = get_homophily(tst)
        cora_20_df.loc[ds, :] = [ tr_hom1, tr_hom2, tr_hom3, val_hom1, val_hom2, val_hom3,tst_hom1, tst_hom2, tst_hom3 ]

wandb_cora_cli20_df= wandb.Table(dataframe=cora_20_df)
wandb.log({"cora_20_homophily" : wandb_cora_cli20_df})
                

        


       

## CiteSeer Dataset
### 5 Clients

In [9]:
n_clients = 5

seer_5_df= pd.DataFrame(columns = ['Tr Homophily v1','Tr Homophily v2', 'Tr Homophily v3',
                             'Val Homophily v1','Val Homophily v2', 'Val Homophily v3',
                             'Tst Homophily v1','Tst Homophily v2', 'Tst Homophily v3'])

for client_id in range(n_clients):
        ds = f'{client_id}-CiteSeer'
        partition = torch_load(datapath, f'CiteSeer_disjoint/{n_clients}/partition_{client_id}.pt')
        #Data objects
        tr, val, tst = partition['client_tr'], partition['client_val'] , partition['client_tst']
        tr_hom1, tr_hom2, tr_hom3 = get_homophily(tr)
        val_hom1, val_hom2, val_hom3 = get_homophily(val)
        tst_hom1, tst_hom2, tst_hom3 = get_homophily(tst)
        seer_5_df.loc[ds, :] = [ tr_hom1, tr_hom2, tr_hom3, val_hom1, val_hom2, val_hom3,tst_hom1, tst_hom2, tst_hom3 ]

wandb_seer_cli5_df= wandb.Table(dataframe=seer_5_df)
wandb.log({"citeseer_5_homophily" : wandb_seer_cli5_df})
                

        


       

In [10]:
n_clients = 10

seer_10_df= pd.DataFrame(columns = ['Tr Homophily v1','Tr Homophily v2', 'Tr Homophily v3',
                             'Val Homophily v1','Val Homophily v2', 'Val Homophily v3',
                             'Tst Homophily v1','Tst Homophily v2', 'Tst Homophily v3'])

for client_id in range(n_clients):
        ds = f'{client_id}-CiteSeer'
        partition = torch_load(datapath, f'CiteSeer_disjoint/{n_clients}/partition_{client_id}.pt')
        #Data objects
        tr, val, tst = partition['client_tr'], partition['client_val'] , partition['client_tst']
        tr_hom1, tr_hom2, tr_hom3 = get_homophily(tr)
        val_hom1, val_hom2, val_hom3 = get_homophily(val)
        tst_hom1, tst_hom2, tst_hom3 = get_homophily(tst)
        seer_10_df.loc[ds, :] = [ tr_hom1, tr_hom2, tr_hom3, val_hom1, val_hom2, val_hom3,tst_hom1, tst_hom2, tst_hom3 ]

wandb_seer_cli10_df= wandb.Table(dataframe=seer_10_df)
wandb.log({"citeseer_10_homophily" : wandb_seer_cli10_df})
                

        


       

In [11]:
n_clients = 20

seer_20_df= pd.DataFrame(columns = ['Tr Homophily v1','Tr Homophily v2', 'Tr Homophily v3',
                             'Val Homophily v1','Val Homophily v2', 'Val Homophily v3',
                             'Tst Homophily v1','Tst Homophily v2', 'Tst Homophily v3'])

for client_id in range(n_clients):
        ds = f'{client_id}-CiteSeer'
        partition = torch_load(datapath, f'CiteSeer_disjoint/{n_clients}/partition_{client_id}.pt')
        #Data objects
        tr, val, tst = partition['client_tr'], partition['client_val'] , partition['client_tst']
        tr_hom1, tr_hom2, tr_hom3 = get_homophily(tr)
        val_hom1, val_hom2, val_hom3 = get_homophily(val)
        tst_hom1, tst_hom2, tst_hom3 = get_homophily(tst)
        seer_20_df.loc[ds, :] = [ tr_hom1, tr_hom2, tr_hom3, val_hom1, val_hom2, val_hom3,tst_hom1, tst_hom2, tst_hom3 ]

wandb_seer_cli20_df= wandb.Table(dataframe=seer_20_df)
wandb.log({"citeseer_20_homophily" : wandb_seer_cli20_df})
                

        


       

## PubMed Dataset

In [12]:
n_clients = 5

pub_5_df= pd.DataFrame(columns = ['Tr Homophily v1','Tr Homophily v2', 'Tr Homophily v3',
                             'Val Homophily v1','Val Homophily v2', 'Val Homophily v3',
                             'Tst Homophily v1','Tst Homophily v2', 'Tst Homophily v3'])

for client_id in range(n_clients):
        ds = f'{client_id}-PubMed'
        partition = torch_load(datapath, f'PubMed_disjoint/{n_clients}/partition_{client_id}.pt')
        #Data objects
        tr, val, tst = partition['client_tr'], partition['client_val'] , partition['client_tst']
        tr_hom1, tr_hom2, tr_hom3 = get_homophily(tr)
        val_hom1, val_hom2, val_hom3 = get_homophily(val)
        tst_hom1, tst_hom2, tst_hom3 = get_homophily(tst)
        pub_5_df.loc[ds, :] = [ tr_hom1, tr_hom2, tr_hom3, val_hom1, val_hom2, val_hom3,tst_hom1, tst_hom2, tst_hom3 ]

wandb_pub_cli5_df= wandb.Table(dataframe=pub_5_df)
wandb.log({"pub_5_homophily" : wandb_pub_cli5_df})
                

        


       

In [17]:
n_clients = 10

pub_10_df= pd.DataFrame(columns = ['Tr Homophily v1','Tr Homophily v2', 'Val Homophily v1',
                                   'Val Homophily v2', 'Tst Homophily v1','Tst Homophily v2'])

for client_id in range(n_clients):
        ds = f'{client_id}-PubMed'
        partition = torch_load(datapath, f'PubMed_disjoint/{n_clients}/partition_{client_id}.pt')
        #Data objects
        tr, val, tst = partition['client_tr'], partition['client_val'] , partition['client_tst']
        tr_hom1, tr_hom2, _ = get_homophily(tr)
        val_hom1, val_hom2, _ = get_homophily(val)
        tst_hom1, tst_hom2, _ = get_homophily(tst)
        pub_10_df.loc[ds, :] = [ tr_hom1, tr_hom2, val_hom1, val_hom2,tst_hom1, tst_hom2 ]

wandb_pub_cli10_df= wandb.Table(dataframe=pub_10_df)
wandb.log({"pub_10_homophily" : wandb_pub_cli10_df})

AssertionError: 

In [14]:
n_clients = 20

pub_20_df= pd.DataFrame(columns = ['Tr Homophily v1','Tr Homophily v2', 'Tr Homophily v3',
                             'Val Homophily v1','Val Homophily v2', 'Val Homophily v3',
                             'Tst Homophily v1','Tst Homophily v2', 'Tst Homophily v3'])

for client_id in range(n_clients):
        ds = f'{client_id}-PubMed'
        partition = torch_load(datapath, f'PubMed_disjoint/{n_clients}/partition_{client_id}.pt')
        #Data objects
        tr, val, tst = partition['client_tr'], partition['client_val'] , partition['client_tst']
        tr_hom1, tr_hom2, tr_hom3 = get_homophily(tr)
        val_hom1, val_hom2, val_hom3 = get_homophily(val)
        tst_hom1, tst_hom2, tst_hom3 = get_homophily(tst)
        pub_20_df.loc[ds, :] = [ tr_hom1, tr_hom2, tr_hom3, val_hom1, val_hom2, val_hom3,tst_hom1, tst_hom2, tst_hom3 ]

wandb_pub_cli20_df= wandb.Table(dataframe=pub_20_df)
wandb.log({"pub_20_homophily" : wandb_pub_cli20_df})
                


In [18]:
wandb.finish()

