### Final project of Complex Network Analysis
### Majid Adibian, 402131910

## Question 3: 

In [7]:
# random seed setting
import torch
import numpy as np

from Q1.utils import get_datasets, split_data
from Q1.train import evaluate_model, train_model
from Q1.model import GCNModel, NewGCNModel

seed = 43
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True

In [2]:
CoraFull_dataset, CiteSeer_dataset = get_datasets()
CoraFull_dataset, CiteSeer_dataset = split_data([CoraFull_dataset, CiteSeer_dataset])

### Train DropEdge model on CoraFull dataset

##### With drop edge

In [13]:
!python Q3/train.py --dataset corafull --num_layers 2 --hidden 256 \
    --epoch 200 --lr 0.001 --sampling_percent 0.05 --dropout 0.9 \
    --normalization BingGeNormAdj --batch_norm --device cuda \
    --result_path "Q3/results/CuraFull_with_dropedge.png"

Epoch 200 | train_loss: 0.160 | train_acc: 0.962 | val_acc: 0.717: 100%|█| 200/2
accuracy on test dataset: 0.7243975400924683


##### Without drop edge

In [16]:
## Using code of DropEdge paper
!python Q3/train.py --dataset corafull --num_layers 2 --hidden 256 \
    --epoch 200 --lr 0.001 --sampling_percent 1 --dropout 0.9 \
    --normalization BingGeNormAdj --batch_norm --device cuda \
    --result_path "Q3/results/CuraFull_without_dropedge.png"

Epoch 200 | train_loss: 0.077 | train_acc: 0.988 | val_acc: 0.678: 100%|█| 200/2
accuracy on test dataset: 0.6997991800308228


In [25]:
## Using code of question 1
device = "cuda"
dataset = CoraFull_dataset
model = GCNModel(dataset["nodes"].shape[1], dataset["num_classes"], 256, 2).to(device)
model, _, _ = train_model(model, dataset, device=device, epoches=200, lr=0.009)
data = {"nodes":dataset["nodes"].to(device), "edges":dataset["edges"].to(device), "labels":dataset["labels"].to(device)}
test_accuracy = evaluate_model(model, data, dataset["test_indexes"])
print("accuracy on test dataset: {:.4f}".format(test_accuracy))

accuracy on test dataset: 0.6872


### Train DropEdge model on CiteSeer dataset

##### With drop edge

In [23]:
!python Q3/train.py --dataset citeseer --num_layers 2 \
    --hidden 256 --epoch 200 --lr 0.009 --sampling_percent 0.05 \
    --dropout 0.9 --normalization BingGeNormAdj --batch_norm --device cuda \
    --result_path "Q3/results/CiteSeer_with_dropedge.png"

Epoch 200 | train_loss: 0.190 | train_acc: 0.945 | val_acc: 0.760: 100%|█| 200/2
accuracy on test dataset: 0.7664670944213867


##### Without drop edge

In [24]:
## Using code of DropEdge paper
!python Q3/train.py --dataset citeseer --num_layers 2 \
    --hidden 256 --epoch 200 --lr 0.009 --sampling_percent 1 \
    --dropout 0.9 --normalization BingGeNormAdj --batch_norm --device cuda \
    --result_path "Q3/results/CiteSeer_without_dropedge.png"

Epoch 200 | train_loss: 0.182 | train_acc: 0.940 | val_acc: 0.730: 100%|█| 200/2
accuracy on test dataset: 0.7095808386802673


In [24]:
## Using code of question 1
device = "cuda"
dataset = CiteSeer_dataset
model = GCNModel(dataset["nodes"].shape[1], dataset["num_classes"], hidden_size=256, num_layers=2).to(device)
model, _, _ = train_model(model, dataset, device=device, epoches=200, lr=0.009)
data = {"nodes":dataset["nodes"].to(device), "edges":dataset["edges"].to(device), "labels":dataset["labels"].to(device)}
test_accuracy = evaluate_model(model, data, dataset["test_indexes"])
print("accuracy on test dataset: {:.4f}".format(test_accuracy))

accuracy on test dataset: 0.7081


### Question 3.f: Set number of layers to 8 and evaluate the effect of drop edge

##### CuraFull dataset

In [15]:
## With drop edge
!python Q3/train.py --dataset corafull --num_layers 8 \
    --hidden 256 --epoch 200 --lr 0.001 --sampling_percent 0.05 \
    --dropout 0.2 --normalization BingGeNormAdj --batch_norm --device cuda \
    --result_path "Q3/results/CuraFull_8_layers_with_dropedge.png"

Epoch 200 | train_loss: 0.483 | train_acc: 0.900 | val_acc: 0.668: 100%|█| 200/2
accuracy on test dataset: 0.6887549757957458


In [16]:
## Without drop edge Using code of DropEdge paper 
!python Q3/train.py --dataset corafull --num_layers 8 \
    --hidden 256 --epoch 200 --lr 0.001 --sampling_percent 1 \
    --dropout 0.2 --normalization BingGeNormAdj --batch_norm --device cuda \
    --result_path "Q3/results/CuraFull_8_layers_without_dropedge.png"

Epoch 200 | train_loss: 0.172 | train_acc: 0.942 | val_acc: 0.618: 100%|█| 200/2
accuracy on test dataset: 0.6335341334342957


In [5]:
## Without drop edge Using code of question 1
device = "cuda"
dataset = CoraFull_dataset
model = GCNModel(dataset["nodes"].shape[1], dataset["num_classes"], hidden_size=256, num_layers=8).to(device)
model, _, _ = train_model(model, dataset, device=device, epoches=200, lr=0.001)
data = {"nodes":dataset["nodes"].to(device), "edges":dataset["edges"].to(device), "labels":dataset["labels"].to(device)}
test_accuracy = evaluate_model(model, data, dataset["test_indexes"])
print("accuracy on test dataset: {:.4f}".format(test_accuracy))

Epoch 200 | train_loss: 2.122 | train_acc: 0.425 | val_acc: 0.471: 100%|██████████| 200/200 [02:27<00:00,  1.36it/s]


accuracy on test dataset: 0.4799


##### CiteSeer dataset

In [129]:
## With drop edge
!python Q3/train.py --dataset citeseer --num_layers 8 \
    --hidden 64 --epoch 500 --lr 0.009 --sampling_percent 0.05 \
    --dropout 0.2 --normalization BingGeNormAdj --batch_norm --device cuda \
    --result_path "Q3/results/CiteSeer_8_layers_with_dropedge.png"

Epoch 500 | train_loss: 0.185 | train_acc: 0.954 | val_acc: 0.712: 100%|█| 500/5
accuracy on test dataset: 0.7365269660949707


In [58]:
## Without drop edge Using code of DropEdge paper 
!python Q3/train.py --dataset citeseer --num_layers 8 \
    --hidden 64 --epoch 500 --lr 0.009 --sampling_percent 1 \
    --dropout 0.2 --normalization BingGeNormAdj --batch_norm --device cuda \
    --result_path "Q3/results/CiteSeer_8_layers_without_dropedge.png"

Epoch 500 | train_loss: 0.125 | train_acc: 0.941 | val_acc: 0.694: 100%|█| 500/5
accuracy on test dataset: 0.688622772693634


In [6]:
## Without drop edge Using code of question 1
device = "cuda"
dataset = CiteSeer_dataset
model = GCNModel(dataset["nodes"].shape[1], dataset["num_classes"], hidden_size=64, num_layers=8).to(device)
model, _, _ = train_model(model, dataset, device=device, epoches=500, lr=0.009)
data = {"nodes":dataset["nodes"].to(device), "edges":dataset["edges"].to(device), "labels":dataset["labels"].to(device)}
test_accuracy = evaluate_model(model, data, dataset["test_indexes"])
print("accuracy on test dataset: {:.4f}".format(test_accuracy))

  0%|          | 0/500 [00:00<?, ?it/s]

Epoch 500 | train_loss: 1.157 | train_acc: 0.585 | val_acc: 0.592: 100%|██████████| 500/500 [00:18<00:00, 27.29it/s]


accuracy on test dataset: 0.5629


### Question 3.h: Use skip-connection 

##### CuraFull dataset

In [17]:
## Using code of DropEdge without edge removing
!python Q3/train.py --dataset corafull --num_layers 8 --skip_connection \
    --hidden 256 --epoch 200 --lr 0.001 --sampling_percent 1 \
    --dropout 0.2 --normalization BingGeNormAdj --batch_norm --device cuda \
    --result_path "Q3/results/CoraFull_8_layers_with_skip_connection.png"

Epoch 200 | train_loss: 0.155 | train_acc: 0.970 | val_acc: 0.655: 100%|█| 200/2
accuracy on test dataset: 0.6679216623306274


In [3]:
## Using code of question 1
device = "cuda"
dataset = CoraFull_dataset
model = GCNModel(dataset["nodes"].shape[1], dataset["num_classes"], hidden_size=256, num_layers=8, skip_connection=True).to(device)
model, _, _ = train_model(model, dataset, device=device, epoches=200, lr=0.001)
data = {"nodes":dataset["nodes"].to(device), "edges":dataset["edges"].to(device), "labels":dataset["labels"].to(device)}
test_accuracy = evaluate_model(model, data, dataset["test_indexes"])
print("accuracy on test dataset: {:.4f}".format(test_accuracy))

Epoch 200 | train_loss: 0.196 | train_acc: 0.948 | val_acc: 0.684: 100%|██████████| 200/200 [02:27<00:00,  1.35it/s]


accuracy on test dataset: 0.6988


##### CiteSeer dataset

In [135]:
## Using code of DropEdge without edge removing
!python Q3/train.py --dataset citeseer --num_layers 8 --skip_connection \
    --hidden 64 --epoch 500 --lr 0.009 --sampling_percent 1 \
    --dropout 0.2 --normalization BingGeNormAdj --batch_norm --device cuda \
    --result_path "Q3/results/CiteSeer_8_layers_with_skip_connection.png"

Epoch 500 | train_loss: 0.003 | train_acc: 1.000 | val_acc: 0.706: 100%|█| 500/5
accuracy on test dataset: 0.6991018056869507


In [4]:
## Using code of question 1
device = "cuda"
dataset = CiteSeer_dataset
model = GCNModel(dataset["nodes"].shape[1], dataset["num_classes"], hidden_size=64, num_layers=8, skip_connection=True).to(device)
model, _, _ = train_model(model, dataset, device=device, epoches=500, lr=0.009)
data = {"nodes":dataset["nodes"].to(device), "edges":dataset["edges"].to(device), "labels":dataset["labels"].to(device)}
test_accuracy = evaluate_model(model, data, dataset["test_indexes"])
print("accuracy on test dataset: {:.4f}".format(test_accuracy))

  0%|          | 0/500 [00:00<?, ?it/s]

Epoch 500 | train_loss: 0.215 | train_acc: 0.913 | val_acc: 0.751: 100%|██████████| 500/500 [00:18<00:00, 27.60it/s]


accuracy on test dataset: 0.7231


### Question 3.i: New idea to solve over-smoothing problem

##### CuraFull dataset

In [19]:
## Using code of DropEdge without edge removing
!python Q3/train.py --dataset corafull --num_layers 8 --new_model \
    --hidden 256 --epoch 200 --lr 0.001 --sampling_percent 1 \
    --dropout 0.2 --normalization BingGeNormAdj --batch_norm --device cuda \
    --result_path "Q3/results/CoraFull_8_layers_with_new_idea.png"

Epoch 200 | train_loss: 0.174 | train_acc: 0.943 | val_acc: 0.617: 100%|█| 200/2
accuracy on test dataset: 0.6335341334342957


In [10]:
## Using code of question 1
device = "cuda"
dataset = CiteSeer_dataset
model = NewGCNModel(dataset["nodes"].shape[1], dataset["num_classes"], hidden_size=256, num_layers=8, dropout=0.2).to(device)
model, _, _ = train_model(model, dataset, device=device, epoches=200, lr=0.001)
data = {"nodes":dataset["nodes"].to(device), "edges":dataset["edges"].to(device), "labels":dataset["labels"].to(device)}
test_accuracy = evaluate_model(model, data, dataset["test_indexes"])
print("accuracy on test dataset: {:.4f}".format(test_accuracy))

Epoch 200 | train_loss: 0.201 | train_acc: 0.904 | val_acc: 0.739: 100%|██████████| 200/200 [00:08<00:00, 23.12it/s]

accuracy on test dataset: 0.7066





##### CiteSeer dataset

In [13]:
## Using code of DropEdge without edge removing
!python Q3/train.py --dataset citeseer --num_layers 8 --new_model \
    --hidden 64 --epoch 200 --lr 0.009 --sampling_percent 1 \
    --dropout 0.2 --normalization BingGeNormAdj --batch_norm --device cuda \
    --result_path "Q3/results/CiteSeer_8_layers_with_new_idea.png"

Epoch 200 | train_loss: 0.259 | train_acc: 0.908 | val_acc: 0.697: 100%|█| 200/2
accuracy on test dataset: 0.7050898671150208


In [11]:
## Using code of question 1
device = "cuda"
dataset = CiteSeer_dataset
model = NewGCNModel(dataset["nodes"].shape[1], dataset["num_classes"], hidden_size=64, num_layers=8, dropout=0.2).to(device)
model, _, _ = train_model(model, dataset, device=device, epoches=200, lr=0.009)
data = {"nodes":dataset["nodes"].to(device), "edges":dataset["edges"].to(device), "labels":dataset["labels"].to(device)}
test_accuracy = evaluate_model(model, data, dataset["test_indexes"])
print("accuracy on test dataset: {:.4f}".format(test_accuracy))

  0%|          | 0/200 [00:00<?, ?it/s]

Epoch 200 | train_loss: 0.360 | train_acc: 0.851 | val_acc: 0.742: 100%|██████████| 200/200 [00:07<00:00, 25.87it/s]


accuracy on test dataset: 0.7126
