In [9]:
import glob
import random
from preprocess import *
from torch_geometric.loader import DataLoader

labels = {"circle": 0, "square": 1, "star": 2, "triangle": 3}

dataset = []
for label in labels:
    image_paths = glob.glob(f"shapes/{label}/*")
    random.shuffle(image_paths)
    for path in image_paths[:800]:
        data = create_graph_from_contour(path, labels[label])
        dataset.append(data)

In [2]:
from sklearn.model_selection import train_test_split
# データセットの分割
train_data, test_data = train_test_split(dataset, test_size=0.2, random_state=42)
train_data, val_data = train_test_split(train_data, test_size=0.2, random_state=42)

# データローダーの作成
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=32, shuffle=False)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

print(f"訓練データ数: {len(train_data)}")
print(f"検証データ数: {len(val_data)}")
print(f"テストデータ数: {len(test_data)}")

訓練データ数: 2048
検証データ数: 512
テストデータ数: 640


データの準備

In [10]:
import networkx as nx

graphs = []
labels = []
for data in dataset:
    # Create Networkx.classes
    e_list = []
    tensor_edgelist = data.edge_index
    for i in range(len(tensor_edgelist[0])):
        e_list.append((int(tensor_edgelist[0][i]), int(tensor_edgelist[1][i])))
    g = nx.from_edgelist(e_list)

    # Load features
    x = data.x
    #nx.set_node_attributes(g, {j: x[j] for j in range(g.number_of_nodes())}, "feature")
    nx.set_node_attributes(g, {j: str(j) for j in range(g.number_of_nodes())}, "feature")

    # Checking the consecutive numeric indexing.
    node_indices = sorted([node for node in g.nodes()])
    numeric_indices = [index for index in range(g.number_of_nodes())]

    if numeric_indices == node_indices:
        graphs.append(g)
        labels.append(int(data.y))
    else:
        pass

In [11]:
import matplotlib.pyplot as plt
from karateclub.graph_embedding import Graph2Vec

model = Graph2Vec(wl_iterations=2, use_node_attribute="feature", dimensions=256, 
                  down_sampling=0.0001, epochs=100, learning_rate=0.025, min_count=10)
model.fit(graphs)
emb = model.get_embedding() # (1108, 128)

plt.show()

ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

In [7]:
import torch.nn.functional as F

criterion = torch.nn.CrossEntropyLoss()

def train():
    model.train()
    total_loss = 0
    for data in train_loader:
        optimizer.zero_grad()
        out = model(data)
        loss = criterion(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(train_loader)

def evaluate(loader):
    model.eval()
    correct = 0
    for data in loader:
        with torch.no_grad():
            out = model(data)
            pred = out.argmax(dim=1)
            correct += int((pred == data.y).sum())
    return correct / len(loader.dataset)

# トレーニングの実行
best_val_acc = 0
for epoch in range(200):
    train_loss = train()
    train_acc = evaluate(train_loader)
    val_acc = evaluate(val_loader)
    
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), 'best_model.pth')
    
    print(f'Epoch: {epoch}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}')

# ベストモデルのロードとテストデータでの評価
model.load_state_dict(torch.load('best_model.pth'))
test_acc = evaluate(test_loader)
print(f'Test Accuracy: {test_acc:.4f}')

Epoch: 0, Train Loss: 1.3795, Train Acc: 0.3501, Val Acc: 0.3262
Epoch: 1, Train Loss: 1.1763, Train Acc: 0.3721, Val Acc: 0.3301
Epoch: 2, Train Loss: 0.9424, Train Acc: 0.6909, Val Acc: 0.7051
Epoch: 3, Train Loss: 0.7150, Train Acc: 0.7671, Val Acc: 0.7910
Epoch: 4, Train Loss: 0.5688, Train Acc: 0.5659, Val Acc: 0.5762
Epoch: 5, Train Loss: 0.4782, Train Acc: 0.5781, Val Acc: 0.6016
Epoch: 6, Train Loss: 0.3396, Train Acc: 0.6162, Val Acc: 0.6289
Epoch: 7, Train Loss: 0.2796, Train Acc: 0.6240, Val Acc: 0.6387
Epoch: 8, Train Loss: 0.2737, Train Acc: 0.5015, Val Acc: 0.5273
Epoch: 9, Train Loss: 0.2391, Train Acc: 0.4971, Val Acc: 0.5117
Epoch: 10, Train Loss: 0.2148, Train Acc: 0.6348, Val Acc: 0.6309
Epoch: 11, Train Loss: 0.1982, Train Acc: 0.6553, Val Acc: 0.6504
Epoch: 12, Train Loss: 0.2213, Train Acc: 0.6274, Val Acc: 0.6270
Epoch: 13, Train Loss: 0.2185, Train Acc: 0.4121, Val Acc: 0.4277
Epoch: 14, Train Loss: 0.2310, Train Acc: 0.6436, Val Acc: 0.6309
Epoch: 15, Train Los

In [31]:
test_data = []
for label in labels:
    image_paths = glob.glob(f"shapes/circle/*")
    for path in image_paths[600:610]:
        data = create_graph_from_contour(path, labels[label])
        test_data.append(data)

In [7]:
model.load_state_dict(torch.load('best_model.pth'))

<All keys matched successfully>

In [9]:
evaluate(test_loader)

0.7671875

In [23]:
test_data = [
	create_graph_from_contour("shikaku.png", 1),
	create_graph_from_contour("circle.png", 0),
	create_graph_from_contour("star.png", 2),
	create_graph_from_contour("triangle.png", 3)
]

In [62]:
test_data = [
	create_graph_from_contour("0.png", 3),
	create_graph_from_contour("96.png", 2),
]

In [24]:
# 推論
with torch.no_grad():
	for data in test_data:
		output = model(data)  # バッチサイズ1として扱う
		probabilities = torch.exp(output)
		predicted_class = output.argmax(dim=1).item()
		confidence = probabilities[0][predicted_class].item()
	
		print(f"予測されたクラス:", predicted_class)
		print(f"信頼度: {confidence:.4f}")

予測されたクラス: 0
信頼度: 781.2241
予測されたクラス: 0
信頼度: 361.2181
予測されたクラス: 2
信頼度: 10.9493
予測されたクラス: 3
信頼度: 3.6324
