In [2]:
# import torch
# print(f"PyTorch version: {torch.__version__}")
# print(f"CUDA version: {torch.version.cuda}")
# print(f"CUDA available: {torch.cuda.is_available()}")

# # Xóa các thư viện cũ để tránh xung đột
# !pip uninstall -y torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric

# # Cài đặt torch-scatter
# !pip install torch-scatter -f https://data.pyg.org/whl/torch-2.5.1+cu124.html

# # Cài đặt torch-sparse
# !pip install torch-sparse -f https://data.pyg.org/whl/torch-2.5.1+cu124.html

# # Cài đặt torch-cluster
# !pip install torch-cluster -f https://data.pyg.org/whl/torch-2.5.1+cu124.html

# # Cài đặt torch-spline-conv
# !pip install torch-spline-conv -f https://data.pyg.org/whl/torch-2.5.1+cu124.html
# # Cài đặt torch-geometric
# !pip install torch-geometric

In [3]:
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.nn import GATConv
from torch_geometric.data import Data
from torch_geometric.loader import NeighborSampler
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import gc
import os
from collections import defaultdict

In [4]:
# Set random seed for reproducibility
torch.manual_seed(42)
np.random.seed(42)

# Clear memory
gc.collect()
torch.cuda.empty_cache()

# Kiểm tra GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
if device.type == 'cuda':
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")

Using device: cuda
GPU Memory: 2.15 GB
GPU Name: NVIDIA GeForce MX450


In [5]:
# Đọc dữ liệu từ Kaggle
train_df = pd.read_csv('data2/train.csv')
test_df = pd.read_csv('data2/test.csv')

# Feature engineering: Add Price * Super Area
train_df['Price_x_SuperArea'] = train_df['Price'] * train_df['Super Area']
test_df['Price_x_SuperArea'] = test_df['Price'] * test_df['Super Area']

# Thêm cột index làm house_id
train_df['index'] = train_df.index
test_df['index'] = test_df.index

In [6]:
# Hàm để gán node vào lá của một cây (độ sâu 3)
def assign_to_leaf(df):
    leaf_ids = np.zeros(len(df), dtype=np.int32)
    for idx in range(len(df)):
        price = df.iloc[idx]['Price']
        super_area = df.iloc[idx]['Super Area']
        carpet_area = df.iloc[idx]['Carpet Area']
        total_floors = df.iloc[idx]['Total Floors']
        
        if price <= 10308.50:
            if super_area <= 1800.04:
                if carpet_area <= 1915.50:
                    if price <= 5056.50:
                        leaf_ids[idx] = 0
                    else:
                        leaf_ids[idx] = 1
                else:
                    if total_floors <= 4.50:
                        leaf_ids[idx] = 2
                    else:
                        leaf_ids[idx] = 3
            else:
                if price <= 6952.00:
                    if super_area <= 3375.00:
                        leaf_ids[idx] = 4
                    else:
                        leaf_ids[idx] = 5
                else:
                    if super_area <= 3331.01:
                        leaf_ids[idx] = 6
                    else:
                        leaf_ids[idx] = 7
        else:
            if super_area <= 2592.52:
                if price <= 18905.50:
                    if super_area <= 1450.00:
                        leaf_ids[idx] = 8
                    else:
                        leaf_ids[idx] = 9
                else:
                    if super_area <= 1150.00:
                        leaf_ids[idx] = 10
                    else:
                        leaf_ids[idx] = 11
            else:
                if price <= 29375.00:
                    if price <= 15997.50:
                        leaf_ids[idx] = 12
                    else:
                        leaf_ids[idx] = 13
                else:
                    if super_area <= 3924.96:
                        leaf_ids[idx] = 14
                    else:
                        leaf_ids[idx] = 15
    return leaf_ids


In [7]:
# Hàm tiền xử lý dữ liệu
def preprocess_data(df, numerical_cols, categorical_cols, target_col, 
                    scaler=None, encoders=None, target_encoders=None, target_scaler=None, is_train=True):
    df = df.copy()
    
    target_encode_cols = ['Society', 'Location', 'Overlooking']
    
    if is_train:
        scaler = MinMaxScaler()
        df[numerical_cols] = scaler.fit_transform(df[numerical_cols])
        
        encoders = {}
        target_encoders = {}
        target_scaler = MinMaxScaler()
        
        encoded_features = []
        one_hot_cols = ['Transaction', 'Furnishing', 'Ownership', 'Facing']
        for col in one_hot_cols:
            encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
            encoded = encoder.fit_transform(df[[col]])
            encoded_df = pd.DataFrame(
                encoded, 
                columns=[f"{col}_{cat}" for cat in encoder.categories_[0]]
            )
            encoded_features.append(encoded_df)
            encoders[col] = encoder
        
        target_encoded = []
        for col in target_encode_cols:
            mean_target = df.groupby(col)[target_col].mean()
            df[f'{col}_encoded'] = df[col].map(mean_target)
            target_encoders[col] = mean_target
            target_encoded.append(df[[f'{col}_encoded']])
        
        target_encoded_df = pd.concat(target_encoded, axis=1)
        scaled_target_encoded = target_scaler.fit_transform(target_encoded_df)
        for i, col in enumerate(target_encode_cols):
            df[f'{col}_encoded'] = scaled_target_encoded[:, i]
            encoded_features.append(df[[f'{col}_encoded']])
    else:
        if scaler is None or encoders is None or target_encoders is None or target_scaler is None:
            raise ValueError("Scaler, encoders, target_encoders, and target_scaler must be provided for is_train=False")
        
        df[numerical_cols] = scaler.transform(df[numerical_cols])
        
        encoded_features = []
        for col in ['Transaction', 'Furnishing', 'Ownership', 'Facing']:
            encoded = encoders[col].transform(df[[col]])
            encoded_df = pd.DataFrame(
                encoded, 
                columns=[f"{col}_{cat}" for cat in encoders[col].categories_[0]]
            )
            encoded_features.append(encoded_df)
        
        target_encoded = []
        for col in target_encode_cols:
            default_value = df[target_col].mean() if target_col in df else 0
            if target_encoders and col in target_encoders:
                df[f'{col}_encoded'] = df[col].map(target_encoders[col]).fillna(default_value)
            else:
                df[f'{col}_encoded'] = default_value
            target_encoded.append(df[[f'{col}_encoded']])
        
        target_encoded_df = pd.concat(target_encoded, axis=1)
        scaled_target_encoded = target_scaler.transform(target_encoded_df)
        for i, col in enumerate(target_encode_cols):
            df[f'{col}_encoded'] = scaled_target_encoded[:, i]
            encoded_features.append(df[[f'{col}_encoded']])
    
    feature_df = pd.concat([df[numerical_cols]] + encoded_features, axis=1)
    y = np.log1p(df[target_col].values) if target_col in df else None
    
    return feature_df, y, scaler, encoders, target_encoders, target_scaler

In [8]:
# Preprocess train data
numerical_cols = ['Carpet Area', 'Super Area', 'Bathroom', 'Balcony', 'Current Floor', 
                  'Total Floors', 'BHK', 'Price', 'Car Parking', 'Price_x_SuperArea']
categorical_cols = ['Transaction', 'Furnishing', 'Overlooking', 'Ownership', 'Facing']
target_col = 'Amount'

features, y, scaler, encoders, target_encoders, target_scaler = preprocess_data(
    train_df, numerical_cols, categorical_cols, target_col, is_train=True
)

In [9]:
# Tạo SIMILAR_LEAF edges từ cấu trúc cây (Tối ưu hơn)
print("Creating SIMILAR_LEAF edges from Random Forest structure...")
leaf_indices = np.zeros((len(train_df), 3), dtype=np.int32)
for tree_idx in range(3):
    leaf_indices[:, tree_idx] = assign_to_leaf(train_df)

# Nhóm các node theo lá
leaf_groups = defaultdict(list)
for idx in range(len(train_df)):
    leaf_tuple = tuple(leaf_indices[idx])
    leaf_groups[leaf_tuple].append(idx)

Creating SIMILAR_LEAF edges from Random Forest structure...


In [10]:
# Tạo file tạm để lưu trữ edges và weights
edges_file = 'store/similar_leaf_edges.npy'
weights_file = 'store/similar_leaf_weights.npy'
if os.path.exists(edges_file):
    os.remove(edges_file)
if os.path.exists(weights_file):
    os.remove(weights_file)

# Biến đếm số lượng cạnh
total_edges = 0
amounts = train_df['Amount'].values
batch_size = 1000  # Xử lý theo batch để giảm bộ nhớ

In [11]:
# Tạo cạnh chỉ giữa các node trong cùng nhóm lá
for leaf_tuple, node_indices in leaf_groups.items():
    if len(node_indices) < 2:
        continue
    print(f"Processing leaf group with {len(node_indices)} nodes...")
    node_indices = np.array(node_indices)
    
    # Xử lý theo batch
    for start in range(0, len(node_indices), batch_size):
        end = min(start + batch_size, len(node_indices))
        batch_nodes = node_indices[start:end]
        
        batch_edges = []
        batch_weights = []
        for idx_i in range(len(batch_nodes)):
            i = batch_nodes[idx_i]
            for idx_j in range(idx_i + 1, len(batch_nodes)):
                j = batch_nodes[idx_j]
                same_leaf_count = np.sum(leaf_indices[i] == leaf_indices[j])
                freq = same_leaf_count / 3
                price_similarity = abs(amounts[i] - amounts[j]) / (amounts[i] + amounts[j] + 1e-5)
                if price_similarity < 0.5 and freq > 0.7:  # Tăng ngưỡng để giảm số lượng cạnh
                    weight = freq * (1.0 - price_similarity)
                    batch_edges.append([i, j])
                    batch_edges.append([j, i])
                    batch_weights.append(weight)
                    batch_weights.append(weight)
        
        # Ghi batch_edges và batch_weights vào file
        if batch_edges:
            batch_edges_array = np.array(batch_edges, dtype=np.int64)
            batch_weights_array = np.array(batch_weights, dtype=np.float32)
            with open(edges_file, 'ab') as f:
                np.save(f, batch_edges_array)
            with open(weights_file, 'ab') as f:
                np.save(f, batch_weights_array)
            total_edges += len(batch_edges)
        
        # Giải phóng bộ nhớ
        del batch_edges, batch_weights
        gc.collect()

Processing leaf group with 2298 nodes...
Processing leaf group with 10087 nodes...
Processing leaf group with 1917 nodes...
Processing leaf group with 12382 nodes...
Processing leaf group with 3027 nodes...
Processing leaf group with 566 nodes...
Processing leaf group with 1883 nodes...
Processing leaf group with 302 nodes...
Processing leaf group with 541 nodes...
Processing leaf group with 411 nodes...
Processing leaf group with 170 nodes...
Processing leaf group with 124 nodes...
Processing leaf group with 144 nodes...
Processing leaf group with 15 nodes...
Processing leaf group with 125 nodes...
Processing leaf group with 8 nodes...


In [12]:
# Đọc lại edges và weights từ file
print(f"Total SIMILAR_LEAF edges: {total_edges}")
if total_edges > 0:
    all_edges = np.empty((total_edges, 2), dtype=np.int64)
    all_weights = np.empty(total_edges, dtype=np.float32)
    
    edges_pos = 0
    with open(edges_file, 'rb') as f_edges, open(weights_file, 'rb') as f_weights:
        while edges_pos < total_edges:
            chunk_edges = np.load(f_edges)
            chunk_weights = np.load(f_weights)
            chunk_size = len(chunk_edges)
            all_edges[edges_pos:edges_pos + chunk_size] = chunk_edges
            all_weights[edges_pos:edges_pos + chunk_size] = chunk_weights
            edges_pos += chunk_size
else:
    all_edges = np.empty((0, 2), dtype=np.int64)
    all_weights = np.empty(0, dtype=np.float32)

# Xóa file tạm
os.remove(edges_file)
os.remove(weights_file)

# Chuyển sang tensor
edge_index = torch.tensor(all_edges, dtype=torch.long).t().contiguous()
edge_weight = torch.tensor(all_weights, dtype=torch.float)

Total SIMILAR_LEAF edges: 29841204


In [13]:
del all_edges, all_weights
gc.collect()

# Tạo dữ liệu cho PyTorch Geometric
data = Data(
    x=torch.tensor(features.values, dtype=torch.float),
    edge_index=edge_index,
    edge_attr=edge_weight,
    y=torch.tensor(y, dtype=torch.float)
)

print(f"Graph: {data.num_nodes} nodes, {data.num_edges} edges")

# Lưu đồ thị vào file .npz
print("Saving graph to .npz file...")
np.savez(
    'store/graph_data.npz',
    x=data.x.cpu().numpy(),
    edge_index=data.edge_index.cpu().numpy(),
    edge_attr=data.edge_attr.cpu().numpy(),
    y=data.y.cpu().numpy()
)

Graph: 34000 nodes, 29841204 edges
Saving graph to .npz file...


In [14]:
# Sử dụng NeighborSampler để tạo DataLoader
train_loader = NeighborSampler(
    data.edge_index,
    node_idx=None,
    sizes=[15, 10, 5],
    batch_size=1024,
    shuffle=True,
    num_workers=0
)

In [15]:
# GNN Model with GATConv
class GNNModel(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim=32):
        super(GNNModel, self).__init__()
        self.conv1 = GATConv(input_dim, hidden_dim, heads=1, concat=True)
        self.bn1 = torch.nn.BatchNorm1d(hidden_dim)
        self.conv2 = GATConv(hidden_dim, 16, heads=1, concat=True)
        self.bn2 = torch.nn.BatchNorm1d(16)
        self.conv3 = GATConv(16, 8, heads=1, concat=False)
        self.bn3 = torch.nn.BatchNorm1d(8)
        self.fc = torch.nn.Linear(8, 1)
    
    def forward(self, x, edge_index, edge_weight, batch_size):
        x = F.relu(self.bn1(self.conv1(x, edge_index, edge_attr=edge_weight)))
        x = F.dropout(x, p=0.3, training=self.training)
        x = F.relu(self.bn2(self.conv2(x, edge_index, edge_attr=edge_weight)))
        x = F.dropout(x, p=0.3, training=self.training)
        x = F.relu(self.bn3(self.conv3(x, edge_index, edge_attr=edge_weight)))
        x = self.fc(x)
        return x[:batch_size].squeeze()

In [16]:
# Huấn luyện trên GPU với DataLoader
model = GNNModel(input_dim=features.shape[1]).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=10, factor=0.5)

# Chuyển features và labels sang CPU
data.x = data.x.to('cpu')
data.y = data.y.to('cpu')
data.edge_index = data.edge_index.to('cpu')
data.edge_attr = data.edge_attr.to('cpu')

In [None]:
model.train()
for epoch in range(400):
    total_loss = 0
    for batch_size, n_id, adjs in train_loader:
        adj = adjs[-1]  # Lấy tầng cuối cùng
        edge_index = adj.edge_index.to(device)
        
        # Lấy edge_weight tương ứng với edge_index của batch
        edge_weight = data.edge_attr[adj.e_id] if adj.e_id is not None and len(adj.e_id) > 0 else None
        if edge_weight is not None:
            edge_weight = edge_weight.to(device)
        else:
            edge_weight = torch.ones(edge_index.size(1), device=device)  # Mặc định trọng số 1 nếu không có

        x_batch = data.x[n_id].to(device)
        y_batch = data.y[n_id[:batch_size]].to(device)
        
        optimizer.zero_grad()
        out = model(x_batch, edge_index, edge_weight, batch_size)
        loss = F.mse_loss(out, y_batch)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        total_loss += loss.item() * batch_size
    
    total_loss /= data.num_nodes
    scheduler.step(total_loss)
    
    if (epoch + 1) % 50 == 0:
        print(f'Epoch {epoch + 1}, Loss: {total_loss:.4f}')
        with torch.no_grad():
            sample_pred = out[:5].cpu().numpy()
            sample_true = y_batch[:5].cpu().numpy()
            print(f"Sample Predictions: {sample_pred}")
            print(f"Sample True Values: {sample_true}")

Epoch 50, Loss: 0.1048
Sample Predictions: [15.429655 18.172083 15.634981 15.150665 17.145903]
Sample True Values: [15.424949 18.064005 15.384127 14.84513  17.034386]
Epoch 100, Loss: 0.0809
Sample Predictions: [16.129534 15.601461 15.3193   16.160769 16.90637 ]
Sample True Values: [16.489658  15.4641695 15.068274  15.894952  16.677711 ]
Epoch 150, Loss: 0.0728
Sample Predictions: [16.99467  16.51823  16.217943 17.280272 16.183828]
Sample True Values: [16.811243 16.34124  15.907374 16.77042  15.761421]
Epoch 200, Loss: 0.0700
Sample Predictions: [16.401901 15.586923 15.754505 16.484446 15.222102]
Sample True Values: [16.38046  15.444752 16.257858 16.666218 14.946913]
Epoch 250, Loss: 0.0670
Sample Predictions: [15.6609745 16.421482  15.549066  14.523319  15.318489 ]
Sample True Values: [15.775605 16.562782 15.955577 13.764218 15.575091]
Epoch 300, Loss: 0.0674
Sample Predictions: [16.991417 14.652505 14.871898 17.87951  17.825125]
Sample True Values: [16.98939  14.64842  14.914124 17.5

In [None]:
# Preprocess test data
numerical_cols_input = ['Carpet Area', 'Super Area', 'Bathroom', 'Balcony', 'Current Floor', 
                        'Total Floors', 'BHK', 'Price', 'Car Parking', 'Price_x_SuperArea']
categorical_cols_input = ['Transaction', 'Furnishing', 'Overlooking', 'Ownership', 'Facing']
target_col = 'Amount'

# Đảm bảo các giá trị từ huấn luyện được sử dụng
test_features, test_y, _, _, _, _ = preprocess_data(
    test_df, numerical_cols_input, categorical_cols_input, target_col, 
    scaler=scaler, encoders=encoders, target_encoders=target_encoders, target_scaler=target_scaler, is_train=False
)

In [None]:
# Tạo đồ thị cho test (dùng SIMILAR_LEAF tương tự)
test_leaf_indices = np.zeros((len(test_df), 3), dtype=np.int32)
for tree_idx in range(3):
    test_leaf_indices[:, tree_idx] = assign_to_leaf(test_df)

test_leaf_groups = defaultdict(list)
for idx in range(len(test_df)):
    leaf_tuple = tuple(test_leaf_indices[idx])
    test_leaf_groups[leaf_tuple].append(idx)

test_edges_file = 'store/test_similar_leaf_edges.npy'
test_weights_file = 'store/test_similar_leaf_weights.npy'
if os.path.exists(test_edges_file):
    os.remove(test_edges_file)
if os.path.exists(test_weights_file):
    os.remove(test_weights_file)

test_total_edges = 0
test_amounts = test_df['Amount'].values

In [None]:
for leaf_tuple, node_indices in test_leaf_groups.items():
    if len(node_indices) < 2:
        continue
    print(f"Processing test leaf group with {len(node_indices)} nodes...")
    node_indices = np.array(node_indices)
    
    for start in range(0, len(node_indices), batch_size):
        end = min(start + batch_size, len(node_indices))
        batch_nodes = node_indices[start:end]
        
        batch_edges = []
        batch_weights = []
        for idx_i in range(len(batch_nodes)):
            i = batch_nodes[idx_i]
            for idx_j in range(idx_i + 1, len(batch_nodes)):
                j = batch_nodes[idx_j]
                same_leaf_count = np.sum(test_leaf_indices[i] == test_leaf_indices[j])
                freq = same_leaf_count / 3
                price_similarity = abs(test_amounts[i] - test_amounts[j]) / (test_amounts[i] + test_amounts[j] + 1e-5)
                if price_similarity < 0.5 and freq > 0.7:
                    weight = freq * (1.0 - price_similarity)
                    batch_edges.append([i, j])
                    batch_edges.append([j, i])
                    batch_weights.append(weight)
                    batch_weights.append(weight)
        
        if batch_edges:
            batch_edges_array = np.array(batch_edges, dtype=np.int64)
            batch_weights_array = np.array(batch_weights, dtype=np.float32)
            with open(test_edges_file, 'ab') as f:
                np.save(f, batch_edges_array)
            with open(test_weights_file, 'ab') as f:
                np.save(f, batch_weights_array)
            test_total_edges += len(batch_edges)
        
        del batch_edges, batch_weights
        gc.collect()

In [None]:
print(f"Total test SIMILAR_LEAF edges: {test_total_edges}")
if test_total_edges > 0:
    test_all_edges = np.empty((test_total_edges, 2), dtype=np.int64)
    test_all_weights = np.empty(test_total_edges, dtype=np.float32)
    
    edges_pos = 0
    with open(test_edges_file, 'rb') as f_edges, open(test_weights_file, 'rb') as f_weights:
        while edges_pos < test_total_edges:
            chunk_edges = np.load(f_edges)
            chunk_weights = np.load(f_weights)
            chunk_size = len(chunk_edges)
            test_all_edges[edges_pos:edges_pos + chunk_size] = chunk_edges
            test_all_weights[edges_pos:edges_pos + chunk_size] = chunk_weights
            edges_pos += chunk_size
else:
    test_all_edges = np.empty((0, 2), dtype=np.int64)
    test_all_weights = np.empty(0, dtype=np.float32)

os.remove(test_edges_file)
os.remove(test_weights_file)

In [None]:
test_edge_index = torch.tensor(test_all_edges, dtype=torch.long).t().contiguous()
test_edge_weight = torch.tensor(test_all_weights, dtype=torch.float)

del test_all_edges, test_all_weights
gc.collect()

test_data = Data(
    x=torch.tensor(test_features.values, dtype=torch.float),
    edge_index=test_edge_index,
    edge_attr=test_edge_weight,
    y=torch.tensor(test_y, dtype=torch.float)
)

In [None]:
print(f"Test Graph: {test_data.num_nodes} nodes, {test_data.num_edges} edges")

# Lưu test data vào file .npz
print("Saving test graph to .npz file...")
np.savez(
    'store/test_graph_data.npz',
    x=test_data.x.cpu().numpy(),
    edge_index=test_data.edge_index.cpu().numpy(),
    edge_attr=test_data.edge_attr.cpu().numpy(),
    y=test_data.y.cpu().numpy()
)

# DataLoader cho test data
test_loader = NeighborSampler(
    test_data.edge_index,
    node_idx=None,
    sizes=[15, 10, 5],
    batch_size=1024,
    shuffle=False,
    num_workers=0
)

In [None]:
# Chuyển test data sang CPU
test_data.x = test_data.x.to('cpu')
test_data.y = test_data.y.to('cpu')
test_data.edge_index = test_data.edge_index.to('cpu')
test_data.edge_attr = test_data.edge_attr.to('cpu')

In [None]:
# Đánh giá mô hình
model.eval()
preds = []
trues = []
with torch.no_grad():
    for batch_size, n_id, adjs in test_loader:
        adj = adjs[-1]
        edge_index = adj.edge_index.to(device)
        
        # Lấy edge_weight tương ứng với edge_index của batch
        edge_weight = test_data.edge_attr[adj.e_id] if adj.e_id is not None and len(adj.e_id) > 0 else None
        if edge_weight is not None:
            edge_weight = edge_weight.to(device)
        else:
            edge_weight = torch.ones(edge_index.size(1), device=device)  # Mặc định trọng số 1 nếu không có

        x_batch = test_data.x[n_id].to(device)
        y_batch = test_data.y[n_id[:batch_size]].to(device)
        
        out = model(x_batch, edge_index, edge_weight, batch_size)
        preds.append(np.expm1(out.cpu().numpy()))
        trues.append(np.expm1(y_batch.cpu().numpy()))

pred = np.concatenate(preds)
true = np.concatenate(trues)

In [None]:
mae = mean_absolute_error(true, pred)
mse = mean_squared_error(true, pred)
r2 = r2_score(true, pred)
rmse = np.sqrt(mse)
mape = np.mean(np.abs((true - pred) / true)) * 100

print("\nGNN Test Metrics:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"R2 Score: {r2:.4f}")
print(f"RMSE: {rmse:.2f}")
print(f"MAPE: {mape:.2f}%")

# Lưu mô hình
torch.save(model.state_dict(), 'store/gnn_model_with_random_forest.pth')

# Clear memory
gc.collect()
torch.cuda.empty_cache()