# In [None]:
import torch
from torch_geometric.data import Data
from torch_geometric.nn import GATConv
from torch_geometric.utils import dense_to_sparse
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
import pandas as pd

# Load the raw CSV so its structure can be inspected.
file_path = 'rest.csv'
data = pd.read_csv(file_path)

# Preview the first few rows of the dataset.
# NOTE(review): a bare expression is presumably only rendered when it is the
# last statement of a notebook cell; wrap it in print()/display() if the
# preview is actually needed here.
data.head()

# Step 1: Build the graph tensors.
# NOTE(review): normalized_features, similarity_matrix and encoded_labels are
# not defined in this cell; they are assumed to come from an earlier
# preprocessing step.
# Every non-zero entry of the dense similarity matrix becomes a weighted edge.
edge_index, edge_weight = dense_to_sparse(torch.tensor(similarity_matrix, dtype=torch.float32))

x = torch.tensor(normalized_features, dtype=torch.float32)
y = torch.tensor(encoded_labels, dtype=torch.long)

graph_data = Data(x=x, edge_index=edge_index, edge_weight=edge_weight, y=y)

# Step 2: Train-test split over node indices.
# Split a NumPy index array (the input type scikit-learn documents) and convert
# once afterwards; re-wrapping an existing tensor with torch.tensor() triggers
# PyTorch's copy-construct warning.
train_idx, test_idx = train_test_split(np.arange(y.size(0)), test_size=0.2, random_state=42)

# Despite the "mask" name these are integer index tensors, not boolean masks.
train_mask = torch.from_numpy(train_idx).long()
test_mask = torch.from_numpy(test_idx).long()
graph_data.train_mask = train_mask
graph_data.test_mask = test_mask

# Step 3: Define the GAT Model
class GAT(torch.nn.Module):
    """Two-layer graph attention network that returns raw per-node scores.

    The first layer uses ``heads`` attention heads whose outputs are
    concatenated, so the second layer takes ``hidden_channels * heads`` inputs.
    The second layer uses a single head with ``concat=False``. No softmax is
    applied to the output.

    Args:
        in_channels: Size of each input node feature vector.
        hidden_channels: Output size of each attention head in the first layer.
        out_channels: Size of each output node vector.
        heads: Number of attention heads in the first layer.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, heads=1):
        super().__init__()
        # GATConv has no ``edge_weight`` input; scalar edge weights are fed in
        # as one-dimensional edge features, which requires ``edge_dim=1``.
        self.gat1 = GATConv(in_channels, hidden_channels, heads=heads, edge_dim=1)
        self.gat2 = GATConv(
            hidden_channels * heads,
            out_channels,
            heads=1,
            concat=False,
            edge_dim=1,
        )

    def forward(self, x, edge_index, edge_weight=None):
        """Run both attention layers with an ELU in between.

        Args:
            x: Node feature tensor.
            edge_index: Edge index tensor passed to both layers.
            edge_weight: Optional per-edge scalar weights. When given they are
                reshaped to one column and passed as ``edge_attr``.

        Returns:
            The output of the second attention layer.
        """
        edge_attr = None if edge_weight is None else edge_weight.reshape(-1, 1)
        x = self.gat1(x, edge_index, edge_attr=edge_attr)
        x = torch.nn.functional.elu(x)
        return self.gat2(x, edge_index, edge_attr=edge_attr)

# Step 4: Build the network, the loss and the optimizer.
# The input width is the number of feature columns in x; the output width is
# the number of distinct values in encoded_labels.
# NOTE(review): sizing the output this way presumes the labels are the
# contiguous integers 0..K-1 - confirm against the label encoding step.
model = GAT(
    in_channels=x.shape[1],
    hidden_channels=32,
    out_channels=np.unique(encoded_labels).size,
    heads=4,
)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

# Step 5: Training function
def train(model, data, epochs=200, *, optim=None, loss_fn=None):
    """Fit ``model`` on the training nodes of ``data``.

    Each epoch runs one forward pass over the whole graph, computes the loss
    on the rows selected by ``data.train_mask`` and takes one optimizer step.
    The loss is printed every tenth epoch, starting at epoch 0.

    Args:
        model: Module called as
            ``model(data.x, data.edge_index, data.edge_weight)``.
        data: Object exposing ``x``, ``edge_index``, ``edge_weight``, ``y``
            and ``train_mask``.
        epochs: Number of optimisation steps to run.
        optim: Optimizer to step. Defaults to the module-level ``optimizer``.
            Pass one explicitly when training any other model; the
            module-level optimizer only holds the parameters it was built
            with.
        loss_fn: Callable invoked as ``loss_fn(predictions, targets)``.
            Defaults to the module-level ``criterion``.
    """
    # The fallbacks are resolved lazily, so the module-level objects are only
    # looked up when the caller did not supply replacements.
    optim = optimizer if optim is None else optim
    loss_fn = criterion if loss_fn is None else loss_fn

    model.train()
    for epoch in range(epochs):
        optim.zero_grad()
        out = model(data.x, data.edge_index, data.edge_weight)
        loss = loss_fn(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optim.step()
        if epoch % 10 == 0:
            print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

# Step 6: Evaluation function
def evaluate(model, data):
    """Print accuracy, MAE, MSE and RMSE for the rows selected by ``data.test_mask``.

    The model is put in eval mode and run once over the whole graph without
    gradient tracking. The predicted class of each node is the argmax over
    dimension 1 of the model output.

    NOTE(review): MAE/MSE/RMSE are computed on integer class indices, which is
    presumably only meaningful if the label encoding is ordinal - confirm.

    Args:
        model: Module called as
            ``model(data.x, data.edge_index, data.edge_weight)``.
        data: Object exposing ``x``, ``edge_index``, ``edge_weight``, ``y``
            and ``test_mask``.
    """
    model.eval()
    with torch.no_grad():
        scores = model(data.x, data.edge_index, data.edge_weight)
        predicted = scores.argmax(dim=1)

        # Move to NumPy so the scikit-learn metrics can consume the arrays.
        held_out = data.test_mask
        y_true = data.y[held_out].cpu().numpy()
        y_pred = predicted[held_out].cpu().numpy()

    # Plain NumPy / scikit-learn arithmetic from here on.
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    n_correct = (y_true == y_pred).sum()
    accuracy = n_correct / len(y_true)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"Mean Squared Error (MSE): {mse:.4f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")

# Fit the network on the training nodes (default number of epochs).
train(model=model, data=graph_data)

# Report accuracy and error metrics on the held-out nodes.
evaluate(model=model, data=graph_data)