# 1. Convert NetworkX Graph to PyTorch Geometric Format


# In [None]:
import torch
from torch_geometric.data import Data

# Map node names to contiguous integer indices (the row order of the
# feature matrix and the ids used in edge_index).
node_names = list(G.nodes)
node_to_idx = {name: idx for idx, name in enumerate(node_names)}

# Index the concepts by name once instead of scanning processed_concepts for
# every node. setdefault keeps the FIRST concept seen for a given name, which
# is the one a linear search would have returned.
concept_idx_by_name = {}
for concept_idx, concept in enumerate(processed_concepts):
    concept_idx_by_name.setdefault(concept['name'], concept_idx)

# Create node features (average of main and abbr embeddings if available).
# main_emb / abbr_emb are indexed by the concept's position in
# processed_concepts.
node_features = []
for node_name in node_names:
    # Raises KeyError naming the node if the graph contains a node that has
    # no matching concept.
    idx = concept_idx_by_name[node_name]
    concept = processed_concepts[idx]
    main_emb_i = main_emb[idx]
    abbr_emb_i = abbr_emb[idx] if concept['abbr'] else None
    feature = (main_emb_i + abbr_emb_i) / 2 if abbr_emb_i is not None else main_emb_i
    node_features.append(feature)

node_features = torch.tensor(np.array(node_features), dtype=torch.float)

# Create edge indices; every edge is added in both directions so message
# passing treats the graph as undirected.
edge_list = []
for u, v in G.edges():
    edge_list.append([node_to_idx[u], node_to_idx[v]])
    edge_list.append([node_to_idx[v], node_to_idx[u]])  # Undirected

# reshape(-1, 2) is a no-op for a non-empty list but guarantees the final
# tensor has shape (2, 0) rather than (0,) when the graph has no edges.
edge_index = torch.tensor(edge_list, dtype=torch.long).reshape(-1, 2).t().contiguous()

# Create PyG Data object
graph_data = Data(x=node_features, edge_index=edge_index)

# 2. Define GNN Model with Contrastive Loss

import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GNN(nn.Module):
    """Two-layer graph convolutional encoder.

    Applies ``GCNConv -> ReLU -> dropout -> GCNConv`` to the node features
    and returns the output of the second convolution without a final
    activation.

    Args:
        input_dim: Width of the input node features.
        hidden_dim: Width of the hidden representation.
        output_dim: Width of the returned node embeddings.
        dropout: Dropout probability applied after the first layer while the
            module is in training mode. Defaults to 0.5.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, dropout=0.5):
        super().__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)
        self.dropout = dropout

    def forward(self, data):
        """Return one embedding per node for ``data``.

        Args:
            data: Object exposing ``x`` (node features) and ``edge_index``.
        """
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # training=self.training makes dropout a no-op after model.eval().
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)
        return x

def contrastive_loss(z, edge_index, neg_samples=5):
    """Edge-based contrastive loss with uniform random negative sampling.

    Connected node pairs are pulled together and randomly drawn node pairs
    are pushed apart, using cosine similarity as the score:

        loss = -mean(log sigmoid(pos_sim)) - mean(log sigmoid(-neg_sim))

    Both terms are non-negative, so the loss is bounded below by zero.

    Args:
        z: Node embeddings, indexed by node id along the first dimension.
        edge_index: Pair of index tensors ``(src, dst)`` listing the edges.
        neg_samples: Number of random negative pairs drawn per edge.

    Returns:
        Scalar tensor holding the loss.
    """
    src, dst = edge_index

    # Negative sampling: endpoints are drawn independently and uniformly, so
    # a sampled pair may occasionally be a real edge or a self-pair.
    # Indices are created on z's device so they can index z directly.
    num_neg = src.size(0) * neg_samples
    neg_src = torch.randint(0, z.size(0), (num_neg,), device=z.device)
    neg_dst = torch.randint(0, z.size(0), (num_neg,), device=z.device)

    # Similarity calculation
    pos_sim = F.cosine_similarity(z[src], z[dst])
    neg_sim = F.cosine_similarity(z[neg_src], z[neg_dst])

    # logsigmoid is the numerically stable form of log(sigmoid(x)).
    # The negative term must be SUBTRACTED: adding log(sigmoid(-neg_sim))
    # would reward high similarity between random pairs.
    pos_loss = -F.logsigmoid(pos_sim).mean()
    neg_loss = -F.logsigmoid(-neg_sim).mean()
    return pos_loss + neg_loss

# 3. Train the GNN Model

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Read the input width from the data instead of hard-coding it: the first
# convolution must match the feature dimension of graph_data.x exactly.
input_dim = graph_data.x.size(1)
model = GNN(input_dim=input_dim, hidden_dim=256, output_dim=768).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Full-batch training: the whole graph is pushed through the model each
# epoch and the loss is computed over all edges.
graph_data = graph_data.to(device)
model.train()
for epoch in range(100):
    optimizer.zero_grad()
    z = model(graph_data)
    loss = contrastive_loss(z, graph_data.edge_index)
    loss.backward()
    optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item():.4f}')

# 4. Generate Enhanced Embeddings

# In [None]:
# Inference pass: eval mode turns off dropout and no_grad skips autograd
# bookkeeping, then the result is moved to host memory as a NumPy array.
model.eval()
with torch.no_grad():
    enhanced_tensor = model(graph_data)
enhanced_embeddings = enhanced_tensor.cpu().numpy()

# Map back to original concepts. Row i of the embedding matrix belongs to
# node_names[i], so pairing the two sequences positionally yields the
# name -> embedding lookup.
concept_to_enhanced = dict(zip(node_names, enhanced_embeddings))

# 5. Recompute Job Embeddings with Enhanced Features

# In [None]:
# Build one vector per job: the mean of the enhanced embeddings of every
# skill title / technology name of the job that is a known concept. Jobs
# with no known term keep an all-zero vector.
job_vectors = []
for job in jobs:
    accumulated = np.zeros_like(enhanced_embeddings[0])
    matched_count = 0

    for tech_skill in job.get("technology_skills", []):
        # The skill title comes first, followed by its technologies' names.
        candidate_terms = [tech_skill.get("skill_title", "")]
        candidate_terms.extend(
            tech["name"] for tech in tech_skill.get("technologies", [])
        )

        for term in candidate_terms:
            if term not in concept_to_enhanced:
                continue
            accumulated += concept_to_enhanced[term]
            matched_count += 1

    if matched_count > 0:
        accumulated /= matched_count
    job_vectors.append(accumulated)

enhanced_job_embeddings = np.array(job_vectors)

# 6. Modify Recommendation Function

# In [None]:
def recommend_jobs_with_gnn(filtered_candidates, top_n=5):
    """Rank jobs by cosine similarity to a user profile built from concepts.

    The user vector is the weighted mean of the enhanced embeddings of the
    candidate concepts. A candidate's weight is its score, scaled by 0.7
    unless its type is ``"technology_name"``. Candidates whose name has no
    enhanced embedding are skipped.

    Args:
        filtered_candidates: Iterable of sequences whose first two items are
            the concept name and concept type and whose last item is the
            score. Any items in between are ignored, so both
            ``(name, type, score)`` and ``(name, type, extra, score)`` work.
        top_n: Maximum number of jobs to return. Zero or a negative value
            yields an empty list.

    Returns:
        List of ``(job_title, similarity)`` pairs, best match first.
    """
    # Create user embedding using enhanced concept embeddings
    user_vec = np.zeros_like(enhanced_job_embeddings[0])
    total_weight = 0.0

    for candidate in filtered_candidates:
        # Star-unpacking tolerates optional middle fields; a fixed 4-way
        # unpack raises ValueError on 3-item candidates.
        name, c_type, *_, score = candidate
        if name not in concept_to_enhanced:
            continue

        weight = score * (1.0 if c_type == "technology_name" else 0.7)
        user_vec += concept_to_enhanced[name] * weight
        total_weight += weight

    if total_weight > 0:
        user_vec /= total_weight

    # Calculate similarities with enhanced job embeddings
    sims = cosine_similarity([user_vec], enhanced_job_embeddings)[0]

    # Reverse first, then truncate: slicing with [-top_n:] would return
    # every job when top_n == 0 because -0 == 0.
    top_indices = np.argsort(sims)[::-1][:max(top_n, 0)]

    return [(job_titles[i], sims[i]) for i in top_indices]

# In [None]:
# Example usage with sample skills.
# Each candidate is (name, concept_type, <field ignored by the recommender>,
# score). The third slot is never read by recommend_jobs_with_gnn, so None is
# used as a placeholder here.
sample_skills = [
    ("Python (Python)", "technology_name", None, 0.95),
    ("Machine Learning", "skill_title", None, 0.88),
]

recommended = recommend_jobs_with_gnn(sample_skills)
for job, score in recommended:
    print(f"{job}: {score:.4f}")