In [33]:
import os

import torch
import torch.nn.functional as F
import torch_geometric
from neo4j import GraphDatabase
from torch_geometric.data import Data
from torch_geometric.nn import SAGEConv

# Connect to Neo4j Database.
# Connection settings are read from the environment so real credentials do not
# have to be committed with the notebook; the fallbacks are the previous
# local-development values, so an unconfigured environment behaves as before.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "test_password")
driver = GraphDatabase.driver(uri, auth=(username, password))

# Function to fetch user-article interactions, article-tags, and article-category relationships
def fetch_data_from_neo4j():
    """Fetch user→article reads together with each article's tags and categories.

    Uses the module-level ``driver`` to run a single Cypher query.

    Returns:
        list[dict]: one dict per returned row, with the keys ``user_id``,
        ``article_url``, ``read_time`` (the READ relationship's ``timeSpent``),
        ``tags`` and ``categories`` (lists of names).
    """
    # DISTINCT matters here: matching tags and categories in the same query
    # yields one row per (tag, category) combination before aggregation, so a
    # plain collect() repeats every tag once per category and vice versa.
    # NOTE: because these are plain MATCH clauses, reads of articles that have
    # no tag or no category are not returned at all.
    query = """
    MATCH (u:User)-[r:READ]->(a:Article)
    MATCH (a)-[t:HAS_TAG]->(tag:Tag)
    MATCH (a)-[c:BELONGS_TO]->(category:Category)
    RETURN u.id AS user_id, a.url AS article_url, r.timeSpent AS timeSpent,
           collect(DISTINCT tag.name) AS tags,
           collect(DISTINCT category.name) AS categories
    """

    # The context manager closes the session even when the query raises.
    # Records are materialised inside the block, before the session is closed.
    with driver.session() as session:
        result = session.run(query)
        return [
            {
                "user_id": record["user_id"],
                "article_url": record["article_url"],
                "read_time": record["timeSpent"],
                "tags": record["tags"],
                "categories": record["categories"],
            }
            for record in result
        ]

# Fetch the read interactions from Neo4j once; the later cells build the graph
# and train the model from this `user_article_data` list.
user_article_data = fetch_data_from_neo4j()


In [34]:
def prepare_data_for_graphsage(user_article_data):
    """Build one graph containing user, article, tag and category nodes.

    Node ids are laid out in contiguous ranges, in this order::

        [ users | articles | tags | categories ]

    so the node id of an entity is its per-type index plus the offset of its
    range. The returned ``*_map`` dicts hold the *per-type* indices (0-based
    within each type, assigned in first-seen order).

    Args:
        user_article_data: iterable of dicts with the keys ``user_id``,
            ``article_url``, ``read_time``, ``tags`` and ``categories``.

    Returns:
        tuple: ``(user_map, article_map, tag_map, category_map, data)`` where
        ``data`` is a ``Data`` object with

        * ``x``: identity matrix, one row per node,
        * ``edge_index``: long tensor of shape ``[2, num_edges]``,
        * ``edge_attr``: float tensor of shape ``[num_edges, 1]`` holding the
          read time for user→article edges and ``1.0`` for article→tag and
          article→category edges.
    """
    user_map = {}
    article_map = {}
    tag_map = {}
    category_map = {}

    # Pass 1: assign per-type indices. The range offsets depend on the final
    # size of each map, so no edge can be emitted until every row was seen.
    for row in user_article_data:
        user_map.setdefault(row["user_id"], len(user_map))
        article_map.setdefault(row["article_url"], len(article_map))
        for tag in row["tags"]:
            tag_map.setdefault(tag, len(tag_map))
        for category in row["categories"]:
            category_map.setdefault(category, len(category_map))

    # Users start at 0, so a user's per-type index is already its node id.
    article_offset = len(user_map)
    tag_offset = article_offset + len(article_map)
    category_offset = tag_offset + len(tag_map)

    edges = []
    edge_attr = []
    # An article appears in one row per reader; remember which article→tag and
    # article→category edges were already emitted so they are added only once.
    seen_structural_edges = set()

    # Pass 2: emit edges using global node ids.
    for row in user_article_data:
        user_node = user_map[row["user_id"]]
        article_node = article_offset + article_map[row["article_url"]]

        # User→article edge, weighted by read time (a missing value counts as 0).
        read_time = row["read_time"]
        edges.append((user_node, article_node))
        edge_attr.append([0.0 if read_time is None else float(read_time)])

        # Article→tag edges first, then article→category edges, weight 1.
        targets = [tag_offset + tag_map[tag] for tag in row["tags"]]
        targets += [category_offset + category_map[cat] for cat in row["categories"]]
        for target_node in targets:
            edge = (article_node, target_node)
            if edge in seen_structural_edges:
                continue
            seen_structural_edges.add(edge)
            edges.append(edge)
            edge_attr.append([1.0])

    # reshape() keeps the tensors 2-D even when there are no edges at all.
    edge_index = torch.tensor(edges, dtype=torch.long).reshape(-1, 2).t().contiguous()
    edge_attr = torch.tensor(edge_attr, dtype=torch.float).reshape(-1, 1)

    # Dummy node features: one-hot identity. Memory grows quadratically with
    # the number of nodes, so this only suits small graphs.
    num_nodes = category_offset + len(category_map)
    x = torch.eye(num_nodes, dtype=torch.float)

    # NOTE(review): edges are stored in one direction only (user→article,
    # article→tag/category). With PyG's default source→target message flow the
    # user nodes would receive no neighbour messages — consider adding reverse
    # edges; confirm against the intended model.
    data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr)

    return user_map, article_map, tag_map, category_map, data

# Prepare data for GraphSAGE.
# Note: train_graphsage() builds the graph again from `user_article_data`, and
# these five names are rebound when its return value is unpacked.
user_map, article_map, tag_map, category_map, data = prepare_data_for_graphsage(user_article_data)


In [35]:
class GraphSAGE(torch.nn.Module):
    """Two-layer encoder: SAGEConv, then ReLU, then a second SAGEConv."""

    def __init__(self, in_channels, hidden_channels, out_channels):
        """Create the two convolutions (in→hidden, hidden→out) and the ReLU."""
        super().__init__()
        self.conv1 = SAGEConv(in_channels, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, out_channels)
        self.relu = torch.nn.ReLU()

    def forward(self, data):
        """Run ``data.x`` through both layers using ``data.edge_index``.

        Returns the output of the second convolution; no activation is
        applied after it.
        """
        features = data.x
        edge_index = data.edge_index
        hidden = self.relu(self.conv1(features, edge_index))
        return self.conv2(hidden, edge_index)


In [40]:
import torch.nn.functional as F

def train_graphsage(user_article_data, num_epochs=10):
    """Train a GraphSAGE model to score (user, article) pairs.

    The model output is sliced into user rows and article rows, every user is
    scored against every article with a dot product, and the scores are
    trained as logits against a 0/1 "user has read article" matrix using
    binary cross-entropy.

    Args:
        user_article_data: list of dicts with at least the keys ``user_id``
            and ``article_url`` (plus whatever ``prepare_data_for_graphsage``
            reads).
        num_epochs: number of full-graph optimisation steps.

    Returns:
        tuple: ``(model, user_map, article_map, tag_map, category_map, data)``.

    Raises:
        ValueError: if ``user_article_data`` is empty.
    """
    if not user_article_data:
        raise ValueError("user_article_data is empty; nothing to train on")

    # Prepare data
    user_map, article_map, tag_map, category_map, data = prepare_data_for_graphsage(user_article_data)

    # Create GraphSAGE model
    model = GraphSAGE(in_channels=data.x.size(1), hidden_channels=32, out_channels=128)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    num_users = len(user_map)
    num_articles = len(article_map)

    # Ground truth, built once: labels[u, a] == 1 iff a row links user u to
    # article a. The rows of user_article_data are dicts, so the pair has to be
    # read from each row's keys; a `(user, article) in user_article_data` test
    # never matches and would leave every label at 0.
    labels = torch.zeros((num_users, num_articles), dtype=torch.float)
    for row in user_article_data:
        labels[user_map[row["user_id"]], article_map[row["article_url"]]] = 1.0

    # Training loop
    model.train()
    for epoch in range(num_epochs):
        optimizer.zero_grad()

        out = model(data)  # Forward pass

        # Assumes the node rows are ordered users first, then articles — the
        # slices below rely on that layout.
        user_embeddings = out[:num_users]
        article_embeddings = out[num_users:num_users + num_articles]

        # Raw dot-product scores (not normalised, so not cosine similarity),
        # used as logits: shape [num_users, num_articles].
        logits = torch.matmul(user_embeddings, article_embeddings.t())

        loss = F.binary_cross_entropy_with_logits(logits, labels)

        loss.backward()
        optimizer.step()

    return model, user_map, article_map, tag_map, category_map, data

# Train with the default number of epochs; keep the model together with the
# mappings and graph returned by the same call so they stay consistent.
model, user_map, article_map, tag_map, category_map, data = train_graphsage(user_article_data)


In [43]:
def make_recommendations(model, data, user_map, article_map, top_k=3, user_id="user456"):
    """Return the ``top_k`` highest-scoring article URLs for one user.

    Args:
        model: module that is called as ``model(data)`` and returns a tensor
            indexed by node.
        data: input passed unchanged to ``model``.
        user_map: mapping from user id to the user's row in the model output.
        article_map: mapping from article URL to the article's index; article
            rows are read from the model output directly after the user rows.
        top_k: maximum number of articles to return. Fewer are returned when
            there are fewer than ``top_k`` articles.
        user_id: id of the user to recommend for. Defaults to ``"user456"``,
            the value that used to be fixed inside the function.

    Returns:
        list: article URLs ordered from highest to lowest score.

    Raises:
        KeyError: if ``user_id`` is not in ``user_map``.
    """
    model.eval()

    # Get node embeddings from the model; no gradients are needed for inference.
    with torch.no_grad():
        out = model(data)

    user_idx = user_map[user_id]
    user_embedding = out[user_idx]

    # Article rows sit directly after the user rows.
    num_users = len(user_map)
    article_embeddings = out[num_users:num_users + len(article_map)]

    # Raw dot-product score per article (not normalised, so not cosine similarity).
    scores = torch.matmul(user_embedding, article_embeddings.t())

    # topk() raises when k exceeds the number of candidates, so clamp it.
    k = max(0, min(top_k, scores.numel()))
    top_indices = scores.topk(k).indices.tolist()

    # Map article indices back to URLs. Assumes the dict's insertion order
    # matches the stored indices — TODO confirm for maps built elsewhere.
    article_urls = list(article_map)
    return [article_urls[i] for i in top_indices]

# Make recommendations for user456 (the user that make_recommendations looks up by default)
recommended_articles = make_recommendations(model, data, user_map, article_map, top_k=3)
print(f"Recommended articles for user456: {recommended_articles}")


Recommended articles for user1: ['https://www.thehindu.com/news/national/telangana/ten-years-after-the-creation-of-a-separate-telangana-dividing-a-culture/article68282827.ece', 'https://www.thehindu.com/sci-tech/technology/googles-ai-chatbot-gemini-verbally-abused-user-told-them-to-die-report/article68871570.ece', 'https://www.thehindu.com/news/cities/Delhi/delhi-air-pollution-cm-atishi-announces-staggered-timings-for-govt-offices-to-tackle-traffic-congestion/article68871906.ece']
