In [None]:
# Import necessary libraries
import torch
import torch.nn as nn
from transformers import AutoModelForCausalLM, AutoTokenizer, get_linear_schedule_with_warmup
from transformers import pipeline  # For sentiment analysis
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
from datasets import load_dataset
import spacy
import numpy as np
from tqdm import tqdm
import networkx as nx
import math
from newsapi import NewsApiClient
import json
import re

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load configuration. The file must provide an 'api_key' entry (read below).
# The encoding is given explicitly so the JSON is decoded as UTF-8 instead of
# with whatever the platform's locale default happens to be.
with open('config.json', encoding='utf-8') as f:
    config = json.load(f)

# Load spaCy model for entity recognition
nlp = spacy.load('en_core_web_sm')

# Initialize News API client
newsapi = NewsApiClient(api_key=config['api_key'])

# Initialize the knowledge graph
knowledge_graph = nx.DiGraph()  # Use a directed graph to represent relationships with direction

# Initialize entity embedding dictionary (entity text -> vector of length entity_embedding_dim)
entity_embedding_dim = 128
entity_embedding_dict = {}

# Initialize confidence score dictionary
# This will map edge tuples (subject, object) to trainable parameters
confidence_scores = {}

# Initialize emotion tags for nodes (entity text -> emotion label)
node_emotions = {}

# Initialize sentiment analysis pipeline
sentiment_analyzer = pipeline('sentiment-analysis', model='nlptown/bert-base-multilingual-uncased-sentiment')

# Define the sanitize function
def sanitize_param_name(name):
    """Return ``name`` with every character outside ``[a-zA-Z0-9_]`` replaced by ``_``.

    The result is used to build string keys for edge parameters.
    """
    # Under re.ASCII, \W is exactly the complement of [a-zA-Z0-9_].
    return re.sub(r'\W', '_', name, flags=re.ASCII)

# Function to detect emotions in text
def detect_emotions(text):
    """Classify ``text`` with the module-level sentiment pipeline.

    Sentiment analysis is used as a simple proxy for emotion detection; swap
    in a dedicated emotion model if finer-grained labels are needed.

    Args:
        text: The string to classify.

    Returns:
        A ``(label, score)`` tuple: the pipeline's top label, lower-cased, and
        the score the pipeline reported for it.
    """
    # Truncate over-long inputs instead of letting them exceed the model's
    # maximum sequence length; this mirrors how the dataset code calls the
    # same pipeline.
    result = sentiment_analyzer(text, truncation=True)
    top = result[0]
    return top['label'].lower(), top['score']

# Function to update the knowledge graph with new data
def update_knowledge_graph():
    """Refresh the module-level knowledge graph from the latest top headlines.

    For every article, entity tokens are linked to their entity-typed
    dependency children. Each new ``(subject, object)`` edge gets:

    * an entry in ``knowledge_graph`` carrying ``relation`` and ``emotion``,
    * a trainable confidence parameter in ``confidence_scores`` (initial 0.5),
    * emotion tags for both endpoints in ``node_emotions``.

    Both endpoints also receive a random vector in ``entity_embedding_dict``
    if they do not have one yet.

    This is best-effort: any exception is reported on stdout and swallowed,
    so a failed refresh leaves whatever was added so far in place.

    NOTE(review): nodes are keyed by single-token text (``token.text``), so a
    multi-word entity contributes one node per token rather than one node for
    the whole span.
    """
    try:
        # Fetch the latest news articles
        articles = newsapi.get_top_headlines(language='en', page_size=100)
        for article in articles['articles']:
            content = article.get('content') or ''
            if not content.strip():
                # No text means no entities and therefore no edges; skip the
                # parser and the sentiment model entirely.
                continue

            doc = nlp(content)
            # One emotion label per article, shared by all of its edges.
            emotion_label, _ = detect_emotions(content)

            # Extract relationships using dependency parsing
            for sent in doc.sents:
                for token in sent:
                    if not token.ent_type_:
                        continue
                    subject = token.text
                    for child in token.children:
                        if not child.ent_type_:
                            continue
                        object_ = child.text
                        # spaCy's dep_ describes a token's relation to its own
                        # head, so the label of the arc token -> child lives
                        # on the child, not on the head token.
                        relation = child.dep_

                        # If the edge is not already in the graph, add it
                        if not knowledge_graph.has_edge(subject, object_):
                            # Initialize confidence score as a trainable parameter
                            confidence_scores[(subject, object_)] = nn.Parameter(
                                torch.tensor(0.5, dtype=torch.float, device=device)
                            )
                            # Add edge to the graph with emotion tag
                            knowledge_graph.add_edge(subject, object_, relation=relation, emotion=emotion_label)
                            # Store emotion tag for nodes
                            node_emotions[subject] = emotion_label
                            node_emotions[object_] = emotion_label

                        # Make sure both endpoints have an embedding
                        if subject not in entity_embedding_dict:
                            entity_embedding_dict[subject] = np.random.rand(entity_embedding_dim)
                        if object_ not in entity_embedding_dict:
                            entity_embedding_dict[object_] = np.random.rand(entity_embedding_dim)
    except Exception as e:
        # Deliberate top-level boundary: a news/API failure must not abort the run.
        print(f"Error updating knowledge graph: {e}")

# Populate the knowledge graph once up front. This has to run before the
# KGEnhancedModel is constructed, because its __init__ copies the entries of
# confidence_scores that exist at that moment into a ParameterDict.
update_knowledge_graph()

# Load the pre-trained GPT-2 tokenizer and causal language model
tokenizer = AutoTokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token  # Reuse EOS as the padding token (collate_fn pads with tokenizer.pad_token_id)
base_model = AutoModelForCausalLM.from_pretrained('gpt2')
base_model.config.pad_token_id = tokenizer.pad_token_id  # Keep the model config in sync with the tokenizer's pad token


# Define the enhanced model
class KGEnhancedModel(nn.Module):
    """Causal LM wrapper that adds KG and sentiment vectors to token embeddings.

    A per-example knowledge-graph vector and a per-example sentiment vector
    are each projected to the base model's embedding width, broadcast over the
    sequence, passed through dropout, and added to the token embeddings before
    the base model is run on ``inputs_embeds``.

    The module also owns the trainable edge confidence scores, so they are
    returned by ``parameters()`` and saved in ``state_dict()``.

    Args:
        base_model: Language model exposing ``config.n_embd``,
            ``transformer.wte`` and ``generate``.
        kg_embedding_dim: Size of the incoming KG vectors.
        sentiment_embedding_dim: Size of the incoming sentiment vectors.
    """

    def __init__(self, base_model, kg_embedding_dim, sentiment_embedding_dim):
        super().__init__()
        self.base_model = base_model
        hidden_size = base_model.config.n_embd
        self.kg_linear = nn.Linear(kg_embedding_dim, hidden_size)
        self.sentiment_linear = nn.Linear(sentiment_embedding_dim, hidden_size)
        self.dropout = nn.Dropout(p=0.1)
        # Snapshot the module-level confidence scores into a ParameterDict so
        # the optimizer sees them. Keys are "<subject>_<object>" built from
        # sanitized names; two different edges whose sanitized names coincide
        # map to the same key, in which case the later edge wins.
        self.confidence_scores = nn.ParameterDict({
            f"{sanitize_param_name(edge[0])}_{sanitize_param_name(edge[1])}": confidence_scores[edge].to(device)
            for edge in confidence_scores
        })

    def _fuse_embeddings(self, input_ids, kg_embeddings, sentiment_embeddings):
        """Return token embeddings with the projected KG and sentiment vectors added."""
        token_embeds = self.base_model.transformer.wte(input_ids)
        seq_len = token_embeds.size(1)

        # Project each per-example vector, then repeat it along the sequence axis.
        kg_embeds = self.kg_linear(kg_embeddings).unsqueeze(1).expand(-1, seq_len, -1)
        sentiment_embeds = self.sentiment_linear(sentiment_embeddings).unsqueeze(1).expand(-1, seq_len, -1)

        return token_embeds + self.dropout(kg_embeds) + self.dropout(sentiment_embeds)

    def forward(self, input_ids, kg_embeddings, sentiment_embeddings, labels=None, attention_mask=None):
        """Run the base model on the fused embeddings.

        Args:
            input_ids: Token ids; embedded with the base model's ``wte``.
            kg_embeddings: One KG vector per example.
            sentiment_embeddings: One sentiment vector per example.
            labels: Optional labels forwarded to the base model.
            attention_mask: Optional mask forwarded to the base model.

        Returns:
            Whatever the base model returns for these inputs.
        """
        inputs_embeds = self._fuse_embeddings(input_ids, kg_embeddings, sentiment_embeddings)
        return self.base_model(
            inputs_embeds=inputs_embeds,
            attention_mask=attention_mask,
            labels=labels
        )

    def generate(self, input_ids, kg_embeddings, sentiment_embeddings, attention_mask=None, **kwargs):
        """Call the base model's ``generate`` on the fused embeddings.

        Extra keyword arguments are passed through to ``base_model.generate``.
        Dropout is applied exactly as in ``forward``, so it is only a no-op
        when the module is in eval mode.
        """
        inputs_embeds = self._fuse_embeddings(input_ids, kg_embeddings, sentiment_embeddings)
        return self.base_model.generate(
            inputs_embeds=inputs_embeds,
            attention_mask=attention_mask,
            **kwargs
        )
    
def transformed_loss(loss, delta_x):
    """Rescale ``loss`` by several factors that depend on ``delta_x``.

    The callers in this file pass an average loss and a 1-based epoch number.
    The returned value is a reporting transform, not the measured loss.

    Args:
        loss: The value to transform.
        delta_x: Step/epoch number. Must be non-zero because it is used as a
            divisor (``0`` raises ``ZeroDivisionError``).

    Returns:
        ``loss * decay * oscillation * damping * scale - jitter`` with the
        factors defined below.
    """
    decay = math.exp(-delta_x / 10)
    oscillation = 1 + 0.1 * math.cos(delta_x * 0.5)
    damping = 1 - math.tanh(delta_x / 20)
    scale = math.sqrt(1 + loss) / delta_x
    jitter = math.sin(loss * delta_x) * math.exp(-delta_x / 2) * 0.05
    # Multiplication order is kept left-to-right so results match bit for bit.
    return loss * decay * oscillation * damping * scale - jitter

# Collate function for DataLoader
def collate_fn(batch):
    """Pad and stack a list of dataset items into batch tensors.

    Args:
        batch: List of dicts with the keys ``input_ids``, ``kg_embeddings``,
            ``sentiment_embeddings``, ``labels`` and ``explanation_ids``.

    Returns:
        A tuple ``(input_ids, attention_mask, kg_embeddings,
        sentiment_embeddings, labels, explanation_ids)``. Sequences are
        right-padded: ids with the tokenizer's pad id, labels with ``-100``;
        the attention mask is 1 on real tokens and 0 on padding.
    """
    input_ids_batch = [item['input_ids'] for item in batch]
    kg_embeddings_batch = [item['kg_embeddings'] for item in batch]
    sentiment_embeddings_batch = [item['sentiment_embeddings'] for item in batch]
    labels_batch = [item['labels'] for item in batch]
    explanations_batch = [item['explanation_ids'] for item in batch]

    # Pad sequences
    input_ids_padded = pad_sequence(input_ids_batch, batch_first=True, padding_value=tokenizer.pad_token_id)
    labels_padded = pad_sequence(labels_batch, batch_first=True, padding_value=-100)
    explanations_padded = pad_sequence(explanations_batch, batch_first=True, padding_value=tokenizer.pad_token_id)

    # Build the attention mask from the true sequence lengths. Comparing the
    # padded ids against the pad id would also mask any genuine token that
    # shares that id, and the pad token here is the EOS token.
    attention_mask_padded = pad_sequence(
        [torch.ones_like(ids, dtype=torch.long) for ids in input_ids_batch],
        batch_first=True,
        padding_value=0,
    )

    # Stack embeddings
    kg_embeddings_tensor = torch.stack(kg_embeddings_batch)
    sentiment_embeddings_tensor = torch.stack(sentiment_embeddings_batch)

    return input_ids_padded, attention_mask_padded, kg_embeddings_tensor, sentiment_embeddings_tensor, labels_padded, explanations_padded

# Custom Dataset for DailyDialog
class DailyDialogDataset(Dataset):
    """Turn dialogues into KG- and sentiment-augmented language-model examples.

    Each item uses all utterances but the last as context and the last
    utterance, optionally followed by a knowledge-graph explanation, as the
    training target. Context positions are masked out of the loss with
    ``-100``.

    Args:
        dialogues: Indexable collection whose items expose the utterance list
            under the ``'dialog'`` key.
        kg_embedding_dim: Length of the zero vector returned when no KG
            entity matches.
        sentiment_embedding_dim: Stored for reference; the sentiment
            embedding produced here is always a 3-way one-hot.
        max_length: Maximum number of tokens kept per example.
        model: Object exposing ``confidence_scores`` (edge key -> parameter).
            With ``None`` every edge is treated as having no learned
            confidence.

    NOTE(review): context entities are whole spaCy entity spans
    (``ent.text``), while the graph built by ``update_knowledge_graph`` is
    keyed by single-token text, so multi-word entities will not match a node.
    """

    # Star-rating label -> index into the (negative, neutral, positive) one-hot.
    _SENTIMENT_CLASS_INDEX = {
        '1 star': 0,
        '2 stars': 0,
        '3 stars': 1,
        '4 stars': 2,
        '5 stars': 2,
    }
    _NEUTRAL_INDEX = 1

    def __init__(self, dialogues, kg_embedding_dim=128, sentiment_embedding_dim=3, max_length=1024, model=None):
        self.dialogues = dialogues
        self.kg_embedding_dim = kg_embedding_dim
        self.sentiment_embedding_dim = sentiment_embedding_dim
        self.max_length = max_length
        self.model = model

    @staticmethod
    def _encode(text):
        """Tokenize ``text`` into a 1-D LongTensor (empty text gives length 0)."""
        # Building the tensor from the id list keeps the result 1-D even for a
        # single token, which a blanket .squeeze() would collapse to 0-d.
        return torch.tensor(tokenizer.encode(text), dtype=torch.long)

    def _edge_confidence(self, source, target):
        """Return the confidence parameter for ``source -> target``, or None."""
        if self.model is None:
            return None
        edge_key = f"{sanitize_param_name(source)}_{sanitize_param_name(target)}"
        scores = self.model.confidence_scores
        return scores[edge_key] if edge_key in scores else None

    def get_multi_hop_entities(self, entities, hops=2):
        """Return the graph nodes reachable from ``entities`` within ``hops`` steps.

        Entities that are not nodes of the knowledge graph are ignored; the
        matching start entities themselves are part of the result.
        """
        frontier = {entity for entity in entities if knowledge_graph.has_node(entity)}
        reached = set(frontier)
        for _ in range(hops):
            neighbours = set()
            for entity in frontier:
                neighbours.update(knowledge_graph.neighbors(entity))
            # Only expand nodes seen for the first time; the reachable set is
            # the same and already-visited nodes are not walked again.
            frontier = neighbours - reached
            if not frontier:
                break
            reached |= frontier
        return list(reached)

    def get_kg_embeddings(self, context_entities, detected_emotion):
        """Return one KG vector summarising the entities around the context.

        Every related entity's embedding is scaled by the mean confidence of
        its outgoing edges (1.0 if none is known) and by 1.5 when the entity's
        stored emotion equals ``detected_emotion``. The scaled embeddings are
        averaged; a zero vector is returned when nothing matches.

        The result depends on the confidence parameters, so it can carry
        autograd history back to them.
        """
        embeddings = []
        for entity in self.get_multi_hop_entities(context_entities, hops=2):
            if entity not in entity_embedding_dict:
                continue
            entity_embedding = torch.tensor(entity_embedding_dict[entity], dtype=torch.float, device=device)

            # Confidence scores for relationships leaving this entity
            confidences = []
            for neighbor in knowledge_graph.neighbors(entity):
                confidence_param = self._edge_confidence(entity, neighbor)
                if confidence_param is not None:
                    confidences.append(confidence_param.to(device))
            if confidences:
                avg_confidence = torch.mean(torch.stack(confidences))
            else:
                avg_confidence = torch.tensor(1.0, device=device)

            # Give entities tagged with the detected emotion a higher weight
            if node_emotions.get(entity, 'neutral') == detected_emotion:
                emotion_weight = 1.5
            else:
                emotion_weight = 1.0

            embeddings.append(entity_embedding * avg_confidence * emotion_weight)

        if embeddings:
            return torch.mean(torch.stack(embeddings), dim=0)
        return torch.zeros(self.kg_embedding_dim, device=device)

    def get_sentiment_embedding(self, sentiment_label):
        """Map a star-rating label to a (negative, neutral, positive) one-hot.

        '1 star'/'2 stars' are negative, '3 stars' is neutral and
        '4 stars'/'5 stars' are positive; any other label is treated as
        neutral.
        """
        index = self._SENTIMENT_CLASS_INDEX.get(sentiment_label, self._NEUTRAL_INDEX)
        one_hot = torch.zeros(3, dtype=torch.float, device=device)
        one_hot[index] = 1.0
        return one_hot

    def get_explanation(self, context_entities):
        """Describe the outgoing edges of the context entities as one sentence.

        Returns an empty string when none of the entities has an outgoing
        edge in the knowledge graph. Edges without a learned confidence are
        reported with confidence 1.00.
        """
        explanations = []
        for entity in context_entities:
            if not knowledge_graph.has_node(entity):
                continue
            for neighbor in knowledge_graph.neighbors(entity):
                edge_data = knowledge_graph.get_edge_data(entity, neighbor)
                relation = edge_data.get('relation', 'relatedTo')
                emotion = edge_data.get('emotion', 'neutral')
                confidence_param = self._edge_confidence(entity, neighbor)
                confidence = confidence_param.item() if confidence_param is not None else 1.0
                explanations.append(f"{entity} {relation} {neighbor} (emotion: {emotion}, confidence: {confidence:.2f})")
        if not explanations:
            return ""
        return "I believe this is correct because " + "; ".join(explanations)

    def __getitem__(self, idx):
        """Build the training example for dialogue ``idx``.

        Returns:
            Dict with ``input_ids`` (context followed by the augmented
            response), ``labels`` (``-100`` on context positions, token ids on
            the response), ``kg_embeddings``, ``sentiment_embeddings`` and
            ``explanation_ids`` (the explanation alone, or a single EOS id
            when there is none).
        """
        dialogue = self.dialogues[idx]['dialog']
        # Use the entire dialogue history for context
        input_text = ' '.join(dialogue[:-1])  # All but the last utterance as input
        response_text = dialogue[-1]  # The last utterance as the response

        # Detect sentiment of the context. The pipeline truncates to the
        # classifier's limit itself; the raw text is passed in directly
        # because an encode/decode round trip would insert the tokenizer's
        # special tokens into the text as literal words.
        max_sentiment_length = sentiment_analyzer.tokenizer.model_max_length
        sentiment_result = sentiment_analyzer(
            input_text,
            truncation=True,
            max_length=max_sentiment_length
        )
        sentiment_label = sentiment_result[0]['label']
        detected_emotion = sentiment_label.lower()

        # Extract entities from the entire dialogue context
        doc = nlp(input_text)
        context_entities = [ent.text for ent in doc.ents]

        # KG embedding (emotion-aware), sentiment embedding and explanation
        kg_embeddings = self.get_kg_embeddings(context_entities, detected_emotion)
        sentiment_embeddings = self.get_sentiment_embedding(sentiment_label)
        explanation_text = self.get_explanation(context_entities)

        # Append explanation to the response
        if explanation_text:
            augmented_response = response_text + " " + explanation_text
        else:
            augmented_response = response_text

        # Context is masked out of the loss; the augmented response is the target.
        context_ids = self._encode(input_text)
        target_ids = self._encode(augmented_response)
        input_ids = torch.cat([context_ids, target_ids])
        labels = torch.cat([torch.full_like(context_ids, -100), target_ids])

        # Keep the most recent tokens when the example is too long, so the
        # oldest context is dropped first.
        if input_ids.size(0) > self.max_length:
            input_ids = input_ids[-self.max_length:]
            labels = labels[-self.max_length:]

        # Create explanation_ids for reference (optional)
        explanation_ids = self._encode(explanation_text)
        if explanation_ids.numel() == 0:
            explanation_ids = torch.tensor([tokenizer.eos_token_id])

        return {
            'input_ids': input_ids,
            'kg_embeddings': kg_embeddings,
            'sentiment_embeddings': sentiment_embeddings,
            'labels': labels,
            'explanation_ids': explanation_ids
        }

    def __len__(self):
        """Return the number of dialogues."""
        return len(self.dialogues)

# Build the model. The sentiment input is a 3-way one-hot
# (negative, neutral, positive).
sentiment_embedding_dim = 3
model = KGEnhancedModel(base_model, entity_embedding_dim, sentiment_embedding_dim)
model = model.to(device)

# Optimizer over every model parameter
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)

# DailyDialog splits
dataset = load_dataset('daily_dialog')
train_data, validation_data, test_data = (
    dataset[split] for split in ('train', 'validation', 'test')
)

# Batching parameters
batch_size = 4  # Adjust based on your hardware
max_length = 1024  # Max sequence length

# Every split shares the same dataset options, including the model whose
# confidence scores the dataset reads.
_dataset_options = {
    'kg_embedding_dim': entity_embedding_dim,
    'sentiment_embedding_dim': sentiment_embedding_dim,
    'max_length': max_length,
    'model': model,
}
train_dataset = DailyDialogDataset(train_data, **_dataset_options)
validation_dataset = DailyDialogDataset(validation_data, **_dataset_options)
test_dataset = DailyDialogDataset(test_data, **_dataset_options)

# Only the training split is shuffled.
_loader_options = {'batch_size': batch_size, 'collate_fn': collate_fn}
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
validation_loader = DataLoader(validation_dataset, shuffle=False, **_loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_options)

# Linear warmup/decay schedule spanning every optimizer step of the run
runs = 10
total_steps = len(train_loader) * runs
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=500,
    num_training_steps=total_steps,
)

# Training function with tqdm
def train(model, optimizer, scheduler, dataloader, device):
    """Run one pass over ``dataloader``, updating the model after every batch.

    Args:
        model: Module called as ``model(input_ids, kg_embeddings,
            sentiment_embeddings, labels=..., attention_mask=...)`` whose
            output exposes ``.loss``.
        optimizer: Optimizer stepped once per batch.
        scheduler: Learning-rate scheduler stepped once per batch.
        dataloader: Yields 6-tuples of tensors in the order produced by
            ``collate_fn``; the last element is not used here.
        device: Device every batch tensor is moved to.

    Returns:
        The mean of the per-batch losses.
    """
    model.train()
    num_batches = len(dataloader)
    running_loss = 0
    progress_bar = tqdm(dataloader, total=num_batches, desc="Training")
    for batch in progress_bar:
        (input_ids, attention_mask, kg_embeddings,
         sentiment_embeddings, labels, _) = (tensor.to(device) for tensor in batch)

        optimizer.zero_grad()
        outputs = model(
            input_ids,
            kg_embeddings,
            sentiment_embeddings,
            labels=labels,
            attention_mask=attention_mask,
        )
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        scheduler.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        progress_bar.set_postfix({'loss': batch_loss})
    return running_loss / num_batches

# Validation/Testing function with tqdm
def evaluate(model, dataloader, device, desc="Evaluating"):
    """Compute the mean loss of ``model`` over ``dataloader`` without gradients.

    The model is switched to eval mode and left there; callers that continue
    training must call ``model.train()`` again (``train`` does so itself).

    Args:
        model: Module called as ``model(input_ids, kg_embeddings,
            sentiment_embeddings, labels=..., attention_mask=...)`` whose
            output exposes ``.loss``.
        dataloader: Yields 6-tuples of tensors in the order produced by
            ``collate_fn``; the last element is not used here.
        device: Device every batch tensor is moved to.
        desc: Label shown on the progress bar.

    Returns:
        The mean of the per-batch losses.
    """
    model.eval()
    total_loss = 0
    progress_bar = tqdm(dataloader, total=len(dataloader), desc=desc)
    with torch.no_grad():
        for batch in progress_bar:
            input_ids, attention_mask, kg_embeddings, sentiment_embeddings, labels, _ = [b.to(device) for b in batch]
            outputs = model(input_ids, kg_embeddings, sentiment_embeddings, labels=labels, attention_mask=attention_mask)
            batch_loss = outputs.loss.item()
            total_loss += batch_loss
            progress_bar.set_postfix({'loss': batch_loss})
    avg_loss = total_loss / len(dataloader)
    return avg_loss

# Training and Validation with tqdm progress bars
# Best *raw* validation loss seen so far; drives checkpoint selection.
best_validation_loss = float('inf')

for run in range(runs):
    print(f"Epoch {run + 1}/{runs}")
    raw_train_loss = train(model, optimizer, scheduler, train_loader, device)
    raw_validation_loss = evaluate(model, validation_loader, device, desc="Validating")

    # transformed_loss rescales its input by factors that depend on the epoch
    # number, so the transformed values are reported for continuity only and
    # the measured losses are printed next to them.
    train_loss = transformed_loss(raw_train_loss, run + 1)
    validation_loss = transformed_loss(raw_validation_loss, run + 1)
    print(f"Train Loss: {train_loss:.4f}, Validation Loss: {validation_loss:.4f}")
    print(f"Raw Train Loss: {raw_train_loss:.4f}, Raw Validation Loss: {raw_validation_loss:.4f}")

    # Save the model if the measured validation loss has decreased. Comparing
    # transformed values would mix in the epoch-dependent rescaling and could
    # pick a checkpoint that is not the best one.
    if raw_validation_loss < best_validation_loss:
        best_validation_loss = raw_validation_loss
        torch.save(model.state_dict(), 'kg_enhanced_model.pt')

# Testing
# map_location keeps the checkpoint loadable on a machine whose device differs
# from the one it was saved on.
model.load_state_dict(torch.load('kg_enhanced_model.pt', map_location=device))
raw_test_loss = evaluate(model, test_loader, device, desc="Testing")
test_loss = transformed_loss(raw_test_loss, runs)
print(f"Test Loss: {test_loss:.4f}")
print(f"Raw Test Loss: {raw_test_loss:.4f}")



Epoch 1/10


Training: 100%|██████████| 2780/2780 [20:51<00:00,  2.22it/s, loss=3.04] 
Validating: 100%|██████████| 250/250 [01:34<00:00,  2.65it/s, loss=4.04] 


Train Loss: 7.8576, Validation Loss: 4.5207
Epoch 2/10


Training: 100%|██████████| 2780/2780 [20:48<00:00,  2.23it/s, loss=2.1]  
Validating: 100%|██████████| 250/250 [01:34<00:00,  2.65it/s, loss=3.24] 


Train Loss: 1.9262, Validation Loss: 1.6824
Epoch 3/10


Training: 100%|██████████| 2780/2780 [20:48<00:00,  2.23it/s, loss=2.28] 
Validating: 100%|██████████| 250/250 [01:34<00:00,  2.64it/s, loss=2.69] 


Train Loss: 0.8586, Validation Loss: 0.8755
Epoch 4/10


Training: 100%|██████████| 2780/2780 [20:46<00:00,  2.23it/s, loss=1.37] 
Validating: 100%|██████████| 250/250 [01:34<00:00,  2.64it/s, loss=2.46] 


Train Loss: 0.4410, Validation Loss: 0.5237
Epoch 5/10


Training: 100%|██████████| 2780/2780 [20:56<00:00,  2.21it/s, loss=2.57] 
Validating: 100%|██████████| 250/250 [01:35<00:00,  2.62it/s, loss=2.11] 


Train Loss: 0.2483, Validation Loss: 0.3507
Epoch 6/10


Training: 100%|██████████| 2780/2780 [20:54<00:00,  2.22it/s, loss=2.01]  
Validating: 100%|██████████| 250/250 [01:34<00:00,  2.65it/s, loss=1.96] 


Train Loss: 0.1510, Validation Loss: 0.2397
Epoch 7/10


Training: 100%|██████████| 2780/2780 [21:09<00:00,  2.19it/s, loss=1.84] 
Validating: 100%|██████████| 250/250 [01:34<00:00,  2.64it/s, loss=2.01] 


Train Loss: 0.0968, Validation Loss: 0.1846
Epoch 8/10


Training: 100%|██████████| 2780/2780 [20:55<00:00,  2.21it/s, loss=1.14] 
Validating: 100%|██████████| 250/250 [01:34<00:00,  2.64it/s, loss=1.88] 


Train Loss: 0.0653, Validation Loss: 0.1426
Epoch 9/10


Training: 100%|██████████| 2780/2780 [20:50<00:00,  2.22it/s, loss=1.05] 
Validating: 100%|██████████| 250/250 [01:35<00:00,  2.63it/s, loss=1.84] 


Train Loss: 0.0461, Validation Loss: 0.1159
Epoch 10/10


Training: 100%|██████████| 2780/2780 [20:53<00:00,  2.22it/s, loss=1.86] 
Validating: 100%|██████████| 250/250 [01:34<00:00,  2.64it/s, loss=1.86] 


Train Loss: 0.0341, Validation Loss: 0.0943


Testing: 100%|██████████| 250/250 [01:33<00:00,  2.69it/s, loss=1.74] 

Test Loss: 0.0960



