In [2]:

#  Jarvis-M with DialogSum Dataset 


from datasets import load_dataset
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util

# -------------------------------
# 1. Load DialogSum Dataset
# -------------------------------
dataset = load_dataset("knkarthick/dialogsum")
train_split = dataset['train']

# Print one raw record so the available fields are visible
print(train_split[0])

# Rows 10-12 of the training split act as the three demo sessions
session_1, session_2, session_3 = (
    train_split[row]['dialogue'] for row in (10, 11, 12)
)

# Preview at most the first 500 characters of the first two sessions
print("\n Session 1 Dialogue:\n", session_1[:500], "...")
print("\n Session 2 Dialogue:\n", session_2[:500], "...")

# -------------------------------
# 2. Load Models
# -------------------------------
# Summarization pipeline used by summarize_text()
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# Sentence encoder used to embed dialogues and summaries for retrieval
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# -------------------------------
# 3. Define Helper Function
# -------------------------------
def summarize_text(text, max_length=60, min_length=15):
    """Summarize *text* with the module-level ``summarizer`` pipeline.

    Args:
        text: The text to summarize (a dialogue, optionally prefixed with
            retrieved memory context).
        max_length: Upper bound passed to the pipeline for the generated
            summary length. Defaults to the value the script always used.
        min_length: Lower bound passed to the pipeline for the generated
            summary length. Defaults to the value the script always used.

    Returns:
        The generated summary string, or ``""`` when *text* is empty or
        whitespace-only (the model is not invoked in that case).
    """
    # Nothing to summarize: skip the model call entirely.
    if not text or not text.strip():
        return ""

    # truncation=True asks the pipeline's tokenizer to clip over-long inputs
    # to the model's input limit. Without it, a long dialogue (or dialogue
    # plus prepended memory context) can exceed that limit.
    result = summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        truncation=True,
    )
    return result[0]['summary_text']

# -------------------------------
# 4. Generate Individual Summaries
# -------------------------------
# Each session is summarized on its own, in order, with no memory context
summary_1, summary_2, summary_3 = [
    summarize_text(session) for session in (session_1, session_2, session_3)
]

print("\nDay 1 Summary:\n", summary_1)
print("\nDay 2 Summary:\n", summary_2)
print("\nDay 3 Summary:\n", summary_3)

# -------------------------------
# 5. Memory & Retrieval (Intra-user + Cross-user)
# -------------------------------
# Simulate users for the three sessions; the last entry is the current one
user_sessions = [
    {"user_id": "user_A", "dialogue": session_1, "summary": summary_1},
    {"user_id": "user_B", "dialogue": session_2, "summary": summary_2},
    {"user_id": "user_A", "dialogue": session_3, "summary": summary_3},  # current
]

# The final session is the query; everything before it is stored memory.
# Negative indexing keeps this correct if more sessions are added above.
current = user_sessions[-1]
past_sessions = user_sessions[:-1]

# Split past memory into own vs cross-user
own_past_summaries = [s["summary"] for s in past_sessions if s["user_id"] == current["user_id"]]
cross_user_summaries = [s["summary"] for s in past_sessions if s["user_id"] != current["user_id"]]

# Encode query on the current raw dialogue to find semantically similar memories
# NOTE(review): the sentence encoder presumably truncates inputs beyond its
# maximum sequence length, so only the start of a long dialogue may be
# embedded - confirm against the model's configuration.
query_embedding = embedder.encode(current["dialogue"], convert_to_tensor=True)

# Intra-user retrieval (most relevant own past summary)
retrieved_context = ""
if own_past_summaries:
    own_embeddings = embedder.encode(own_past_summaries, convert_to_tensor=True)
    own_scores = util.pytorch_cos_sim(query_embedding, own_embeddings)  # shape: [1, N]
    own_idx = own_scores.argmax().item()
    retrieved_context = own_past_summaries[own_idx]

# Cross-user retrieval (top-k relevant summaries from other users)
top_k = 1  # increase to 2+ if desired
cross_user_context = ""
if cross_user_summaries:
    cross_embeddings = embedder.encode(cross_user_summaries, convert_to_tensor=True)
    cross_scores = util.pytorch_cos_sim(query_embedding, cross_embeddings)  # shape: [1, N]
    # Never ask for more results than there are candidates
    k = min(top_k, len(cross_user_summaries))
    # topk over dim=1 of a [1, N] tensor gives [1, k] indices; row 0 always
    # converts to a list, so no scalar special-case is needed.
    top_idxs = cross_scores.topk(k, dim=1).indices
    idxs = top_idxs[0].tolist()
    cross_user_context = " ".join(cross_user_summaries[i] for i in idxs)

print("\nüß© Retrieved Intra-User Memory Context:\n", retrieved_context or "[none]")
print("\nüë• Retrieved Cross-User Memory Context:\n", cross_user_context or "[none]")

# -------------------------------
# 6. Cross-Session + Cross-User Memory-Aware Summary
# -------------------------------
# Order matters: own memory first, then cross-user memory, then the current
# dialogue. Empty memory strings are dropped; the dialogue is always included.
hybrid_input_parts = [
    *filter(None, (retrieved_context, cross_user_context)),
    current["dialogue"],
]
hybrid_input = " ".join(hybrid_input_parts)

cross_user_summary = summarize_text(hybrid_input)
print("\n Cross-User Memory-Aware Summary:\n", cross_user_summary)

# -------------------------------
# 7. Save Results for Report
# -------------------------------
# (heading, body) pairs in the order they appear in the report file
report_sections = [
    ("Day 1 Summary:\n", summary_1),
    ("Day 2 Summary:\n", summary_2),
    ("Day 3 Summary:\n", summary_3),
    ("Retrieved Intra-User Context:\n", retrieved_context or "[none]"),
    ("Retrieved Cross-User Context:\n", cross_user_context or "[none]"),
    ("Cross-User Memory-Aware Summary:\n", cross_user_summary),
]

# Sections are separated by a blank line; the file ends with a single newline
with open("jarvisM_dialogsum_demo.txt", "w", encoding="utf-8") as f:
    f.write("\n\n".join(heading + body for heading, body in report_sections) + "\n")

print("\n Output saved to jarvisM_dialogsum_demo.txt")

{'id': 'train_0', 'dialogue': "#Person1#: Hi, Mr. Smith. I'm Doctor Hawkins. Why are you here today?\n#Person2#: I found it would be a good idea to get a check-up.\n#Person1#: Yes, well, you haven't had one for 5 years. You should have one every year.\n#Person2#: I know. I figure as long as there is nothing wrong, why go see the doctor?\n#Person1#: Well, the best way to avoid serious illnesses is to find out about them early. So try to come at least once a year for your own good.\n#Person2#: Ok.\n#Person1#: Let me see here. Your eyes and ears look fine. Take a deep breath, please. Do you smoke, Mr. Smith?\n#Person2#: Yes.\n#Person1#: Smoking is the leading cause of lung cancer and heart disease, you know. You really should quit.\n#Person2#: I've tried hundreds of times, but I just can't seem to kick the habit.\n#Person1#: Well, we have classes and some medications that might help. I'll give you more information before you leave.\n#Person2#: Ok, thanks doctor.", 'summary': "Mr. Smith's 

Device set to use cuda:0



Day 1 Summary:
 A man asks a friend to run over to the store for a few things. "We need a small bag of sugar, four oranges, and a half gallon of milk," the man says.

Day 2 Summary:
 A family member shows off a picture of their mother's Master's degree. The family is very proud of her, but she doesn't want a print.

Day 3 Summary:
 #Person1#: Did Bean send these dirty jokes to you, too? Look!#Person2#: What a creep! Phony good luck e-mails are one thing, but sexual harassment is crossing the line.

üß© Retrieved Intra-User Memory Context:
 A man asks a friend to run over to the store for a few things. "We need a small bag of sugar, four oranges, and a half gallon of milk," the man says.

üë• Retrieved Cross-User Memory Context:
 A family member shows off a picture of their mother's Master's degree. The family is very proud of her, but she doesn't want a print.

 Cross-User Memory-Aware Summary:
 #Person1#: Did Bean send these dirty jokes to you, too? Look! #Person2#: What a creep! P