In [2]:
from transformers import pipeline
from datetime import datetime
from langchain_core.documents import Document
from collections import defaultdict

import pandas as pd
import os

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
# Zero-shot classification pipeline; the task and the checkpoint id are passed
# by keyword so it is clear which argument is which.
classifier = pipeline(
    task="zero-shot-classification",
    model="MoritzLaurer/deberta-v3-large-zeroshot-v2.0",
)

# Directory that holds the exported data, read from the DATA environment variable.
loc = os.getenv('DATA')
if loc is None:
    # Fail early with a readable message instead of the opaque
    # "unsupported operand type(s) for +: 'NoneType' and 'str'" from path building.
    raise RuntimeError("Environment variable DATA must be set to the directory containing sms.csv")

# os.path.join so DATA works with or without a trailing path separator.
sms = pd.read_csv(os.path.join(loc, "sms.csv"), header=None)

# Keep only the six positional columns used in this notebook and name them.
sms = sms.loc[:, [2, 4, 5, 9, 12, 18]]
sms.columns = ["Address", "Date Sent", "Date Received", "Type", "Body", "Seen"]

# Every cell is expected to look like "key=value"; keep only the value.
# - Iterating a DataFrame (even a row slice) yields column labels, so this has
#   always touched every column; the loop now says so explicitly.
# - Split on the FIRST "=" only, so a value that itself contains "=" (for
#   example a message body) is no longer cut off at its second "=".
for column in sms.columns:
    sms[column] = sms[column].transform(lambda cell: cell.split("=", 1)[1])

def _format_epoch_ms(raw):
    """Format a millisecond epoch value (anything int() accepts) as "DD-MM-YYYY HH:MM:SS".

    Uses datetime.fromtimestamp without a tz argument, i.e. the machine's local time.
    """
    return datetime.fromtimestamp(int(raw)//1000).strftime("%d-%m-%Y %H:%M:%S")


# Remove a leading "+1" from each address.
sms["Address"] = sms["Address"].replace(regex="^\\+1", value="")
sms["Date Sent"] = sms["Date Sent"].transform(_format_epoch_ms)
# Type and Seen are compared as strings: "1" maps to "Received" / "True",
# every other value maps to "Sent" / "False".
sms["Type"] = sms["Type"].transform(lambda code: "Sent" if code != "1" else "Received")
sms["Seen"] = sms["Seen"].transform(lambda flag: "False" if flag != "1" else "True")

# Sent rows reuse the already formatted "Date Sent"; only the other rows have
# their raw "Date Received" parsed, so a Sent row's raw value is never converted.
sms["Date Received"] = [
    sent_at if kind == "Sent" else _format_epoch_ms(received_raw)
    for sent_at, received_raw, kind in zip(sms["Date Sent"], sms["Date Received"], sms["Type"])
]
        
# One Document per SMS row: the message body is the content, the remaining
# columns become metadata. Note that "Sender" is filled from the Address column
# for every row, whatever its Type is.
sms_documents = [
    Document(
        page_content=body,
        metadata={
            "Sender": address,
            "Date Time Sent": sent_at,
            "Date Time Received": received_at,
            "Type": kind,
            "Seen": seen,
        },
    )
    for body, address, sent_at, received_at, kind, seen in zip(
        sms["Body"],
        sms["Address"],
        sms["Date Sent"],
        sms["Date Received"],
        sms["Type"],
        sms["Seen"],
    )
]

# Labels the zero-shot classifier ranks each text against.
candidate_labels = [
    "identity_attack", "sexual_explicit", "threat",
    "toxicity", "normal", "extremism",
]

# Build one text per "Sender" value by concatenating that sender's message
# bodies in document order, each followed by ". ".
# A single pass over the documents replaces the earlier nested loop, which
# rescanned the whole list once per distinct sender; the text produced for each
# sender is unchanged, and the keys now appear in first-seen order.
res = defaultdict(str)
for doc in sms_documents:
    res[doc.metadata["Sender"]] += doc.page_content + ". "

# Distinct "Sender" values, kept under the original name for any later use.
keys = set(res)

# Run the classifier on each sender's combined text and print the sender next
# to the result the pipeline returns.
for sender, combined_text in res.items():
    print(sender, classifier(combined_text, candidate_labels))

Device set to use cpu


[Document(metadata={'Sender': '7899977860', 'Date Time Sent': '21-11-2025 01:01:17', 'Date Time Received': '21-11-2025 01:01:17', 'Type': 'Received', 'Seen': 'True'}, page_content='Oh I’m shaking. You talk big but all I see is someone desperate to look tough.'), Document(metadata={'Sender': '9999999999', 'Date Time Sent': '21-11-2025 00:59:43', 'Date Time Received': '21-11-2025 00:59:43', 'Type': 'Sent', 'Seen': 'True'}, page_content='We don’t bow to tyrants hiding behind darkness. If it’s a clash of convictions you want  the Legion stands ready. We won’t let your corruption swallow the realm.'), Document(metadata={'Sender': '9999999999', 'Date Time Sent': '21-11-2025 00:59:34', 'Date Time Received': '21-11-2025 00:59:35', 'Type': 'Received', 'Seen': 'True'}, page_content='You cling to that naive radiance like it protects you. The Obsidian Order’s rise is inevitable. Oppose us  and you’ll be swept aside with the rest of the deluded.'), Document(metadata={'Sender': '9999999999', 'Date T

6505551212 {'sequence': "I'll see if there's donuts in the break room.. I accept praise in the form of pastries.. You're a hero lol.. Yeah. Took forever though.. Oh wow. Did it finally work?. I stayed up fixing that server issue.. Same. Rough night?. Barely. Need coffee first.. Morning Bob! You awake?. ", 'labels': ['normal', 'identity_attack', 'extremism', 'toxicity', 'sexual_explicit', 'threat'], 'scores': [0.5520278215408325, 0.20149928331375122, 0.12858204543590546, 0.06696857511997223, 0.029601268470287323, 0.021320993080735207]}
9999999999 {'sequence': 'We don’t bow to tyrants hiding behind darkness. If it’s a clash of convictions you want  the Legion stands ready. We won’t let your corruption swallow the realm.. You cling to that naive radiance like it protects you. The Obsidian Order’s rise is inevitable. Oppose us  and you’ll be swept aside with the rest of the deluded.. Your shadows are nothing but fear masquerading as strength. The Legion will burn away every twisted doctrin