In [1]:
!pip install -q sentence-transformers scikit-learn


In [2]:
import os
import re
import torch
import torch.nn as nn
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.model_selection import train_test_split


In [3]:
# Use the GPU when torch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Sentence encoder shared by the cells below, moved onto the selected device.
encoder = SentenceTransformer("all-MiniLM-L6-v2")
encoder = encoder.to(device)

# Width used for the memory states and the classifier inputs.
# NOTE(review): assumed to equal the encoder's output size — confirm if the
# model name above is ever changed.
EMB_DIM = 384


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [4]:
def split_sentences(text):
    """Split ``text`` into sentences at whitespace that follows ``.``, ``!`` or ``?``.

    Leading and trailing whitespace is ignored and empty pieces are dropped, so
    empty or whitespace-only input yields ``[]`` instead of ``['']``.

    Args:
        text: The string to split.

    Returns:
        A list of non-empty sentence strings, in their original order.
    """
    pieces = re.split(r'(?<=[.!?])\s+', text.strip())
    # re.split returns [''] for an empty string; filter so callers can rely on
    # truthiness of the result to detect "no sentences".
    return [piece for piece in pieces if piece]

def normalize(s):
    """Return ``s`` converted to a lowercase string without surrounding whitespace."""
    lowered = str(s).lower()
    return lowered.strip()

# Numeric target assigned to each label string.
LABEL_MAP = dict(consistent=1, contradict=0)


In [5]:
# Maps each normalized novel title to the path of its text file in "novels/".
NOVEL_LOOKUP = {}

# os.listdir returns entries in arbitrary order; sorting keeps the lookup's
# insertion order and the printed report stable across runs.
for fname in sorted(os.listdir("novels")):
    if fname.endswith(".txt"):
        # Strip only the trailing extension. str.replace would also delete a
        # ".txt" that happens to occur in the middle of the file name.
        key = normalize(fname[:-len(".txt")])
        NOVEL_LOOKUP[key] = os.path.join("novels", fname)

print("Loaded novels:")
for k, v in NOVEL_LOOKUP.items():
    print(k, "->", v)


Loaded novels:
in search of the castaways -> novels/In search of the castaways.txt
the count of monte cristo -> novels/The Count of Monte Cristo.txt


In [6]:
def get_novel_path(row):
    """Return the novel file path registered for ``row["book_name"]``.

    The title is normalized before it is looked up in ``NOVEL_LOOKUP``.

    Raises:
        ValueError: If the normalized title has no entry in ``NOVEL_LOOKUP``.
    """
    title = row["book_name"]
    lookup_key = normalize(title)
    if lookup_key in NOVEL_LOOKUP:
        return NOVEL_LOOKUP[lookup_key]
    raise ValueError(f"Novel not found for book_name: {title}")


In [7]:
def temporal_split(text, early_frac=0.2, late_frac=0.7):
    """Split ``text`` into early, middle and late parts by word position.

    Args:
        text: String to split; words are the whitespace-delimited tokens.
        early_frac: Fraction of the word count at which the early part ends.
        late_frac: Fraction of the word count at which the late part begins.

    Returns:
        A three-item list ``[early, middle, late]`` of space-joined words. A
        part with no words is the empty string.

    Raises:
        ValueError: If ``0 <= early_frac <= late_frac <= 1`` does not hold.
    """
    if not 0 <= early_frac <= late_frac <= 1:
        raise ValueError(
            "expected 0 <= early_frac <= late_frac <= 1, "
            f"got early_frac={early_frac}, late_frac={late_frac}"
        )

    words = text.split()
    n = len(words)

    # Compute each cut point once so adjacent parts share the same boundary.
    early_end = int(early_frac * n)
    late_start = int(late_frac * n)

    return [
        " ".join(words[:early_end]),            # early
        " ".join(words[early_end:late_start]),  # middle
        " ".join(words[late_start:]),           # late
    ]


In [8]:
class BDHMemory(nn.Module):
    """Gated memory cell that blends a running state with new evidence.

    The new state is ``decay * state + (1 - decay) * update``, where ``update``
    is a sigmoid gate multiplied element-wise with a tanh-squashed projection.
    Both are computed from ``evidence`` by separate linear layers.
    """

    def __init__(self, dim, decay=0.9):
        """Create the gate and delta projections (``dim`` -> ``dim``)."""
        super().__init__()
        self.gate = nn.Linear(dim, dim)
        self.delta = nn.Linear(dim, dim)
        self.decay = decay

    def forward(self, state, evidence):
        """Return the state after folding ``evidence`` into ``state``."""
        gate_values = torch.sigmoid(self.gate(evidence))
        candidate = torch.tanh(self.delta(evidence))

        retained = self.decay * state
        injected = (1 - self.decay) * (gate_values * candidate)
        return retained + injected


In [9]:
# Cue phrases are matched as whole words. Plain substring tests misfire on
# words that merely contain a cue, e.g. "button" ("but") or "traveled to"
# ("led to").
_CAUSAL_CUES = re.compile(
    r"\b(?:because|therefore|led to|as a result)\b", re.IGNORECASE
)
_CONTRAST_CUES = re.compile(r"\b(?:but|however|although)\b", re.IGNORECASE)


def extract_evidence(sentences):
    """Bucket sentences by the cue words they contain.

    A sentence goes to ``causal`` if it contains a causal cue, otherwise to
    ``contradict`` if it contains a contrast cue, otherwise to ``neutral``.
    Matching is case-insensitive and whole-word.

    Args:
        sentences: Iterable of sentence strings.

    Returns:
        A tuple ``(causal, support, contradict, neutral)`` of lists. No rule
        assigns sentences to ``support``, so it is always empty; it is kept so
        the four-item return shape stays the same for callers.
    """
    causal, support, contradict, neutral = [], [], [], []

    for s in sentences:
        if _CAUSAL_CUES.search(s):
            causal.append(s)
        elif _CONTRAST_CUES.search(s):
            contradict.append(s)
        else:
            neutral.append(s)

    return causal, support, contradict, neutral


In [None]:
def _mean_embedding(sentences):
    """Encode ``sentences`` with the shared encoder and average the result.

    The encoder output is averaged over its first dimension with
    ``keepdim=True`` before being returned.
    """
    return encoder.encode(
        sentences, convert_to_tensor=True
    ).mean(dim=0, keepdim=True)


def process_novel(novel_path, belief_mem, causal_mem, char_mem, character_name):
    """Fold a novel into belief, causal and character memory states.

    The text is cut into segments by ``temporal_split``. For each segment, in
    order, the mean embedding of all sentences updates the belief state, the
    mean embedding of the causal sentences updates the causal state, and the
    mean embedding of the sentences mentioning ``character_name`` updates the
    character state. A state is left unchanged for a segment that provides no
    sentences of the relevant kind.

    Args:
        novel_path: Path of the novel's text file (read as UTF-8, undecodable
            bytes ignored).
        belief_mem: Module called as ``belief_mem(state, embedding)``.
        causal_mem: Module called as ``causal_mem(state, embedding)``.
        char_mem: Module called as ``char_mem(state, embedding)``.
        character_name: Name matched case-insensitively as a substring. A
            non-string or blank value disables character matching.

    Returns:
        ``(belief, causal, character, coverage)``: the three final states and
        a dict with the number of support, contradict and neutral sentences
        reported by ``extract_evidence`` over the whole novel.
    """
    with open(novel_path, "r", encoding="utf-8", errors="ignore") as f:
        text = f.read()

    # A missing name (None, or NaN read from a CSV) has no .lower(), and an
    # empty string is a substring of every sentence. Both are treated as
    # "no character to match".
    if isinstance(character_name, str):
        name = character_name.strip().lower()
    else:
        name = ""

    # All states start at zero and are created directly on the target device.
    belief = torch.zeros(1, EMB_DIM, device=device)
    causal = torch.zeros(1, EMB_DIM, device=device)
    character = torch.zeros(1, EMB_DIM, device=device)

    coverage = {"support": 0, "contradict": 0, "neutral": 0}

    for seg in temporal_split(text):
        # Drop blank pieces so an empty segment does not feed the embedding of
        # an empty string into the memories.
        sentences = [s for s in split_sentences(seg) if s.strip()]
        causal_s, support, contradict, neutral = extract_evidence(sentences)

        coverage["support"] += len(support)
        coverage["contradict"] += len(contradict)
        coverage["neutral"] += len(neutral)

        if sentences:
            belief = belief_mem(belief, _mean_embedding(sentences))

        if causal_s:
            causal = causal_mem(causal, _mean_embedding(causal_s))

        if name:
            char_sents = [s for s in sentences if name in s.lower()]
            if char_sents:
                character = char_mem(character, _mean_embedding(char_sents))

    return belief, causal, character, coverage


In [11]:
class ConsistencyClassifier(nn.Module):
    """Two-layer MLP that scores a backstory against three memory states.

    The four inputs are concatenated along dimension 1, passed through
    Linear -> ReLU -> Dropout(0.3) -> Linear, and squashed with a sigmoid.
    """

    def __init__(self):
        """Build the network; layer widths are derived from ``EMB_DIM``."""
        super().__init__()
        hidden = EMB_DIM
        layers = [
            nn.Linear(4 * hidden, hidden),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden, 1),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, belief, causal, character, backstory):
        """Return the sigmoid of the network output for the joined inputs."""
        features = torch.cat((belief, causal, character, backstory), dim=1)
        logits = self.net(features)
        return logits.sigmoid()


In [12]:
def train_full_model(train_csv, epochs=2, lr=1e-3):
    """Train the three memory modules and the classifier, one row per step.

    For every row the novel named by ``book_name`` is processed with the
    current memory modules, the ``content`` text is encoded as the backstory,
    and one Adam step is taken on the binary cross-entropy between the
    classifier output and the row's label.

    Args:
        train_csv: Path of a CSV with ``book_name``, ``char``, ``content`` and
            ``label`` columns.
        epochs: Number of passes over the rows.
        lr: Adam learning rate.

    Returns:
        ``(belief_mem, causal_mem, char_mem, clf)`` after training.

    Raises:
        KeyError: If a row's normalized label is not a key of ``LABEL_MAP``.
        ValueError: From ``get_novel_path`` when a row's novel is unknown.
    """
    df = pd.read_csv(train_csv)

    belief_mem = BDHMemory(EMB_DIM).to(device)
    causal_mem = BDHMemory(EMB_DIM).to(device)
    char_mem = BDHMemory(EMB_DIM).to(device)
    clf = ConsistencyClassifier().to(device)
    modules = (belief_mem, causal_mem, char_mem, clf)

    # A single optimizer updates all four modules jointly.
    optimizer = torch.optim.Adam(
        [param for module in modules for param in module.parameters()],
        lr=lr
    )

    loss_fn = nn.BCELoss()

    # Make training mode explicit so the classifier's dropout is active.
    for module in modules:
        module.train()

    for epoch in range(epochs):
        print(f"\nEpoch {epoch+1}")
        total_loss = 0.0

        for _, row in df.iterrows():
            novel_path = get_novel_path(row)

            belief, causal, char_state, _ = process_novel(
                novel_path, belief_mem, causal_mem, char_mem, row["char"]
            )

            backstory = encoder.encode(
                [row["content"]], convert_to_tensor=True
            ).to(device)

            # Normalize the label the same way book names are normalized so
            # stray case or whitespace in the CSV does not break the lookup.
            label = torch.tensor(
                [[LABEL_MAP[normalize(row["label"])]]], dtype=torch.float
            ).to(device)

            pred = clf(belief, causal, char_state, backstory)
            loss = loss_fn(pred, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        # Report progress once per epoch; skipped for an empty CSV.
        if len(df):
            print(f"mean loss: {total_loss / len(df):.4f}")

    return belief_mem, causal_mem, char_mem, clf


In [None]:
def save_model(belief, causal, char, clf, path="bdh_model.pt"):
    """Write the state dicts of the four modules to ``path`` as one checkpoint.

    The checkpoint is a dict with the keys ``belief``, ``causal``,
    ``character`` and ``clf``.
    """
    modules = {
        "belief": belief,
        "causal": causal,
        "character": char,
        "clf": clf,
    }
    checkpoint = {key: module.state_dict() for key, module in modules.items()}
    torch.save(checkpoint, path)
    print("Model saved")


In [14]:
def load_model(path="bdh_model.pt"):
    """Rebuild the four modules, load their weights from ``path`` and return them.

    Returns:
        ``(belief, causal, char, clf)``, each on ``device`` and in eval mode.
    """
    # Keys match the checkpoint layout written by save_model.
    modules = {
        "belief": BDHMemory(EMB_DIM).to(device),
        "causal": BDHMemory(EMB_DIM).to(device),
        "character": BDHMemory(EMB_DIM).to(device),
        "clf": ConsistencyClassifier().to(device),
    }

    ckpt = torch.load(path, map_location=device)
    for key, module in modules.items():
        module.load_state_dict(ckpt[key])

    for module in modules.values():
        module.eval()

    return (
        modules["belief"],
        modules["causal"],
        modules["character"],
        modules["clf"],
    )


In [None]:
def generate_result_csv(test_csv, belief, causal, char, clf,
                        output_path="result.csv", threshold=0.5):
    """Predict a 0/1 label for every row of ``test_csv`` and write a CSV.

    Args:
        test_csv: Path of a CSV (read as latin-1) with ``book_name``, ``char``
            and ``content`` columns.
        belief: Belief memory module passed to ``process_novel``.
        causal: Causal memory module passed to ``process_novel``.
        char: Character memory module passed to ``process_novel``.
        clf: Classifier called on the three states and the backstory embedding.
        output_path: Where the predictions are written.
        threshold: A probability strictly above this value is predicted as 1.

    The output has the columns ``id`` (the DataFrame index of the row) and
    ``prediction``.
    NOTE(review): if the test CSV carries its own id column, that is probably
    what should be written instead of the row index — confirm.
    """
    df = pd.read_csv(test_csv, encoding="latin-1")
    rows = []

    # Force inference mode: modules handed over straight from training would
    # otherwise keep dropout active and give non-deterministic predictions.
    for module in (belief, causal, char, clf):
        module.eval()

    with torch.no_grad():
        for idx, row in df.iterrows():
            novel_path = get_novel_path(row)

            belief_s, causal_s, char_s, _ = process_novel(
                novel_path,
                belief,
                causal,
                char,
                row["char"]
            )

            backstory = encoder.encode(
                [row["content"]],
                convert_to_tensor=True
            ).to(device)

            prob = clf(belief_s, causal_s, char_s, backstory).item()
            rows.append([idx, int(prob > threshold)])

    pd.DataFrame(rows, columns=["id","prediction"]).to_csv(
        output_path, index=False
    )

    print(f"{output_path} generated")


In [None]:
# Train the three memory modules and the classifier on train.csv, then write
# their weights to save_model's default checkpoint path.
belief_mem, causal_mem, char_mem, clf = train_full_model("train.csv")
save_model(belief_mem, causal_mem, char_mem, clf)



Epoch 1


In [None]:
# Reload the checkpoint into freshly built modules in eval mode. This rebinds
# `clf` and defines belief/causal/char for the prediction cell.
belief, causal, char, clf = load_model("bdh_model.pt")



In [None]:
# Score every row of the test CSV with the loaded modules and write result.csv.
# NOTE(review): "testr.csv" may be a typo for "test.csv" — confirm the file name.
generate_result_csv("testr.csv", belief, causal, char, clf)