In [1]:
import pandas as pd
import ast
from agent_class import InformationExtractionAgent
import json
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from transformers import (AutoTokenizer,
                          AutoModelForCausalLM,
                          BitsAndBytesConfig,
                          TrainingArguments,
                          Trainer,
                          pipeline,
                          DataCollatorForLanguageModeling,
                          PreTrainedTokenizer)
from peft import LoraConfig, get_peft_model
import huggingface_hub
import os
import logging
from tqdm import tqdm
from sklearn.metrics import (accuracy_score,
                             classification_report,
                             confusion_matrix)
from typing import List
import wandb
from lora_llm import evaluate

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [None]:
!pip install transformers
!pip install peft
!pip install 'accelerate>=0.26.0'
!pip install -U bitsandbytes
!pip install huggingface-hub
!pip install datasets
!pip install langgraph
!pip install boto3
!pip install langchain_aws

In [3]:
# Load the frame holding the extracted names; the `combined_entities` column is
# stored as the text of a Python list, so parse it back (missing values -> []).
df = pd.read_csv('preprocessed_data/names/named_test_set_proc_final.csv', index_col=False)
df['combined_entities'] = [
    ast.literal_eval(raw_value) if pd.notnull(raw_value) else []
    for raw_value in df['combined_entities']
]

# Keep only the rows that mention at least one name.
has_entities = df['combined_entities'].apply(
    lambda entities: isinstance(entities, list) and len(entities) != 0
)
named_df = df[has_entities]

# Attach the named test set on the shared 'Unnamed: 0' key.
named_dataset = pd.read_csv('preprocessed_data/test_set_named.csv', index_col=False)
merged = named_df.merge(named_dataset, on='Unnamed: 0', how='inner')

# Retain the first row seen for every distinct interview question.
repeated_question = merged.duplicated(subset='interview_question')
unique_interview_df = merged.loc[~repeated_question]

In [None]:
# Run the information-extraction agent once per (row, name) pair and append the
# results to a header-less CSV in small batches.
#
# The file is opened in append mode, so re-running this cell used to duplicate
# every row already written. Pairs that are already present in the file are now
# skipped, which also lets an interrupted run be resumed by re-executing the cell.
SUMMARY_CSV = 'tailored_summaries2.csv'
SUMMARY_COLUMNS = ["id", "name", "status", "tailored_summary"]

agent = InformationExtractionAgent()
buffer = []  # rows processed but not yet written to disk
batch_size = 10


def _flush_buffer(rows):
    """Append `rows` (a list of dicts keyed by SUMMARY_COLUMNS) to SUMMARY_CSV."""
    pd.DataFrame(rows, columns=SUMMARY_COLUMNS).to_csv(
        SUMMARY_CSV, mode="a", header=False, index=False
    )


# Collect the (id, name) pairs written by an earlier run. Everything is read as
# text with NA-detection disabled so that ids and names compare exactly against
# str(row_id) / str(name) below.
already_done = set()
if os.path.exists(SUMMARY_CSV) and os.path.getsize(SUMMARY_CSV) > 0:
    previous = pd.read_csv(
        SUMMARY_CSV,
        index_col=False,
        header=None,
        names=SUMMARY_COLUMNS,
        usecols=["id", "name"],
        dtype=str,
        keep_default_na=False,
    )
    already_done = set(zip(previous["id"], previous["name"]))

try:
    for _, row in unique_interview_df.iterrows():
        row_id = row["Unnamed: 0"]  # adjust if your ID column name is different
        names = row["combined_entities"]
        text = f"{row['interview_question']} A. {row['interview_answer']}"

        for name in names:
            if (str(row_id), str(name)) in already_done:
                continue  # written by a previous run of this cell

            agent_results = agent.run(name=name, text=text)

            buffer.append({
                "id": row_id,
                "name": name,
                "status": agent_results.get('final_state', ''),
                "tailored_summary": agent_results.get('tailored_summary', ''),
            })

            # Flush every batch_size entries for safety
            if len(buffer) >= batch_size:
                _flush_buffer(buffer)
                buffer.clear()
finally:
    # Final flush for leftovers; also runs when the loop is interrupted or the
    # agent raises, so rows that were already computed are not lost.
    if buffer:
        _flush_buffer(buffer)
        buffer.clear()

-------------

In [4]:
# Load the raw, header-less agent output (one row per ID/name pair).
summaries = pd.read_csv('tailored_summaries2.csv', index_col=False, header=None, names=["ID", "names", "status", "names_information"])

# Treat missing summaries as empty strings and trim surrounding whitespace.
summaries["names_information"] = summaries["names_information"].fillna("").str.strip()

grouped = summaries.groupby("ID")["names_information"]

# Per-ID flags: every summary empty / at least one summary present.
all_empty = grouped.apply(lambda s: s.eq("").all())
any_non_empty = grouped.apply(lambda s: s.ne("").any())

ids_all_empty = all_empty[all_empty].index.tolist()
ids_with_non_empty = any_non_empty[any_non_empty].index.tolist()

# Merge the non-empty summaries of each ID into one newline-separated string.
# (The separator used to be the literal text "/n ", a typo for "\n".)
merged = grouped.agg(lambda x: "\n".join(s for s in x if s))

# NOTE(review): index=False drops the ID index, so this file holds only the
# merged text column - confirm that is what downstream consumers expect.
merged.to_csv('processed_summaries.csv', index=False)

# Load the named test set and expose its row identifier under the name "ID".
df = pd.read_csv('preprocessed_data/test_set_named.csv', index_col=False)
df = df.rename(columns={"Unnamed: 0": "ID"})

merged_df = pd.merge(merged, df, on="ID", how="inner")

# Keep only the IDs with at least one non-empty summary and the columns that
# are written to the evaluation CSV.
final_df = merged_df[merged_df["ID"].isin(ids_with_non_empty)][["names_information", "question", "interview_question", "interview_answer", "evasion_label", "clarity_label"]]
final_df.to_csv('named_test_set_info.csv', index=False)

# Authenticate against the Hugging Face Hub with the key stored in secrets.json.
with open('secrets.json', 'r') as file:
    secrets = json.load(file)
huggingface_hub.login(secrets.get('HF_KEY'))

class_names = []
base_model_name = 'meta-llama/Llama-3.1-8B-Instruct'
label_name = "evasion_label"
fine_tuned_model_path = "./llama3.1"

# Evaluate on the CSV written above. The meaning of each positional argument is
# defined by lora_llm.evaluate, which is not shown in this notebook.
evaluate(base_model_name,
         fine_tuned_model_path,
         "evasion_label",
         "clarity_label",
         "named_test_set_info.csv",
         False)

Unnamed: 0,ID,names,status,names_information
0,2,Koizumi,Not matched info,
1,2,Roh,Not matched info,
2,2,Condi,Tailored info,"Condoleezza ""Condi"" Rice was the United States..."
3,2,Vladimir Putin,Tailored info,Vladimir Putin is the President of Russia who ...
4,2,Hu Jintao,Tailored info,Hu Jintao was the President of China and leade...
...,...,...,...,...
429,301,man Boehner,No wiki entry,
430,304,Condi,Tailored info,"""Condi"" refers to Condoleezza Rice, who served..."
431,306,Don Powell,Not matched info,
432,306,April Ryan,Tailored info,April Ryan is a veteran White House correspond...


In [14]:
# Load the unprocessed, header-less summaries (one row per ID/name pair)
summaries_df = pd.read_csv(
    'tailored_summaries2.csv',
    index_col=False,
    header=None,
    names=["ID", "names", "status", "names_information"],
)

# Replace missing summaries with "" and trim whitespace, then group by ID
summaries_df["names_information"] = (
    summaries_df["names_information"].fillna("").str.strip()
)
grouped_summaries = summaries_df.groupby("ID")["names_information"]

# Flag the IDs that have at least one non-empty summary and list them
any_non_empty_mask = grouped_summaries.apply(lambda texts: (texts != "").any())
ids_with_non_empty = any_non_empty_mask.loc[any_non_empty_mask].index.tolist()

# Collapse the non-empty summaries of each ID into one newline-separated string
merged_summaries_df = grouped_summaries.agg(
    lambda texts: "\n".join(filter(None, texts))
)

merged_summaries_df.to_csv('processed_summaries.csv', index=False)

# Load the test set, expose its row identifier as "ID" and join the summaries
named_test_set_df = pd.read_csv(
    'preprocessed_data/test_set_named.csv', index_col=False
).rename(columns={"Unnamed: 0": "ID"})
rag_test_df = pd.merge(
    left=merged_summaries_df,
    right=named_test_set_df,
    how="inner",
    on="ID",
)

# Keep the rows whose ID has a non-empty summary, restricted to the export columns
output_columns = [
    "names_information",
    "question",
    "interview_question",
    "interview_answer",
    "evasion_label",
    "clarity_label",
]
rag_test_df = rag_test_df.loc[
    rag_test_df["ID"].isin(ids_with_non_empty), output_columns
]

rag_test_df.to_csv('named_test_set_info.csv', index=False)

# Test RAG results: log in to the Hugging Face Hub, then run the evaluation
with open('secrets.json', 'r') as secrets_file:
    secrets = json.load(secrets_file)
hf_token = secrets.get('HF_KEY')
huggingface_hub.login(hf_token)

class_names = []
base_model_name = 'meta-llama/Llama-3.1-8B-Instruct'
label_name = "evasion_label"
fine_tuned_model_path = "./llama3.1"

evaluate(
    base_model_name,
    fine_tuned_model_path,
    "evasion_label",
    "clarity_label",
    "named_test_set_info.csv",
    False,
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cuda:0
  8%|▊         | 1/13 [00:04<00:58,  4.87s/it]

Right label: implicit 

### selected question
Right label: implicit 

### selected question
Right label: dodging 

### partial
Right label: dodging 

### part
Right label: dodging 

### explicit
Right label: explicit 

### part of
Right label: implicit 

### selected question
Right label: dodging 

### why


 15%|█▌        | 2/13 [00:10<00:56,  5.09s/it]

Right label: explicit 

### selected question
Right label: explicit 

### selected question
Right label: dodging 

### partial
Right label: dodging 

### selected
Right label: deflection 

### partial
Right label: dodging 

### selected
Right label: explicit 

### selected question
Right label: dodging 

### selected


 23%|██▎       | 3/13 [00:15<00:50,  5.05s/it]

Right label: dodging 

### part
Right label: dodging 

### explicit
Right label: implicit 

### selected question
Right label: explicit 

### part of
Right label: explicit 

### part of
Right label: dodging 

### part
Right label: declining to answer
Right label: general 

### part of


 31%|███       | 4/13 [00:19<00:44,  4.94s/it]

Right label: implicit 

### explicit response
Right label: implicit 

### part of
Right label: deflection 

### selected
Right label: implicit 

### selected question
Right label: explicit 

### part of
Right label: dodging 

### explicit
Right label: implicit 

### selected question
Right label: implicit 

### selected question


 38%|███▊      | 5/13 [00:24<00:39,  4.92s/it]

Right label: dodging 

### partial
Right label: implicit 

### selected question
Right label: dodging 

### explicit
Right label: explicit 

### explicit label
Right label: explicit 

### part of
Right label: implicit 

### partial/h
Right label: explicit 

### part of
Right label: dodging 

### selected


 46%|████▌     | 6/13 [00:30<00:35,  5.06s/it]

Right label: implicit 

### implicit reason
Right label: implicit 

### explicit label
Right label: dodging 

### selected
Right label: dodging 

### partial
Right label: implicit 

### explicit label
Right label: dodging 

### explicit
Right label: dodging 

### selected
Right label: explicit 

### selected question


 54%|█████▍    | 7/13 [00:35<00:31,  5.25s/it]

Right label: dodging 

### selected
Right label: implicit 

### selected question
Right label: implicit 

### selected question
Right label: dodging 

### selected
Right label: explicit 

### why this
Right label: explicit 

### part of
Right label: implicit 

### selected question
Right label: explicit 

### explicit response


 62%|██████▏   | 8/13 [00:40<00:25,  5.05s/it]

Right label: explicit 

### explicit label
Right label: explicit 

### explicit label
Right label: dodging 

### partial
Right label: explicit 

### selected question
Right label: explicit 

### partial/h
Right label: implicit 

### selected question
Right label: dodging 

### partial
Right label: implicit 

### selected question


 69%|██████▉   | 9/13 [00:45<00:20,  5.03s/it]

Right label: dodging 

### part
Right label: explicit 

### explicit response
Right label: explicit 

### partial/h
Right label: explicit 

### part of
Right label: deflection 

### selected
Right label: dodging 

### selected
Right label: explicit 

### partial/h
Right label: dodging 

### explicit


 77%|███████▋  | 10/13 [00:50<00:15,  5.03s/it]

Right label: dodging 

### partial
Right label: deflection 

### partial
Right label: dodging 

### explicit
Right label: implicit 

 

the response
Right label: explicit 

### part of
Right label: implicit 

### why this
Right label: explicit 

### partial/
Right label: dodging 

### selected


 85%|████████▍ | 11/13 [00:55<00:10,  5.09s/it]

Right label: dodging 

### selected
Right label: dodging 

### selected
Right label: deflection 

### part
Right label: explicit 

### part of
Right label: explicit 

### selected question
Right label: dodging 

reason:
Right label: explicit 

### partial/h
Right label: explicit 

### partial/h


 92%|█████████▏| 12/13 [01:00<00:05,  5.09s/it]

Right label: explicit 

### part of
Right label: implicit 

### selected question
Right label: dodging 

### partial
Right label: dodging 

### selected
Right label: explicit 

### explicit label
Right label: implicit 

### implicit reason
Right label: implicit 

### selected question
Right label: implicit 

### selected question


100%|██████████| 13/13 [01:01<00:00,  4.71s/it]

Right label: explicit 

### part of
----
0         indirect
1         indirect
2         indirect
3         indirect
4         indirect
          ...     
92    direct reply
93        indirect
94        indirect
95        indirect
96    direct reply
Name: clarity_label, Length: 97, dtype: object
----
----
0          indirect
1          indirect
2          indirect
3          indirect
4          indirect
           ...     
102        indirect
103    direct reply
105        indirect
106        indirect
107        indirect
Name: clarity_label, Length: 97, dtype: object
----
['indirect', 'direct reply', 'direct non-reply']
Accuracy: 0.65
Accuracy for label indirect: 0.72
Accuracy for label direct reply: 0.57
Accuracy for label direct non-reply: 0.00

Classification Report:
                  precision    recall  f1-score   support

        indirect       0.74      0.72      0.73        64
    direct_reply       0.50      0.57      0.53        30
direct_non-reply       0.00      0.00      0


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
