In [None]:
!pip install git+https://github.com/huggingface/transformers
!pip install -q peft  accelerate bitsandbytes safetensors
!pip install --upgrade torch torchvision torchaudio
!pip install autotrain-advanced
!pip install huggingface_hub


In [None]:
# !pip install --upgrade bitsandbytes

In [None]:
# Log in to the Hugging Face Hub from the shell, passing the token on the command line.
# NOTE(review): `secret_hf` is not assigned in any cell visible in this notebook;
# presumably it must already exist as a Python variable or an environment
# variable before this cell runs — confirm, otherwise the login gets an empty token.
!huggingface-cli login --token $secret_hf


In [None]:
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer,pipeline
import transformers
# Hub id of the PEFT adapter that is applied on top of the base model below.
adapters_name = "cti-ttp-18/ttp-extraction-llama"
# Hub id of the base checkpoint; used for both the model and the tokenizer.
model_name = "meta-llama/Llama-2-7b-chat-hf"


# NOTE(review): in the visible cells `device` is only referenced by a
# commented-out `model.to(device)` line; the load below passes device_map='auto'.
device = "cuda" # the device to load the model onto

# Quantisation settings handed to from_pretrained: 4-bit loading, NF4 quant type,
# double quantisation enabled, bfloat16 as the compute dtype.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)


# Load the base causal-LM with the quantisation config above.
# NOTE(review): `is_decoder=True` is passed as an extra keyword; presumably it is
# forwarded to the model config — confirm it is needed for this checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config,
    device_map='auto',
    is_decoder = True
)

# Wrap the base model with the adapter; the merge step below is left disabled,
# so `model` stays a PeftModel wrapper around the base model.
model = PeftModel.from_pretrained(model, adapters_name)
#model = model.merge_and_unload()

# Tokenizer comes from the base checkpoint (not the adapter repo); BOS id is forced to 1.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.bos_token_id = 1

# NOTE(review): `stop_token_ids` is not referenced by any other visible cell.
stop_token_ids = [0]

print(f"Successfully loaded the model {model_name} into memory")

In [None]:
import pandas as pd
import re
import gc


def get_gpu_free_memory(device=0):
    """Return the number of bytes on a CUDA device not yet claimed by PyTorch.

    The value is ``total_memory - memory_reserved``. Memory reserved by
    PyTorch's caching allocator already contains the memory occupied by live
    tensors (``memory_allocated``), so subtracting both — as the previous
    version did — counted the allocated bytes twice and under-reported the
    free memory.

    Args:
        device: CUDA device index (or ``torch.device``) to inspect.
            Defaults to device 0, matching the previous behaviour.

    Returns:
        int: total device memory minus the memory reserved by this process's
        caching allocator, in bytes. Memory used by other processes is not
        taken into account.
    """
    total_memory = torch.cuda.get_device_properties(device).total_memory
    reserved_memory = torch.cuda.memory_reserved(device)
    return total_memory - reserved_memory

# Read the held-out test split and report how many rows it contains.
df = pd.read_csv('/kaggle/input/chat-model-test-csv/test.csv')
num_of_rows = len(df.index)
print("Total testing datapoints: ", num_of_rows)

# State shared with the inference cell: one record per processed row, plus a
# running count of rows that were actually sent through the model.
predicted_json, testing_count = [], 0






In [None]:
# Inference loop: build a Llama-2 chat prompt from each test row, generate a
# response, turn it into a comma-separated technique list and checkpoint all
# predictions collected so far to CSV after every row.
#
# Reads `df`, `num_of_rows`, `model`, `tokenizer`, `get_gpu_free_memory` and
# appends to `predicted_json` / increments `testing_count` from earlier cells.
#
# NOTE(review): generation uses do_sample=True and no random seed is set in the
# visible cells, so predictions are not reproducible between runs.

MAX_PROMPT_WORDS = 2000  # prompts longer than this (split on single spaces) are skipped
OUTPUT_CSV_PATH = '/kaggle/working/test_evaluation_base_llama2.csv'

for i in range(num_of_rows):
    row_df = df.iloc[i]

    # Named `article` rather than `input` so the builtin input() is not shadowed.
    article = row_df['input']
    output = row_df['output']

    # A missing cell comes back from pandas as NaN (a float); it has no text to
    # prompt with and would previously crash on `.replace`.
    if pd.isna(article):
        print("Ignored ",i+1,"th datapoint: 'input' is missing")
        continue

    # Convert to str first, then drop every line break. '\r' is removed too:
    # previously '\n' was stripped before '\r\n', so the second replace never
    # matched and Windows line endings left a stray '\r' in the prompt.
    text = str(article).replace('\r', '').replace('\n', '')
    text = "<s>[INST] "+ text +" [/INST]" # prompt template (original author's note: to be changed)
    word_count = len(text.split(" "))

    print()
    print("GPU free memory available: ",get_gpu_free_memory()/(1024*1024*1024)," GB")
    print("Processing ",i+1,"th datapoint.Text size(in words): ",word_count)

    if word_count > MAX_PROMPT_WORDS:
        print("Ignored ",i+1,"th datapoint.Text size(in words): ",word_count)
        continue

    # The prompt already contains "<s>", so the tokenizer must not add specials.
    encoded = tokenizer(text, return_tensors="pt", add_special_tokens=False)
    # Move the prompt tensors to the device the model's first parameters live on;
    # the tokenizer returns CPU tensors while the model was loaded onto the GPU.
    model_input = encoded.to(model.device)
    prompt_length = model_input["input_ids"].shape[1]
    generated_ids = model.generate(**model_input, max_new_tokens=2048, do_sample=True)

    # Decode only the newly generated tokens. Splitting the full decoded text on
    # "[/INST]" picks the wrong span whenever the article (or the generation)
    # itself contains that marker.
    response = tokenizer.decode(generated_ids[0][prompt_length:], skip_special_tokens=True)

    responded_techniques_list = [
        st.replace('\r', '').replace('\n', '').replace('</s>', '').strip()
        for st in response.split(",")
    ]
    predicted_techniques = ','.join(responded_techniques_list)

    predicted_json.append({
        "Article" : article,
        "Actual_output" : output,
        "Predicted_output" : predicted_techniques
    })
    print("Testing for ",i+1,"th datapoints done.Text size(in words): ",word_count)

    testing_count = testing_count + 1

    # Rewrite the whole CSV after every row so partial results survive an
    # interrupted session.
    predicted_df = pd.DataFrame(predicted_json)
    predicted_df.to_csv(OUTPUT_CSV_PATH, index=False)
    print("Storing ",i+1,"datapoints to csv.Text size(in words): ",word_count)
    gc.collect()


print("Tested datapoint count: ",testing_count)
# Message now names the file that is actually written above.
print("test_evaluation_base_llama2.csv is created successfully")

In [None]:
# Evaluate test-set performance with label-averaged (macro) metrics.
#
# For every row the actual and predicted comma-separated label strings are
# turned into sets. Per label, TP/FP/FN are counted across rows; precision and
# recall are computed per label and averaged over all labels seen. The reported
# F1 is the harmonic mean of those two averages, and "accuracy" is
# |intersection| / |union| of the label sets summed over all rows.

import pandas as pd


def _parse_labels(cell):
    """Turn a comma-separated label string into a set of cleaned labels.

    Line breaks and the '</s>' marker are removed and surrounding whitespace is
    stripped. Empty pieces (from trailing or doubled commas) are dropped so
    that the empty string is never counted as a label.
    """
    labels = set()
    for piece in str(cell).split(","):
        label = piece.replace('\r', '').replace('\n', '').replace('</s>', '').strip()
        if label:
            labels.add(label)
    return labels


df = pd.read_csv('/kaggle/input/chat-model-test-epoch-10/test_evaluation.csv')
# NOTE(review): dropna() removes every row with a missing value in any column.
# If this CSV was written with empty prediction strings, pandas reads those back
# as NaN and the rows are dropped here instead of counting as misses — confirm
# that this is intended.
df = df.dropna()

print("Total datapoints to be evaluated :",df.shape[0])

# Per-label confusion counts and the running totals for the accuracy figure.
label_to_metrics = {}
total_correct_predictions = 0
total_labels_predicted = 0

for _, row in df.iterrows():
    actual_labels = _parse_labels(row['Actual_output'])
    predicted_labels = _parse_labels(row['Predicted_output'])

    matched_labels = actual_labels & predicted_labels
    all_labels = actual_labels | predicted_labels

    # Update totals for the accuracy (intersection over union) calculation.
    total_correct_predictions += len(matched_labels)
    total_labels_predicted += len(all_labels)

    for label in all_labels:
        counts = label_to_metrics.setdefault(label, {'TP': 0, 'FP': 0, 'FN': 0})
        if label in matched_labels:
            counts['TP'] += 1  # predicted and actually present
        elif label in actual_labels:
            counts['FN'] += 1  # present but not predicted
        else:
            counts['FP'] += 1  # predicted but not present

# Average per-label precision and recall over all labels seen.
overall_precision = 0
overall_recall = 0
for label, metrics in label_to_metrics.items():
    predicted_count = metrics['TP'] + metrics['FP']
    actual_count = metrics['TP'] + metrics['FN']
    overall_precision += metrics['TP'] / predicted_count if predicted_count > 0 else 0
    overall_recall += metrics['TP'] / actual_count if actual_count > 0 else 0

num_labels = len(label_to_metrics)
# Guard against an empty frame (no labels at all), which previously raised
# ZeroDivisionError; the averages simply stay at 0 in that case.
if num_labels > 0:
    overall_precision /= num_labels
    overall_recall /= num_labels
f1_score = 2 * (overall_precision * overall_recall) / (overall_precision + overall_recall) if overall_precision + overall_recall > 0 else 0

# Calculate accuracy
accuracy = total_correct_predictions / total_labels_predicted if total_labels_predicted > 0 else 0

print("Epoch 10")
print("--------")
print(f'Overall Precision: {overall_precision}')
print(f'Overall Recall: {overall_recall}')
print(f'F1 Score: {f1_score}')
print(f'Accuracy: {accuracy}')

In [None]:
# Evaluate test-set performance with micro-averaged metrics: TP/FP/FN are
# summed over all rows first, then precision, recall and F1 are computed once
# from those totals.

import pandas as pd


def _to_label_set(cell):
    """Turn a comma-separated label string into a set of cleaned labels.

    Line breaks and the '</s>' marker are removed and surrounding whitespace is
    stripped, matching the cleaning used by the per-label evaluation cell.
    Empty pieces (from trailing or doubled commas) are dropped so that the
    empty string is never counted as a label.
    """
    cleaned = (
        piece.replace('\r', '').replace('\n', '').replace('</s>', '').strip()
        for piece in str(cell).split(",")
    )
    return {label for label in cleaned if label}


# Load the DataFrame
df = pd.read_csv('/kaggle/input/chat-model-test-epoch-10/test_evaluation.csv')
# NOTE(review): dropna() removes every row with a missing value in any column,
# so rows whose prediction was read back as NaN are excluded rather than
# counted as misses — confirm that this is intended.
df = df.dropna()

print("Total datapoints to be evaluated:", df.shape[0])

# Initialize aggregate counts for micro-average calculation
total_TP = 0
total_FP = 0
total_FN = 0

for _, row in df.iterrows():
    actual_labels = _to_label_set(row['Actual_output'])
    predicted_labels = _to_label_set(row['Predicted_output'])

    # Update aggregate counts for micro-average calculation
    total_TP += len(actual_labels & predicted_labels)
    total_FP += len(predicted_labels - actual_labels)
    total_FN += len(actual_labels - predicted_labels)

# Calculate micro-averaged precision, recall, and F1 score (0 when undefined)
micro_precision = total_TP / (total_TP + total_FP) if total_TP + total_FP > 0 else 0
micro_recall = total_TP / (total_TP + total_FN) if total_TP + total_FN > 0 else 0
micro_f1_score = 2 * (micro_precision * micro_recall) / (micro_precision + micro_recall) if micro_precision + micro_recall > 0 else 0

print("Epoch 10")
print("--------")
print(f'Micro Precision: {micro_precision:.4f}')
print(f'Micro Recall: {micro_recall:.4f}')
print(f'Micro F1 Score: {micro_f1_score:.4f}')
