In [None]:
!pip install scikit-learn
!pip install transformers
!pip install peft
!pip install 'accelerate>=0.26.0'
!pip install -U bitsandbytes
!pip install huggingface-hub
!pip install datasets
!pip install wandb
!pip install -U bitsandbytes

In [1]:
import json
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from transformers import (AutoTokenizer,
                          AutoModelForCausalLM,
                          BitsAndBytesConfig,
                          TrainingArguments,
                          Trainer,
                          pipeline,
                          DataCollatorForLanguageModeling,
                          PreTrainedTokenizer)
from peft import LoraConfig, get_peft_model
import huggingface_hub
import os
import logging
from tqdm import tqdm
from sklearn.metrics import (accuracy_score,
                             classification_report,
                             confusion_matrix)
from typing import List
import wandb
from lora_llm import inference_Bedrock

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
class_names = []
model_name = "us.anthropic.claude-3-sonnet-20240229-v1:0"

result = inference_Bedrock(model_name,
         "evasion_label",
         "clarity_label",
         "preprocessed_data/named_test_set_info.csv",
         added_name_summary=True)



  1%|          | 1/97 [00:01<01:47,  1.12s/it]

Right label: deflection


  2%|▏         | 2/97 [00:03<02:30,  1.58s/it]

Right label: deflection


  3%|▎         | 3/97 [00:05<02:52,  1.84s/it]

Right label: deflection


  4%|▍         | 4/97 [00:07<02:59,  1.93s/it]

Right label: dodging


  5%|▌         | 5/97 [00:09<02:59,  1.95s/it]

Right label: deflection


  6%|▌         | 6/97 [00:10<02:50,  1.88s/it]

Right label: general


  7%|▋         | 7/97 [00:13<03:02,  2.03s/it]

Right label: deflection


  8%|▊         | 8/97 [00:15<02:56,  1.98s/it]

Right label: dodging


  9%|▉         | 9/97 [00:16<02:46,  1.89s/it]

Right label: partial/half-answer


 10%|█         | 10/97 [00:18<02:44,  1.89s/it]

Right label: dodging


 11%|█▏        | 11/97 [00:20<02:44,  1.91s/it]

Right label: dodging


 12%|█▏        | 12/97 [00:22<02:41,  1.90s/it]

Right label: deflection


 13%|█▎        | 13/97 [00:24<02:47,  1.99s/it]

Right label: deflection


 14%|█▍        | 14/97 [00:26<02:43,  1.97s/it]

Right label: deflection


 15%|█▌        | 15/97 [00:28<02:38,  1.93s/it]

Right label: dodging


 16%|█▋        | 16/97 [00:30<02:30,  1.86s/it]

Right label: dodging


 18%|█▊        | 17/97 [00:32<02:33,  1.92s/it]

Right label: dodging


 19%|█▊        | 18/97 [00:33<02:25,  1.84s/it]

Right label: dodging


 20%|█▉        | 19/97 [00:35<02:27,  1.89s/it]

Right label: dodging


 21%|██        | 20/97 [00:37<02:24,  1.87s/it]

Right label: deflection


 22%|██▏       | 21/97 [00:39<02:25,  1.91s/it]

Right label: explicit


 23%|██▎       | 22/97 [00:41<02:28,  1.98s/it]

Right label: deflection


 24%|██▎       | 23/97 [00:43<02:22,  1.92s/it]

Right label: dodging


 25%|██▍       | 24/97 [00:46<02:32,  2.09s/it]

Right label: partial/half-answer


 26%|██▌       | 25/97 [00:48<02:31,  2.10s/it]

Right label: deflection


 27%|██▋       | 26/97 [00:50<02:21,  2.00s/it]

Right label: deflection


 28%|██▊       | 27/97 [00:52<02:31,  2.16s/it]

Right label: partial/half-answer


 29%|██▉       | 28/97 [00:54<02:22,  2.07s/it]

Right label: partial/half-answer


 30%|██▉       | 29/97 [00:56<02:10,  1.92s/it]

Right label: general


 31%|███       | 30/97 [00:58<02:13,  1.99s/it]

Right label: deflection


 32%|███▏      | 31/97 [00:59<02:06,  1.92s/it]

Right label: deflection


 33%|███▎      | 32/97 [01:01<02:02,  1.88s/it]

Right label: implicit


 34%|███▍      | 33/97 [01:03<02:01,  1.89s/it]

Right label: explicit


 35%|███▌      | 34/97 [01:05<01:55,  1.84s/it]

Right label: dodging


 36%|███▌      | 35/97 [01:07<02:02,  1.97s/it]

Right label: deflection


 37%|███▋      | 36/97 [01:09<02:06,  2.07s/it]

Right label: deflection


 38%|███▊      | 37/97 [01:11<02:00,  2.01s/it]

Right label: deflection


 39%|███▉      | 38/97 [01:13<01:52,  1.91s/it]

Right label: deflection


 40%|████      | 39/97 [01:15<01:54,  1.98s/it]

Right label: deflection


 41%|████      | 40/97 [01:17<01:48,  1.90s/it]

Right label: dodging


 42%|████▏     | 41/97 [01:19<01:45,  1.89s/it]

Right label: deflection


 43%|████▎     | 42/97 [01:20<01:39,  1.81s/it]

Right label: deflection


 44%|████▍     | 43/97 [01:23<01:46,  1.97s/it]

Right label: dodging


 45%|████▌     | 44/97 [01:25<01:46,  2.01s/it]

Right label: dodging


 46%|████▋     | 45/97 [01:27<01:43,  2.00s/it]

Right label: partial/half-answer


 47%|████▋     | 46/97 [01:29<01:39,  1.96s/it]

Right label: deflection


 48%|████▊     | 47/97 [01:30<01:34,  1.89s/it]

Right label: dodging


 49%|████▉     | 48/97 [01:32<01:34,  1.92s/it]

Right label: partial/half-answer


 51%|█████     | 49/97 [01:34<01:34,  1.97s/it]

Right label: dodging


 52%|█████▏    | 50/97 [01:36<01:29,  1.89s/it]

Right label: dodging


 53%|█████▎    | 51/97 [01:38<01:30,  1.98s/it]

Right label: deflection


 54%|█████▎    | 52/97 [01:40<01:24,  1.88s/it]

Right label: deflection


 55%|█████▍    | 53/97 [01:42<01:19,  1.80s/it]

Right label: explicit


 56%|█████▌    | 54/97 [01:43<01:17,  1.80s/it]

Right label: dodging


 57%|█████▋    | 55/97 [01:46<01:19,  1.90s/it]

Right label: deflection


 58%|█████▊    | 56/97 [01:48<01:19,  1.94s/it]

Right label: deflection


 59%|█████▉    | 57/97 [01:49<01:14,  1.86s/it]

Right label: deflection


 60%|█████▉    | 58/97 [01:52<01:19,  2.04s/it]

Right label: deflection


 61%|██████    | 59/97 [01:54<01:20,  2.13s/it]

Right label: deflection


 62%|██████▏   | 60/97 [01:56<01:12,  1.97s/it]

Right label: explicit


 63%|██████▎   | 61/97 [01:57<01:08,  1.91s/it]

Right label: explicit


 64%|██████▍   | 62/97 [01:59<01:05,  1.87s/it]

Right label: deflection


 65%|██████▍   | 63/97 [02:01<01:03,  1.87s/it]

Right label: deflection


 66%|██████▌   | 64/97 [02:03<00:59,  1.82s/it]

Right label: general


 67%|██████▋   | 65/97 [02:05<00:58,  1.84s/it]

Right label: deflection


 68%|██████▊   | 66/97 [02:07<00:57,  1.85s/it]

Right label: deflection


 69%|██████▉   | 67/97 [02:08<00:54,  1.82s/it]

Right label: deflection


 70%|███████   | 68/97 [02:10<00:53,  1.83s/it]

Right label: dodging


 71%|███████   | 69/97 [02:13<00:56,  2.00s/it]

Right label: deflection


 72%|███████▏  | 70/97 [02:14<00:53,  1.98s/it]

Right label: partial/half-answer


 73%|███████▎  | 71/97 [02:16<00:49,  1.92s/it]

Right label: dodging


 74%|███████▍  | 72/97 [02:18<00:46,  1.88s/it]

Right label: deflection


 75%|███████▌  | 73/97 [02:20<00:43,  1.82s/it]

Right label: dodging


 76%|███████▋  | 74/97 [02:22<00:41,  1.83s/it]

Right label: deflection


 77%|███████▋  | 75/97 [02:24<00:41,  1.89s/it]

Right label: dodging


 78%|███████▊  | 76/97 [02:26<00:43,  2.05s/it]

Right label: deflection


 79%|███████▉  | 77/97 [02:28<00:38,  1.93s/it]

Right label: implicit


 80%|████████  | 78/97 [02:29<00:35,  1.85s/it]

Right label: dodging


 81%|████████▏ | 79/97 [02:31<00:32,  1.81s/it]

Right label: deflection


 82%|████████▏ | 80/97 [02:33<00:30,  1.82s/it]

Right label: dodging


 84%|████████▎ | 81/97 [02:35<00:29,  1.82s/it]

Right label: dodging


 85%|████████▍ | 82/97 [02:37<00:28,  1.92s/it]

Right label: partial/half-answer


 86%|████████▌ | 83/97 [02:39<00:26,  1.86s/it]

Right label: dodging


 87%|████████▋ | 84/97 [02:40<00:23,  1.82s/it]

Right label: partial/half-answer


 88%|████████▊ | 85/97 [02:42<00:21,  1.81s/it]

Right label: dodging


 89%|████████▊ | 86/97 [02:44<00:20,  1.86s/it]

Right label: deflection


 90%|████████▉ | 87/97 [02:46<00:18,  1.80s/it]

Right label: deflection


 91%|█████████ | 88/97 [02:48<00:16,  1.87s/it]

Right label: explicit


 92%|█████████▏| 89/97 [02:50<00:15,  1.90s/it]

Right label: deflection


 93%|█████████▎| 90/97 [02:52<00:13,  1.88s/it]

Right label: deflection


 94%|█████████▍| 91/97 [02:53<00:11,  1.88s/it]

Right label: deflection


 95%|█████████▍| 92/97 [02:55<00:08,  1.79s/it]

Right label: deflection


 96%|█████████▌| 93/97 [02:57<00:06,  1.74s/it]

Right label: deflection


 97%|█████████▋| 94/97 [02:58<00:05,  1.76s/it]

Right label: deflection


 98%|█████████▊| 95/97 [03:00<00:03,  1.82s/it]

Right label: deflection


 99%|█████████▉| 96/97 [03:02<00:01,  1.79s/it]

Right label: deflection


100%|██████████| 97/97 [03:04<00:00,  1.90s/it]

Right label: deflection
Accuracy: 0.70
Accuracy for label indirect: 0.98
Accuracy for label direct reply: 0.17
Accuracy for label direct non-reply: 0.00

Classification Report:
                  precision    recall  f1-score   support

        indirect       0.69      0.98      0.81        64
    direct_reply       0.83      0.17      0.28        30
direct_non-reply       0.00      0.00      0.00         3

        accuracy                           0.70        97
       macro avg       0.51      0.38      0.36        97
    weighted avg       0.71      0.70      0.62        97


Confusion Matrix:
[[63  1  0]
 [25  5  0]
 [ 3  0  0]]



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [4]:
print(result)

['Accuracy: 0.70', 'Accuracy for label indirect: 0.98', 'Accuracy for label direct reply: 0.24', 'Accuracy for label direct non-reply: 0.17', 'Classification Report:                  precision    recall  f1-score   support\n\n        indirect       0.70      0.98      0.82       174\n    direct_reply       0.86      0.24      0.38        79\ndirect_non-reply       0.80      0.17      0.29        23\n\n       micro avg       0.72      0.70      0.71       276\n       macro avg       0.79      0.46      0.49       276\n    weighted avg       0.76      0.70      0.65       276\n', 'Confusion Matrix:[[170   3   0]\n [ 58  19   1]\n [ 14   0   4]]']
