In [1]:
!pip install transformers
!pip install accelerate -U

Collecting accelerate
  Downloading accelerate-0.34.2-py3-none-any.whl.metadata (19 kB)
Downloading accelerate-0.34.2-py3-none-any.whl (324 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m324.4/324.4 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: accelerate
  Attempting uninstall: accelerate
    Found existing installation: accelerate 0.33.0
    Uninstalling accelerate-0.33.0:
      Successfully uninstalled accelerate-0.33.0
Successfully installed accelerate-0.34.2


In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, balanced_accuracy_score, f1_score
from transformers import (AutoTokenizer,
                          AutoModelForSequenceClassification,
                          AutoConfig,
                          TrainingArguments,
                          Trainer,
                          DataCollatorWithPadding)
import torch
from torch.utils.data import Dataset
import os
import matplotlib.pyplot as plt
import seaborn as sns

ModuleNotFoundError: No module named 'transformers'

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
df = pd.read_csv('/content/drive/MyDrive/17k_synthetic_data (1).csv',index_col=0).fillna(" ").sample(10000)
y = df.pop('subject')

In [7]:
# create dictionaries with id to label and label to id mappings which are going
# to be used with the classification model
id2label = dict(enumerate(y.unique()))
label2id = {v: k for (k,v) in id2label.items()}

In [8]:
# encode class labels
y = y.map(label2id)

In [25]:
# create stratified train, validation and test datasets
text_train, text_test, y_train, y_test = train_test_split(
    df,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42
)
text_val, text_test, y_val, y_test = train_test_split(
    text_test,
    y_test,
    test_size=0.5,
    stratify=y_test,
    random_state=42
)

In [26]:
# load tokenizer of pretrained model
# here we will use BERT, but we can use any model
model_name = 'bert-base-uncased'
config = AutoConfig.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)



In [27]:
class TextDataset(Dataset):
    def __init__(self, text, labels, tokenizer, config):
        self.tokenized_txt = tokenizer(
            text,
            max_length=config.max_length,
            truncation=True,
            padding=True
        )
        self.labels = labels

    def __len__(self):
      return len(self.labels)

    def __getitem__(self, idx):
      item = {
          key: torch.tensor(val[idx]) for key, val in self.tokenized_txt.items()
      }
      item['labels'] = torch.tensor(self.labels[idx])
      return item


In [28]:
ds_train = TextDataset(
    text_train['sentence'].to_list(),
    y_train.to_list(),
    tokenizer,
    config
)
ds_val = TextDataset(
    text_val['sentence'].to_list(),
    y_val.to_list(),
    tokenizer,
    config
)
ds_test = TextDataset(
    text_test['sentence'].to_list(),
    y_test.to_list(),
    tokenizer,
    config
)

In [29]:
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    f1_result = f1_score(labels, preds, average='weighted')
    acc = accuracy_score(labels, preds)
    balanced_acc = balanced_accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'balanced_accuracy': balanced_acc,
        'f1_score': f1_result
        }

In [30]:
# create data collator
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [31]:
# load model for classification
num_labels = len(label2id)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [34]:
# create a dir to store model checkpoints
my_dir = 'model_checkpoints'
os.mkdir(my_dir)

TypeError: 'exists_ok' is an invalid keyword argument for mkdir()

In [35]:
# define training arguments
training_args = TrainingArguments(
    output_dir=my_dir,
    learning_rate=5e-5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=30,
    weight_decay=0.01,
    evaluation_strategy='epoch',
    save_strategy='epoch',
    load_best_model_at_end=True,
)



In [36]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=ds_train,
    eval_dataset=ds_val,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

In [37]:
# train the model
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,Balanced Accuracy,F1 Score
1,No log,0.591753,0.845,0.835838,0.843818
2,0.918500,0.514584,0.851,0.840897,0.848905
3,0.918500,0.520096,0.865,0.863539,0.865332
4,0.188300,0.526406,0.87,0.86322,0.870047
5,0.188300,0.536514,0.876,0.878145,0.876708
6,0.085900,0.624775,0.877,0.874536,0.875256
7,0.085900,0.595247,0.883,0.878803,0.881917
8,0.045100,0.673676,0.881,0.87613,0.88036
9,0.045100,0.807726,0.86,0.85287,0.859689
10,0.025100,0.748161,0.877,0.875397,0.876878


TrainOutput(global_step=7500, training_loss=0.08763013089497884, metrics={'train_runtime': 1601.7411, 'train_samples_per_second': 149.837, 'train_steps_per_second': 4.682, 'total_flos': 2467286265600000.0, 'train_loss': 0.08763013089497884, 'epoch': 30.0})

In [38]:
# evaluate model performance
trainer.evaluate(ds_test)

{'eval_loss': 0.5454147458076477,
 'eval_accuracy': 0.842,
 'eval_balanced_accuracy': 0.8344392475200368,
 'eval_f1_score': 0.8410846209996921,
 'eval_runtime': 1.2689,
 'eval_samples_per_second': 788.113,
 'eval_steps_per_second': 25.22,
 'epoch': 30.0}

In [39]:
model.save_pretrained('05deepak/subject_classifier_on_10_k_synthetic_data')
tokenizer.save_pretrained('05deepak/subject_classifier_on_10_k_synthetic_data')

('05deepak/subject_classifier_on_10_k_synthetic_data/tokenizer_config.json',
 '05deepak/subject_classifier_on_10_k_synthetic_data/special_tokens_map.json',
 '05deepak/subject_classifier_on_10_k_synthetic_data/vocab.txt',
 '05deepak/subject_classifier_on_10_k_synthetic_data/added_tokens.json',
 '05deepak/subject_classifier_on_10_k_synthetic_data/tokenizer.json')

In [50]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) ''
Invalid input. Must be one of ('y', 'yes', '1', 'n', 'no', '0', '')
Add token as git credential? (Y/n) Y
Token is valid (permission: write).
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You

In [52]:
# Make sure y"ou're passing the repository name, not a local path
model_name = "05deepak/subject_classifier_on_10_k_synthetic_data"

# Push model to Hugging Face Hub
model.push_to_hub("05deepak/onelove")

# Push tokenizer to Hugging Face Hub
tokenizer.push_to_hub("05deepak/onelove")


README.md:   0%|          | 0.00/24.0 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/05deepak/onelove/commit/b47f6418f1627ae0990dca2b9fe8ac0bd2a42295', commit_message='Upload tokenizer', commit_description='', oid='b47f6418f1627ae0990dca2b9fe8ac0bd2a42295', pr_url=None, pr_revision=None, pr_num=None)

In [53]:
tokenizer.push_to_hub("05deepak/onelove")

No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/05deepak/onelove/commit/b47f6418f1627ae0990dca2b9fe8ac0bd2a42295', commit_message='Upload tokenizer', commit_description='', oid='b47f6418f1627ae0990dca2b9fe8ac0bd2a42295', pr_url=None, pr_revision=None, pr_num=None)

In [21]:
unseen_df= pd.read_csv('/content/drive/MyDrive/17k_synthetic_data (1).csv',index_col=0).fillna(" ").iloc[10000:12500]

In [22]:
y_unseen = unseen_df.pop('subject')

In [23]:
unseen_df['subject']=y_unseen.map(label2id)


In [2]:
pip install transformers

Collecting transformersNote: you may need to restart the kernel to use updated packages.

  Using cached transformers-4.44.2-py3-none-any.whl.metadata (43 kB)
Collecting safetensors>=0.4.1 (from transformers)
  Using cached safetensors-0.4.5-cp312-none-win_amd64.whl.metadata (3.9 kB)
Collecting tokenizers<0.20,>=0.19 (from transformers)
  Using cached tokenizers-0.19.1-cp312-none-win_amd64.whl.metadata (6.9 kB)
Using cached transformers-4.44.2-py3-none-any.whl (9.5 MB)
Using cached safetensors-0.4.5-cp312-none-win_amd64.whl (286 kB)
Using cached tokenizers-0.19.1-cp312-none-win_amd64.whl (2.2 MB)
Installing collected packages: safetensors, tokenizers, transformers
Successfully installed safetensors-0.4.5 tokenizers-0.19.1 transformers-4.44.2


In [6]:
pip install torch

Collecting torch
  Using cached torch-2.4.1-cp312-cp312-win_amd64.whl.metadata (27 kB)
Collecting sympy (from torch)
  Using cached sympy-1.13.2-py3-none-any.whl.metadata (12 kB)
Collecting networkx (from torch)
  Using cached networkx-3.3-py3-none-any.whl.metadata (5.1 kB)
Collecting mpmath<1.4,>=1.1.0 (from sympy->torch)
  Using cached mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Using cached torch-2.4.1-cp312-cp312-win_amd64.whl (199.4 MB)
Using cached networkx-3.3-py3-none-any.whl (1.7 MB)
Using cached sympy-1.13.2-py3-none-any.whl (6.2 MB)
Using cached mpmath-1.3.0-py3-none-any.whl (536 kB)
Installing collected packages: mpmath, sympy, networkx, torch
Successfully installed mpmath-1.3.0 networkx-3.3 sympy-1.13.2 torch-2.4.1
Note: you may need to restart the kernel to use updated packages.


In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_checkpoint = "05deepak/subject_classifeir_lora_modelv3GGuf"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForCausalLM.from_pretrained(model_checkpoint)

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]Error while downloading from https://cdn-lfs-us-1.huggingface.co/repos/ce/03/ce03e3e30ab2f6e73aaa3c9fd1ef309916af5a35b53272c6de059be089b793ea/8ee46ed9f5c9db867bfc5b81f9f5e470aeaa7b4c61e71a1e0c252da94107d67a?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27pytorch_model-00001-of-00004.bin%3B+filename%3D%22pytorch_model-00001-of-00004.bin%22%3B&response-content-type=application%2Foctet-stream&Expires=1726343354&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyNjM0MzM1NH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2NlLzAzL2NlMDNlM2UzMGFiMmY2ZTczYWFhM2M5ZmQxZWYzMDk5MTZhZjVhMzViNTMyNzJjNmRlMDU5YmUwODliNzkzZWEvOGVlNDZlZDlmNWM5ZGI4NjdiZmM1YjgxZjlmNWU0NzBhZWFhN2I0YzYxZTcxYTFlMGMyNTJkYTk0MTA3ZDY3YT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=BAz80rStM%7EUnxDYFDalTyonmSHQeDUi9A5aREHunONm4VhzztMGHLSfbE7cOp3cQv6ILN

ConnectionError: (MaxRetryError('HTTPSConnectionPool(host=\'cdn-lfs-us-1.huggingface.co\', port=443): Max retries exceeded with url: /repos/ce/03/ce03e3e30ab2f6e73aaa3c9fd1ef309916af5a35b53272c6de059be089b793ea/8ee46ed9f5c9db867bfc5b81f9f5e470aeaa7b4c61e71a1e0c252da94107d67a?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27pytorch_model-00001-of-00004.bin%3B+filename%3D%22pytorch_model-00001-of-00004.bin%22%3B&response-content-type=application%2Foctet-stream&Expires=1726343354&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyNjM0MzM1NH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zL2NlLzAzL2NlMDNlM2UzMGFiMmY2ZTczYWFhM2M5ZmQxZWYzMDk5MTZhZjVhMzViNTMyNzJjNmRlMDU5YmUwODliNzkzZWEvOGVlNDZlZDlmNWM5ZGI4NjdiZmM1YjgxZjlmNWU0NzBhZWFhN2I0YzYxZTcxYTFlMGMyNTJkYTk0MTA3ZDY3YT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=BAz80rStM~UnxDYFDalTyonmSHQeDUi9A5aREHunONm4VhzztMGHLSfbE7cOp3cQv6ILNL~Kn0xVzR4ZN8kz4UT8P9qcDmBFVka4dvJ5WZpEhPTb8UMsPUI0t1iYusB8mOTGgnFVKmiTbs0QzOk7T8L8vGfozPUzzaDM3qvAl1JQEkuEkZiakOs8bTcN5As6~nJcim-J1t9MgM9peaS1QM32zMiOJkE0yX9a~BlAwwYXPy7O-Lu1R~z5ubU9bbl-c0OxRZy8~0yhyGxFL9GTAR8I1F7JAhvteyjmDaDUXemUtnQliKs5AjIVZXYLL0JZaYCbWNfSsKy0oiZgpHUveQ__&Key-Pair-Id=K24J24Z295AEI9 (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x000001D7A1D6EE70>: Failed to resolve \'cdn-lfs-us-1.huggingface.co\' ([Errno 11001] getaddrinfo failed)"))'), '(Request ID: 4e892897-4baf-4fd0-a372-b95d01d63e47)')

In [None]:
pred=[]
inputs = tokenizer(

                f"only from this{labels_list} labels options you have to provide below response not opther than this options"  +X_eval_dataset[i]["text"] , return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
pred.append(tokenizer.batch_decode(outputs)[0].split("\n")[-1][:-15])

In [11]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import os 
os.environ["HUGGINGFACE_TOKEN"]="''"
tokenizer1 = AutoTokenizer.from_pretrained("model/05deepaksubject_classifeir_lora_modelv3",token="''")
model1 = AutoModelForSequenceClassification.from_pretrained("model/05deepaksubject_classifeir_lora_modelv3",token="''")

OSError: model/05deepaksubject_classifeir_lora_modelv3 is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`

In [12]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_checkpoint = "05deepak/onelove"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForCausalLM.from_pretrained(model_checkpoint)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.19k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/712k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/2.16k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True.`
Some weights of BertLMHeadModel were not initialized from the model checkpoint at 05deepak/onelove and are newly initialized: ['cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:

def generate_text( sequence, max_length=128):

    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    ids = tokenizer.encode(f'{sequence}', return_tensors='pt').to('cuda')
    final_outputs = model.generate(
        ids,
        do_sample=True,
        max_length=max_length,
        pad_token_id=model.config.eos_token_id,
        top_k=50,
        top_p=0.95,
    )
    print(tokenizer.decode(final_outputs[0], skip_special_tokens=True))

In [24]:
unseen_df

Unnamed: 0,sentence,subject
16873,Socialism has a long history in the United Sta...,6
10337,Diverse cultural backgrounds enrich the learni...,22
2721,Phonological constraints help in distinguishin...,25
7445,A budget is a financial plan for a business's ...,18
7893,Referral marketing can be used to target speci...,18
...,...,...
10179,Effective classroom management is critical for...,22
5934,Sociologists of culture focus on the ways in w...,21
14644,Audit sampling can be used to test internal co...,19
5810,"Work is a central aspect of human life, and th...",21


In [27]:
unseen_df['sentence'].iloc[0]

'Socialism has a long history in the United States, with the Socialist Party of America being founded in 1901.'

In [31]:
import torch
import torch.nn.functional as F

def generate_text_with_probabilities(sequence, max_length=128):
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})

    # Tokenize input
    input_ids = tokenizer.encode(f'{sequence}', return_tensors='pt').to('cuda')

    # Initialize empty list to store generated tokens and probabilities
    generated_ids = input_ids.clone().detach()
    probabilities = []

    # Loop to generate tokens step by step
    for _ in range(max_length - len(input_ids[0])):  # max_length includes the input sequence
        outputs = model(input_ids=generated_ids)
        logits = outputs.logits[:, -1, :]  # Get the logits of the last generated token

        # Apply softmax to get probabilities
        probs = F.softmax(logits, dim=-1)

        # Sample the next token based on the probability distribution
        next_token = torch.multinomial(probs, num_samples=1)
        next_token_prob = probs[0, next_token].item()  # Get the probability of the selected token

        # Append the next token and its probability
        generated_ids = torch.cat((generated_ids, next_token), dim=1)
        probabilities.append(next_token_prob)

        # If the next token is EOS, stop the generation
        if next_token.item() == model.config.eos_token_id:
            break

    # Decode the generated token IDs to text
    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    print("Generated Text:", generated_text)
    print("Probabilities:", probabilities)

    return generated_text, probabilities


In [32]:
text="Socialism has a long history in the United States, with the Socialist Party of America being founded in 1901."
generate_text_with_probabilities(text)

RuntimeError: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx

In [33]:
generate_text(text)

RuntimeError: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc
from sklearn.preprocessing import label_binarize
from itertools import cycle

class ClassificationEvaluator:
    def __init__(self, model, X_test, y_test, y_pred,y_pred_proba,class_names):
        self.model = model
        self.X_test = X_test
        self.y_test = y_test
        self.class_names = class_names
        self.y_pred = y_pred
        self.y_pred_proba = y_pred_proba

    def plot_confusion_matrix(self):
        cm = confusion_matrix(self.y_test, self.y_pred)
        plt.figure(figsize=(10, 8))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=self.class_names, yticklabels=self.class_names)
        plt.title('Confusion Matrix')
        plt.xlabel('Predicted')
        plt.ylabel('Actual')
        plt.show()

    def print_classification_report(self):
        report = classification_report(self.y_test, self.y_pred, target_names=self.class_names)
        print("Classification Report:")
        print(report)

    def plot_roc_curve(self):
        n_classes = len(self.class_names)
        y_test_bin = label_binarize(self.y_test, classes=range(n_classes))

        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        for i in range(n_classes):
            fpr[i], tpr[i], _ = roc_curve(y_test_bin[:, i], self.y_pred_proba[:, i])
            roc_auc[i] = auc(fpr[i], tpr[i])

        plt.figure(figsize=(10, 8))
        colors = cycle(['blue', 'red', 'green', 'yellow', 'purple'])
        for i, color in zip(range(n_classes), colors):
            plt.plot(fpr[i], tpr[i], color=color, lw=2,
                     label=f'ROC curve of class {self.class_names[i]} (area = {roc_auc[i]:0.2f})')
        plt.plot([0, 1], [0, 1], 'k--', lw=2)
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver Operating Characteristic (ROC) Curve')
        plt.legend(loc="lower right")
        plt.show()

    def plot_prediction_distribution(self):
        plt.figure(figsize=(10, 6))
        for i, class_name in enumerate(self.class_names):
            sns.kdeplot(self.y_pred_proba[:, i], shade=True, label=class_name)
        plt.title('Prediction Probability Distribution')
        plt.xlabel('Probability')
        plt.ylabel('Density')
        plt.legend()
        plt.show()


In [None]:
import warnings
warnings.filterwarnings("ignore")
# Assuming the grid search code has been run and best_model_ is available

# Get unique classes from y_train or y_test
class_names = np.unique(y_test)

# Create an instance of ClassificationEvaluator
evaluator = ClassificationEvaluator(ec, x_test, y_test, y_pred,y_pred_proba, class_names)

# Perform evaluations
print("Evaluating the best model from grid search...")
evaluator.plot_confusion_matrix()

In [None]:
print("\n classification Report \n")
evaluator.print_classification_report()

In [None]:
print("\n ROC Curve \n")
evaluator.plot_roc_curve()

In [None]:
print("\n plot_prediction_distribution \n")
evaluator.plot_prediction_distribution()