In [None]:
!pip install transformers
!pip install datasets
!pip install sacremoses

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.24.0-py3-none-any.whl (5.5 MB)
[K     |████████████████████████████████| 5.5 MB 14.9 MB/s 
[?25hCollecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 55.3 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.10.1-py3-none-any.whl (163 kB)
[K     |████████████████████████████████| 163 kB 56.6 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.10.1 tokenizers-0.13.1 transformers-4.24.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting datasets
  Downloading datasets-2.6.1-py3-none-any.whl (441 kB)
[K     |████████████████████████████████| 441 kB 

In [None]:
import pandas as pd
import torch
from tqdm.notebook import tqdm
from transformers import BertTokenizer
from torch.utils.data import TensorDataset
from transformers import BertForSequenceClassification
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from transformers import AdamW, get_linear_schedule_with_warmup
from sklearn.metrics import f1_score
import numpy as np
import random

In [None]:
df = pd.read_csv('train.csv')
df.head()

Unnamed: 0,Tweets,label,S/G
0,بکواس مت کرو,1,1
1,تمہاری ہیجڑا فورس ایک نہتے کے سامنے بکری بنی ہ...,1,0
2,آفیسر سمیتبھارتی فوجی جہنم واصل،بنکرز تباہ بھا...,1,1
3,غدار منافق میر اللہ تمہیں زلیل کرے,1,1
4,اگست پر آپ بھارت کو کیا پیغام دینا چاہیں گے؟ م...,1,0


In [None]:
df['S/G'].value_counts()

2    1782
0    1341
1     441
Name: S/G, dtype: int64

In [None]:
# Drop the original 'label' column so that 'S/G' can take over as the target.
# Guarded on 'S/G' still being present: once the rename cell below has run, 'label'
# *is* the target, and re-running an unguarded drop would silently delete it.
if 'S/G' in df.columns:
    df = df.drop(columns='label')

In [None]:
df.head()

Unnamed: 0,Tweets,S/G
0,بکواس مت کرو,1
1,تمہاری ہیجڑا فورس ایک نہتے کے سامنے بکری بنی ہ...,0
2,آفیسر سمیتبھارتی فوجی جہنم واصل،بنکرز تباہ بھا...,1
3,غدار منافق میر اللہ تمہیں زلیل کرے,1
4,اگست پر آپ بھارت کو کیا پیغام دینا چاہیں گے؟ م...,0


In [None]:
# Exact-name rename of the target column. `str.replace` would substitute the pattern
# inside *every* column name (substring match) rather than renaming one column.
df = df.rename(columns={'S/G': 'label'})

In [None]:
df.head()

Unnamed: 0,Tweets,label
0,بکواس مت کرو,1
1,تمہاری ہیجڑا فورس ایک نہتے کے سامنے بکری بنی ہ...,0
2,آفیسر سمیتبھارتی فوجی جہنم واصل،بنکرز تباہ بھا...,1
3,غدار منافق میر اللہ تمہیں زلیل کرے,1
4,اگست پر آپ بھارت کو کیا پیغام دینا چاہیں گے؟ م...,0


In [None]:
df['label'].value_counts()

2    1782
0    1341
1     441
Name: label, dtype: int64

In [None]:
# Class name -> integer id. Inverted by `accuracy_per_class` to print readable names,
# and its length is used as `num_labels` when the model is reloaded.
label_dict = {'Group': 1, 'Individual': 0, 'Non-Threatening': 2}

In [None]:
# The classes are imbalanced, so the 85/15 train/validation split is stratified
# on the label to keep the class proportions the same in both parts.
row_labels = df.label.values
X_train, X_val, y_train, y_val = train_test_split(
    df.index.values,
    row_labels,
    test_size=0.15,
    random_state=42,
    stratify=row_labels,
)

# Tag every row with the split it was assigned to.
df['data_type'] = 'not_set'
df.loc[X_train, 'data_type'] = 'train'
df.loc[X_val, 'data_type'] = 'val'

# Per-class row counts in each split.
df.groupby(['label', 'data_type']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Tweets
label,data_type,Unnamed: 2_level_1
0,train,1140
0,val,201
1,train,375
1,val,66
2,train,1514
2,val,268


In [None]:
# NOTE(review): `do_lower_case=True` is kept exactly as in the original run, but the
# checkpoint is a *cased* one — confirm that lower-casing (and any normalisation the
# tokenizer couples to it) is really intended for this text.
tokenizer = BertTokenizer.from_pretrained('google/muril-base-cased',
                                          do_lower_case=True)


def _encode_tweets(texts):
    """Tokenize `texts` into PyTorch tensors padded/truncated to 256 tokens.

    Uses the explicit `padding='max_length'` / `truncation=True` arguments in place
    of the deprecated `pad_to_max_length=True`, which left truncation implicit and
    made the tokenizer emit a warning.
    """
    return tokenizer.batch_encode_plus(
        texts,
        add_special_tokens=True,
        return_attention_mask=True,
        padding='max_length',
        truncation=True,
        max_length=256,
        return_tensors='pt',
    )


train_rows = df[df.data_type == 'train']
val_rows = df[df.data_type == 'val']

encoded_data_train = _encode_tweets(train_rows.Tweets.values)
encoded_data_val = _encode_tweets(val_rows.Tweets.values)

input_ids_train = encoded_data_train['input_ids']
attention_masks_train = encoded_data_train['attention_mask']
labels_train = torch.tensor(train_rows.label.values)

input_ids_val = encoded_data_val['input_ids']
attention_masks_val = encoded_data_val['attention_mask']
labels_val = torch.tensor(val_rows.label.values)

# Each dataset item is the triple (input_ids, attention_mask, label).
dataset_train = TensorDataset(input_ids_train, attention_masks_train, labels_train)
dataset_val = TensorDataset(input_ids_val, attention_masks_val, labels_val)

Downloading:   0%|          | 0.00/3.16M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/113 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/206 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/411 [00:00<?, ?B/s]

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


In [None]:
# One output unit per entry in `label_dict`, matching how the model is rebuilt
# later in the notebook when the checkpoint is reloaded.
model = BertForSequenceClassification.from_pretrained("google/muril-base-cased",
                                                      num_labels=len(label_dict),
                                                      output_attentions=False,
                                                      output_hidden_states=False)
# Move to the GPU when one is available instead of calling `.cuda()` unconditionally,
# which raises on a CPU-only runtime. The trailing `;` suppresses the very long
# module repr that would otherwise be printed as the cell output.
model.to(torch.device("cuda:0" if torch.cuda.is_available() else "cpu"));

Downloading:   0%|          | 0.00/953M [00:00<?, ?B/s]

Some weights of the model checkpoint at google/muril-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not in

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(197285, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elemen

In [None]:
# Training batches are drawn in random order; validation keeps the dataset order.
batch_size = 3

train_sampler = RandomSampler(dataset_train)
dataloader_train = DataLoader(dataset_train, sampler=train_sampler, batch_size=batch_size)

val_sampler = SequentialSampler(dataset_val)
dataloader_validation = DataLoader(dataset_val, sampler=val_sampler, batch_size=batch_size)

In [None]:
# `transformers.AdamW` is deprecated in favour of the PyTorch implementation.
# `weight_decay=0.0` is passed explicitly because the PyTorch default is 0.01,
# whereas the transformers optimizer used before defaulted to no weight decay.
optimizer = torch.optim.AdamW(model.parameters(),
                              lr=1e-5,
                              eps=1e-8,
                              weight_decay=0.0)

epochs = 5

# Linear decay of the learning rate to zero over all optimisation steps, no warm-up.
scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps=0,
                                            num_training_steps=len(dataloader_train)*epochs)



In [None]:
def f1_score_func(preds, labels):
    """Return the weighted-average F1 score of the arg-max predictions.

    `preds` holds one row of per-class scores per sample (arg-max is taken along
    axis 1); `labels` holds the true class ids and is flattened before scoring.
    """
    predicted_classes = np.argmax(preds, axis=1).flatten()
    true_classes = labels.flatten()
    return f1_score(true_classes, predicted_classes, average='weighted')

def accuracy_per_class(preds, labels):
    """Print, for each class present in `labels`, correct predictions / class size.

    `preds` holds one row of per-class scores per sample (arg-max is taken along
    axis 1); `labels` holds the true class ids. Class names come from the
    module-level `label_dict`. Nothing is returned.
    """
    id_to_name = dict((class_id, name) for name, class_id in label_dict.items())

    predicted = np.argmax(preds, axis=1).flatten()
    actual = labels.flatten()

    for class_id in np.unique(actual):
        in_class = actual == class_id
        n_correct = int(np.sum(predicted[in_class] == class_id))
        n_total = int(np.sum(in_class))
        print(f'Class: {id_to_name[class_id]}')
        print(f'Accuracy: {n_correct}/{n_total}\n')

In [None]:
import torch

# Use the first CUDA GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
# Seed every random number generator used here (Python, NumPy, PyTorch CPU and CUDA)
# with the same value, in this order.
seed_val = 17
for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    seed_fn(seed_val)

def evaluate(dataloader_val):
    """Run the module-level `model` over `dataloader_val` without gradient tracking.

    Each batch is expected to be indexable as (input_ids, attention_mask, labels);
    the tensors are moved to the module-level `device` before the forward pass.

    Returns a tuple of:
      * the summed batch losses divided by the number of batches,
      * the logits of all batches concatenated along axis 0 (NumPy array),
      * the label ids of all batches concatenated along axis 0 (NumPy array).
    """
    model.eval()

    running_loss = 0
    logit_chunks = []
    label_chunks = []

    for batch in dataloader_val:
        on_device = [tensor.to(device) for tensor in batch]

        with torch.no_grad():
            outputs = model(input_ids=on_device[0],
                            attention_mask=on_device[1],
                            labels=on_device[2])

        # With labels supplied, the first two outputs are read as (loss, logits).
        running_loss += outputs[0].item()
        logit_chunks.append(outputs[1].detach().cpu().numpy())
        label_chunks.append(on_device[2].cpu().numpy())

    mean_loss = running_loss / len(dataloader_val)

    all_logits = np.concatenate(logit_chunks, axis=0)
    all_labels = np.concatenate(label_chunks, axis=0)

    return mean_loss, all_logits, all_labels
    
# Fine-tuning loop: one pass over the training data per epoch, a checkpoint after
# every epoch, then validation loss and weighted F1 on the held-out split.
for epoch in tqdm(range(1, epochs+1)):

    model.train()

    loss_train_total = 0

    progress_bar = tqdm(dataloader_train, desc='Epoch {:1d}'.format(epoch), leave=False, disable=False)
    for batch in progress_bar:

        model.zero_grad()

        batch = tuple(b.to(device) for b in batch)

        inputs = {'input_ids':      batch[0],
                  'attention_mask': batch[1],
                  'labels':         batch[2],
                 }

        outputs = model(**inputs)

        loss = outputs[0]
        batch_loss = loss.item()
        loss_train_total += batch_loss
        loss.backward()

        # Clip the global gradient norm to 1.0 before the parameter update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()
        scheduler.step()

        # Show the batch loss as returned by the model. Dividing by `len(batch)` would
        # divide by the number of tensors in the batch tuple (3), not by the number of
        # samples, and would only look plausible because batch_size also happens to be 3.
        progress_bar.set_postfix({'training_loss': '{:.3f}'.format(batch_loss)})


    torch.save(model.state_dict(), f'./finetuned_BERT_epoch_{epoch}.model')

    tqdm.write(f'\nEpoch {epoch}')

    loss_train_avg = loss_train_total/len(dataloader_train)
    tqdm.write(f'Training loss: {loss_train_avg}')

    val_loss, predictions, true_vals = evaluate(dataloader_validation)
    val_f1 = f1_score_func(predictions, true_vals)
    tqdm.write(f'Validation loss: {val_loss}')
    tqdm.write(f'F1 Score (Weighted): {val_f1}')

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/1010 [00:00<?, ?it/s]


Epoch 1
Training loss: 1.0035080009757882
Validation loss: 0.9699254991621945
F1 Score (Weighted): 0.33437227220353577


Epoch 2:   0%|          | 0/1010 [00:00<?, ?it/s]


Epoch 2
Training loss: 0.9515973137156798
Validation loss: 0.9095290812700154
F1 Score (Weighted): 0.5317836700868791


Epoch 3:   0%|          | 0/1010 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.850982858609445
Validation loss: 0.8546364953398039
F1 Score (Weighted): 0.5851591034335392


Epoch 4:   0%|          | 0/1010 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.7507119548792887
Validation loss: 0.8413737939889204
F1 Score (Weighted): 0.5937853053971682


Epoch 5:   0%|          | 0/1010 [00:00<?, ?it/s]


Epoch 5
Training loss: 0.6839901532129486
Validation loss: 0.8477646323056195
F1 Score (Weighted): 0.635600315820371


In [None]:
# Rebuild the classifier from the base checkpoint, overwrite its weights with the
# state saved after epoch 5, and report per-class accuracy on the validation split.
model = BertForSequenceClassification.from_pretrained(
    "google/muril-base-cased",
    num_labels=len(label_dict),
    output_attentions=False,
    output_hidden_states=False,
)

model.to(device)

# The checkpoint is deserialised onto the CPU; `load_state_dict` then copies the
# values into the parameters of the already-placed model.
final_state = torch.load('./finetuned_BERT_epoch_5.model', map_location=torch.device('cpu'))
model.load_state_dict(final_state)

_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class(predictions, true_vals)

Some weights of the model checkpoint at google/muril-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not in

Class: Individual
Accuracy: 129/201

Class: Group
Accuracy: 14/66

Class: Non-Threatening
Accuracy: 205/268



In [None]:
predictions

array([[ 0.8169834 , -0.7309538 , -0.21028489],
       [ 0.0976925 ,  0.3577395 , -0.45194098],
       [-0.19436446, -1.3185284 ,  1.5228101 ],
       ...,
       [-0.21607178, -1.3046621 ,  1.5335548 ],
       [-0.23425895, -1.2721483 ,  1.5239319 ],
       [-0.2406842 , -1.2672476 ,  1.5251713 ]], dtype=float32)

In [None]:
preds_flat = np.argmax(predictions, axis=1).flatten()
preds_flat

array([0, 1, 2, 0, 1, 0, 2, 2, 1, 0, 1, 1, 2, 2, 2, 0, 0, 0, 0, 2, 0, 0,
       2, 0, 0, 0, 1, 0, 2, 2, 0, 0, 2, 0, 1, 0, 2, 0, 0, 0, 0, 2, 0, 0,
       0, 2, 2, 0, 0, 0, 0, 2, 2, 0, 1, 2, 0, 2, 2, 1, 2, 2, 0, 0, 2, 0,
       0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 2, 0, 0, 1, 2, 0,
       0, 0, 1, 2, 0, 0, 0, 2, 0, 0, 0, 2, 2, 1, 2, 2, 0, 2, 2, 0, 0, 1,
       2, 0, 0, 2, 0, 0, 2, 2, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 1, 2, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0,
       2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 2,
       2, 0, 0, 2, 2, 2, 0, 1, 1, 2, 0, 0, 2, 0, 0, 0, 1, 2, 2, 0, 2, 2,
       2, 0, 2, 2, 0, 0, 0, 0, 0, 2, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 2,
       2, 0, 1, 2, 0, 2, 0, 0, 2, 0, 2, 2, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0,
       0, 2, 0, 1, 0, 2, 0, 0, 2, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 2,
       2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 0, 2, 0,
       0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

In [None]:
import torch
class tweetDataset(torch.utils.data.Dataset):
    """Map-style dataset over tokenizer encodings with optional labels.

    Args:
        encodings: mapping from field name to an indexable of per-sample values.
            It must contain an "input_ids" entry, whose length defines the dataset size.
        labels: optional indexable of per-sample labels (list, NumPy array or tensor).
            `None` or an empty sequence means the dataset is unlabelled.
    """

    def __init__(self, encodings, labels=None):
        self.encodings = encodings
        self.labels = labels

    @staticmethod
    def _to_tensor(value):
        """Return a tensor copy of `value`.

        Tensors are copied with `clone().detach()` because `torch.tensor(tensor)`
        emits a copy-construct UserWarning; anything else goes through `torch.tensor`.
        """
        if isinstance(value, torch.Tensor):
            return value.clone().detach()
        return torch.tensor(value)

    def _has_labels(self):
        """True when labels were supplied and are non-empty.

        Uses an explicit None/length check rather than truthiness: the truth value
        of a multi-element tensor or NumPy array is ambiguous and raises.
        """
        return self.labels is not None and len(self.labels) > 0

    def __getitem__(self, idx):
        """Return a dict of tensors for sample `idx`, plus "labels" when available."""
        item = {key: self._to_tensor(val[idx]) for key, val in self.encodings.items()}
        if self._has_labels():
            item["labels"] = self._to_tensor(self.labels[idx])
        return item

    def __len__(self):
        """Number of samples, taken from the "input_ids" entry."""
        return len(self.encodings["input_ids"])

In [None]:
df0 = pd.read_csv('test.csv')
df0.head()

Unnamed: 0,Tweet,Threat,S/G
0,وزیراعل ی پنجاب کا ضلع راولپنڈی کا دورہ لینڈسل...,,
1,الحمدلله پورے پاکستان کے وکلاء برادری کا کہنا ...,,
2,جنوری مہلت ختم الٹی گنتی شروع ن لیگ کے خلا...,,
3,کروناکی آڑ میں حکومت اورفوج جوکھیل قادیانیوں ک...,,
4,حکومت ایکشن میں آئے ورنہ اگر ہم ایکشن میں آگئے...,,


In [None]:
input = list(df0['Tweet'])

In [None]:
# Same fixed-length (256 token) encoding as the training data. `padding='max_length'`
# and `truncation=True` replace the deprecated `pad_to_max_length=True`, which left
# truncation implicit.
temp = tokenizer.batch_encode_plus(
    input,
    add_special_tokens=True,
    return_attention_mask=True,
    padding='max_length',
    truncation=True,
    max_length=256,
    return_tensors='pt',
)



In [None]:
test_dataset = tweetDataset(temp)

In [None]:
from transformers import Trainer

In [None]:
test_trainer = Trainer(model)

In [None]:
  raw_pred, _, _ = test_trainer.predict(test_dataset)

***** Running Prediction *****
  Num examples = 935
  Batch size = 8
  


In [None]:
y_pred = np.argmax(raw_pred, axis=1)

In [None]:
y_pred

array([2, 2, 2, 0, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2,
       1, 0, 2, 2, 2, 2, 0, 0, 2, 0, 0, 2, 2, 2, 0, 2, 0, 2, 2, 2, 0, 0,
       2, 2, 0, 0, 2, 2, 2, 0, 2, 0, 0, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2,
       2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2,
       2, 0, 2, 0, 2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 2, 0, 0, 2, 2, 0, 0,
       2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 2, 2, 0, 2, 0, 2, 2, 2,
       0, 2, 2, 2, 2, 0, 0, 0, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2,
       2, 0, 0, 2, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2,
       2, 2, 0, 2, 2, 2, 0, 2, 0, 0, 0, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2,
       2, 2, 2, 2, 0, 0, 0, 2, 2, 0, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2,
       2, 0, 2, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 0, 0, 2, 2, 0, 2, 2, 2, 0,
       2, 2, 0, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2,
       2, 0, 0, 0, 2, 2, 0, 2, 0, 2, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 1, 0, 2, 0, 2, 0, 2, 2, 0, 2,

In [None]:
import pickle

In [None]:
# Serialise the fine-tuned model. The `with` block guarantees the file is flushed
# and closed even if pickling fails part-way.
# NOTE(review): the absolute path looks like a mounted Google Drive — confirm the
# drive is mounted before this cell runs.
with open('/content/drive/MyDrive/Useless/model2.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
#pickle.dump(model, open('bert.pkl', 'wb'))

In [None]:
# Second copy of the pickled model under a different file name; the `with` block
# closes the file handle deterministically.
with open('/content/drive/MyDrive/Useless/bert.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
df0['y_pred'] = y_pred

In [None]:
df0['y_pred'].value_counts()

2    653
0    273
1      9
Name: y_pred, dtype: int64

In [None]:
df0.to_csv('Final_Muril.csv', index=False)

In [None]:
from sklearn import metrics

In [None]:
# Score only rows that carry a gold 'S/G' label.
# NOTE(review): the preview of test.csv shows NaN in 'S/G'; missing targets are the
# presumed cause of the ValueError this cell raised — confirm against the full file.
has_gold = df0['S/G'].notna().to_numpy()
confusion_matrix = metrics.confusion_matrix(
    df0.loc[has_gold, 'S/G'].astype(int),
    np.asarray(y_pred)[has_gold],
    # Fix the class set so the matrix is always 3x3, in the order of the display
    # labels used by the plotting cell (0 Individual, 1 Group, 2 Non-Threatening).
    labels=[0, 1, 2],
)


ValueError: ignored

In [None]:
cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix = confusion_matrix, display_labels = ["Individual", "Group", "Non-Threatening"])

In [None]:
import matplotlib.pyplot as plt

In [None]:
cm_display.plot()
plt.show()

In [None]:
# Accuracy for multi-class classification: predicted class ids vs. the 'S/G' column.
from datasets import load_metric
metric1 = load_metric("accuracy")
print(metric1.compute(predictions=y_pred,references=list(df0['S/G'])))

In [None]:
# Macro-averaged F1 score for multi-class classification ('S/G' column vs. y_pred).
from sklearn.metrics import f1_score
f1_score(list(df0['S/G']), y_pred, average='macro')

In [None]:
# Collapse the 3-class prediction into a binary flag:
# predicted class 2 -> 0, predicted class 0 or 1 -> 1.
predicted_class_2 = df0.y_pred == 2
predicted_class_0_or_1 = df0.y_pred.isin([0, 1])
df0.loc[predicted_class_2, 'Binary'] = 0
df0.loc[predicted_class_0_or_1, 'Binary'] = 1

In [None]:
df0.head()

In [None]:
cm = metrics.confusion_matrix(list(df0['Threat']), list(df0['Binary']))

In [None]:
# `confusion_matrix` orders classes by ascending label value, so row/column 0 is
# Binary == 0 (predicted class 2, i.e. non-threatening) and row/column 1 is Binary == 1.
# The display labels must follow that order.
# NOTE(review): assumes the gold 'Threat' column uses the same 0/1 coding — confirm.
cm_d = metrics.ConfusionMatrixDisplay(confusion_matrix = cm, display_labels = ["Non-Threatening", "Threatening"])

In [None]:
cm_d.plot()
plt.show()

In [None]:
# Accuracy for binary classification: 'Binary' predictions vs. the 'Threat' column.
metric11 = load_metric("accuracy")
print(metric11.compute(predictions=list(df0['Binary']),references=list(df0['Threat'])))

In [None]:
# Macro-averaged F1 score for binary classification ('Threat' column vs. 'Binary').
f1_score(list(df0['Threat']), list(df0['Binary']), average='macro')

In [None]:
f1_score(list(df0['Threat']), list(df0['Binary']))

In [None]:
fpr, tpr, thresholds = metrics.roc_curve(list(df0['Threat']), list(df0['Binary']))
metrics.auc(fpr, tpr)
plt.plot(fpr, tpr, linestyle='--',color='orange', label='Threatening vs Non-Threatening')

In [None]:
from sklearn.metrics import roc_auc_score
roc_auc_score(list(df0['Threat']), list(df0['Binary']))

In [None]:
# One-vs-rest ROC curve for each class.
from sklearn.metrics import roc_curve
fpr = {}
tpr = {}
thresh ={}

n_class = 3

# A ROC curve needs a continuous score for the positive class. The arg-max class id
# (`y_pred`) is not such a score, so the raw model outputs are turned into per-class
# probabilities with a numerically stable softmax.
# NOTE(review): assumes `raw_pred` is an (n_samples, n_class) array of logits, as
# implied by the `np.argmax(raw_pred, axis=1)` cell — confirm.
logits = np.asarray(raw_pred)
shifted_exp = np.exp(logits - logits.max(axis=1, keepdims=True))
class_probs = shifted_exp / shifted_exp.sum(axis=1, keepdims=True)

# Only rows with a gold 'S/G' label can be scored; a missing label would otherwise
# be counted as a negative for every class.
has_gold = df0['S/G'].notna().to_numpy()
gold = df0.loc[has_gold, 'S/G'].to_numpy()
class_probs = class_probs[has_gold]

for i in range(n_class):
    # Class i is the positive class; every other class is a negative.
    fpr[i], tpr[i], thresh[i] = roc_curve((gold == i).astype(int), class_probs[:, i])

# plotting
plt.plot(fpr[0], tpr[0], linestyle='--',color='orange', label='Individual vs Rest')
plt.plot(fpr[1], tpr[1], linestyle='--',color='green', label='Group vs Rest')
plt.plot(fpr[2], tpr[2], linestyle='--',color='blue', label='Non-Threatening vs Rest')
plt.title('Multiclass ROC curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive rate')
plt.legend(loc='best')
plt.savefig('Multiclass ROC',dpi=300);

In [None]:
df0.to_csv('Final_Muril.csv', index=False)