In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset
import torch

  from .autonotebook import tqdm as notebook_tqdm


#### LIME

In [None]:
# Checkpoint used for the LIME explanations in this section. The tokenizer and
# the 2-label classification weights are read from the same directory, and the
# model is then moved to the GPU.
lime_checkpoint_dir = '/home/ubuntu/Otree_Project/models/online_shopping_dataset_60model'
tokenizer = AutoTokenizer.from_pretrained(lime_checkpoint_dir)
model = AutoModelForSequenceClassification.from_pretrained(lime_checkpoint_dir, num_labels=2)
model = model.to("cuda")
# Prediction function: returns class probabilities
def get_prediction(text):
    """Return the model's class probabilities for one text or a batch of texts.

    This is the classifier callback handed to ``explainer.explain_instance``.
    LIME calls it with a list of perturbed strings and needs one row of class
    probabilities per string, so the full softmax matrix is returned rather
    than a single arg-max index.

    Args:
        text: A single string, or a list of strings.

    Returns:
        numpy.ndarray of shape ``(n_texts, n_classes)``. Each row is the
        softmax of the model's logits for the corresponding text. Use
        ``get_prediction(text).argmax(1)`` to obtain predicted class indices.
    """
    # Normalise to a list of plain strings so a single text and a batch take
    # the same path through the tokenizer.
    texts = [text] if isinstance(text, str) else [str(t) for t in text]
    inputs = tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
    # Send the inputs to whichever device the model currently lives on.
    inputs = inputs.to(model.device)
    # Inference only: skip autograd bookkeeping to save GPU memory.
    with torch.no_grad():
        outputs = model(**inputs)
        probs = outputs[0].softmax(1)
    return probs.detach().cpu().numpy()

In [None]:
# Read the sentiment-analysis spreadsheet and keep the first 100 entries of the
# '标题/微博内容' (title / Weibo content) column as a plain Python list.
df = pd.read_excel('/home/ubuntu/Otree_Project/Co-Learning/co_learning/情感分析_20220915.xlsx')
text = df['标题/微博内容'].head(100).tolist()

In [None]:
# torch is already imported in the first cell; the repeat lets this cell be run on its own.
import torch
# Ask PyTorch to release cached GPU memory it is holding but not using.
# NOTE(review): presumably done to free room before the LIME cell that follows — confirm.
torch.cuda.empty_cache()

In [None]:
from collections import Counter
from lime.lime_text import LimeTextExplainer
# Display names for the two classes: '消极' (negative) and '积极' (positive).
# NOTE(review): assumes model output index 0 is negative and 1 is positive — confirm
# against the checkpoint's label mapping.
class_names = ['消极', '积极']
explainer = LimeTextExplainer(class_names=class_names)

# Dataset: explain samples 96..99 for label 1 only and save one HTML report each.
for i in range(96, 100):
    exp = explainer.explain_instance(
        text[i],
        get_prediction,
        num_samples=40,
        labels=(1,),
    )
    html_path = '/home/ubuntu/Otree_Project/lime_html/lime_exp%d.html' % i
    exp.save_to_file(
        html_path,
        labels=(1,),
        predict_proba=False,
        show_predicted_value=False,
    )

#### Simulation: 寻找70%模型 (searching for a model with ~70% accuracy)

In [2]:
# Load the online-shopping sentiment data and shuffle it with a fixed seed so
# the row order is the same on every run.
origin_df = (
    pd.read_csv('/home/ubuntu/Otree_Project/dataset/online_shopping_sentiment.csv')
    .sample(frac=1, random_state=1)
    .reset_index(drop=True)
)

# 80/20 train/test split (seeded), then keep only the first 1000 test rows.
train_df, test_df = train_test_split(origin_df, test_size=0.2, random_state=1)
test_df = test_df.head(1000)

In [3]:
# Rebind `tokenizer` and `model` to the 2022-09-07-19-51 checkpoint; this is the
# model that the training loop further down continues to fine-tune on the GPU.
simulation_checkpoint_dir = '/home/ubuntu/Otree_Project/models/2022-09-07-19-51'
tokenizer = AutoTokenizer.from_pretrained(simulation_checkpoint_dir)
model = AutoModelForSequenceClassification.from_pretrained(simulation_checkpoint_dir, num_labels=2)
model = model.to("cuda")

In [5]:
from transformers import Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, cohen_kappa_score
# Evaluation metrics
def compute_metrics(pred):
    """Score a set of predictions with accuracy and Cohen's kappa.

    Args:
        pred: Object exposing ``label_ids`` (the reference labels) and
            ``predictions`` (per-class scores; the arg-max over the last
            axis is taken as the predicted class).

    Returns:
        dict with the keys ``'accuracy'`` and ``'kappa'``.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)
    return {
        'accuracy': accuracy_score(y_true, y_pred),
        'kappa': cohen_kappa_score(y_true, y_pred),
    }
# Trainer configuration shared by every fine-tuning round below.
# NOTE(review): the recorded run reports only 27 optimization steps per
# train() call, far fewer than warmup_steps=500 — confirm this is intended.
training_args = TrainingArguments(
    # Where checkpoints and logs are written.
    output_dir='./results',
    logging_dir='./logs',
    # Schedule: three epochs, 12 examples per device for both train and eval.
    num_train_epochs=3,
    per_device_train_batch_size=12,
    per_device_eval_batch_size=12,
    # Optimizer settings.
    warmup_steps=500,
    weight_decay=0.01,
    # Evaluate and save once per epoch, then reload the best checkpoint
    # (the default selection metric is the loss) when training finishes.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)
class My_Dataset(Dataset):
    """Torch dataset that tokenizes a text/label DataFrame once, up front.

    The frame is shuffled, its ``text`` column is tokenized in a single call
    (truncated to ``max_length``, with padding enabled) and its ``label``
    column is kept as a plain Python list.

    Args:
        dataframe: DataFrame with a ``text`` column and a ``label`` column.
        tokenizer: Callable invoked as
            ``tokenizer(texts, truncation=True, padding=True, max_length=...)``
            that returns a mapping of feature name -> per-example sequences.
        max_length: Truncation length forwarded to the tokenizer
            (defaults to 256).
        random_state: Optional seed for the shuffle. ``None`` (the default)
            gives an unseeded shuffle; pass an int to make the example order
            reproducible.
    """

    def __init__(self, dataframe, tokenizer, max_length=256, random_state=None):
        # Shuffle all rows; a seed makes the resulting order repeatable.
        df = dataframe.sample(frac=1.0, random_state=random_state).reset_index(drop=True)
        self.x = tokenizer(df['text'].tolist(), truncation=True, padding=True, max_length=max_length)
        self.y = df['label'].tolist()

    def __getitem__(self, index):
        """Return the features of example ``index`` as tensors, plus its label."""
        item = {k: torch.tensor(v[index]) for k, v in self.x.items()}
        # The label is wrapped in a list, so it is a one-element tensor.
        item['label'] = torch.tensor([self.y[index]])
        return item

    def __len__(self):
        """Return the number of examples."""
        return len(self.y)

In [7]:
# The evaluation set is tokenized once and reused by every round.
test_dataset = My_Dataset(test_df, tokenizer)

# Ten fine-tuning rounds on the same `model` object, so each round continues
# from the weights left by the previous one.
# NOTE(review): the loop index is never used and every round draws the same
# first 100 training rows — confirm that per-round slices were not intended.
for i in range(10, 20):
    train_dataset = My_Dataset(train_df.head(100), tokenizer)
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics,
    )
    trainer.train()

***** Running training *****
  Num examples = 100
  Num Epochs = 3
  Instantaneous batch size per device = 12
  Total train batch size (w. parallel, distributed & accumulation) = 12
  Gradient Accumulation steps = 1
  Total optimization steps = 27


Epoch,Training Loss,Validation Loss,Accuracy,Kappa
1,No log,0.664416,0.594,0.193779
2,No log,0.663294,0.587,0.178365
3,No log,0.663602,0.593,0.190816


***** Running Evaluation *****
  Num examples = 1000
  Batch size = 12
Saving model checkpoint to ./results/checkpoint-9
Configuration saved in ./results/checkpoint-9/config.json
Model weights saved in ./results/checkpoint-9/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 12
Saving model checkpoint to ./results/checkpoint-18
Configuration saved in ./results/checkpoint-18/config.json
Model weights saved in ./results/checkpoint-18/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 12
Saving model checkpoint to ./results/checkpoint-27
Configuration saved in ./results/checkpoint-27/config.json
Model weights saved in ./results/checkpoint-27/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./results/checkpoint-18 (score: 0.663293719291687).
***** Running training *****
  Num examples = 100
  Num Epochs = 3
  Instantaneous batch size per devic

Epoch,Training Loss,Validation Loss,Accuracy,Kappa
1,No log,0.663686,0.592,0.189601
2,No log,0.670807,0.571,0.150858
3,No log,0.675783,0.577,0.16284


***** Running Evaluation *****
  Num examples = 1000
  Batch size = 12
Saving model checkpoint to ./results/checkpoint-9
Configuration saved in ./results/checkpoint-9/config.json
Model weights saved in ./results/checkpoint-9/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 12
Saving model checkpoint to ./results/checkpoint-18
Configuration saved in ./results/checkpoint-18/config.json
Model weights saved in ./results/checkpoint-18/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 12
Saving model checkpoint to ./results/checkpoint-27
Configuration saved in ./results/checkpoint-27/config.json
Model weights saved in ./results/checkpoint-27/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./results/checkpoint-9 (score: 0.6636862754821777).
***** Running training *****
  Num examples = 100
  Num Epochs = 3
  Instantaneous batch size per devic

Epoch,Training Loss,Validation Loss,Accuracy,Kappa
1,No log,0.665181,0.59,0.185939
2,No log,0.671672,0.578,0.164131
3,No log,0.678594,0.573,0.154817


***** Running Evaluation *****
  Num examples = 1000
  Batch size = 12
Saving model checkpoint to ./results/checkpoint-9
Configuration saved in ./results/checkpoint-9/config.json
Model weights saved in ./results/checkpoint-9/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 12
Saving model checkpoint to ./results/checkpoint-18
Configuration saved in ./results/checkpoint-18/config.json
Model weights saved in ./results/checkpoint-18/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 12
Saving model checkpoint to ./results/checkpoint-27
Configuration saved in ./results/checkpoint-27/config.json
Model weights saved in ./results/checkpoint-27/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./results/checkpoint-9 (score: 0.665181040763855).
***** Running training *****
  Num examples = 100
  Num Epochs = 3
  Instantaneous batch size per device

Epoch,Training Loss,Validation Loss,Accuracy,Kappa
1,No log,0.666305,0.591,0.188183
2,No log,0.673011,0.577,0.162734
3,No log,0.677649,0.578,0.163177


***** Running Evaluation *****
  Num examples = 1000
  Batch size = 12
Saving model checkpoint to ./results/checkpoint-9
Configuration saved in ./results/checkpoint-9/config.json
Model weights saved in ./results/checkpoint-9/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 12
Saving model checkpoint to ./results/checkpoint-18
Configuration saved in ./results/checkpoint-18/config.json
Model weights saved in ./results/checkpoint-18/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 12
Saving model checkpoint to ./results/checkpoint-27
Configuration saved in ./results/checkpoint-27/config.json
Model weights saved in ./results/checkpoint-27/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./results/checkpoint-9 (score: 0.6663053035736084).
***** Running training *****
  Num examples = 100
  Num Epochs = 3
  Instantaneous batch size per devic

Epoch,Training Loss,Validation Loss,Accuracy,Kappa
1,No log,0.667857,0.586,0.178519
2,No log,0.674357,0.579,0.166165
3,No log,0.681503,0.575,0.158562


***** Running Evaluation *****
  Num examples = 1000
  Batch size = 12
Saving model checkpoint to ./results/checkpoint-9
Configuration saved in ./results/checkpoint-9/config.json
Model weights saved in ./results/checkpoint-9/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 12
Saving model checkpoint to ./results/checkpoint-18
Configuration saved in ./results/checkpoint-18/config.json
Model weights saved in ./results/checkpoint-18/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 12
Saving model checkpoint to ./results/checkpoint-27
Configuration saved in ./results/checkpoint-27/config.json
Model weights saved in ./results/checkpoint-27/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./results/checkpoint-9 (score: 0.6678565144538879).
***** Running training *****
  Num examples = 100
  Num Epochs = 3
  Instantaneous batch size per devic

Epoch,Training Loss,Validation Loss,Accuracy,Kappa
1,No log,0.669241,0.587,0.180868
2,No log,0.675679,0.58,0.168304
3,No log,0.680112,0.582,0.171003


***** Running Evaluation *****
  Num examples = 1000
  Batch size = 12
Saving model checkpoint to ./results/checkpoint-9
Configuration saved in ./results/checkpoint-9/config.json
Model weights saved in ./results/checkpoint-9/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 12
Saving model checkpoint to ./results/checkpoint-18
Configuration saved in ./results/checkpoint-18/config.json
Model weights saved in ./results/checkpoint-18/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 12
Saving model checkpoint to ./results/checkpoint-27
Configuration saved in ./results/checkpoint-27/config.json
Model weights saved in ./results/checkpoint-27/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./results/checkpoint-9 (score: 0.6692412495613098).
***** Running training *****
  Num examples = 100
  Num Epochs = 3
  Instantaneous batch size per devic

Epoch,Training Loss,Validation Loss,Accuracy,Kappa
1,No log,0.6706,0.587,0.180764
2,No log,0.676393,0.581,0.169916


***** Running Evaluation *****
  Num examples = 1000
  Batch size = 12
Saving model checkpoint to ./results/checkpoint-9
Configuration saved in ./results/checkpoint-9/config.json
Model weights saved in ./results/checkpoint-9/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 12
Saving model checkpoint to ./results/checkpoint-18
Configuration saved in ./results/checkpoint-18/config.json
Model weights saved in ./results/checkpoint-18/pytorch_model.bin
***** Running Evaluation *****
  Num examples = 1000
  Batch size = 12


KeyboardInterrupt: 

In [None]:
trainer.save_pretrai