In [None]:
import torch
import torch.nn as nn
import pandas as pd
from transformers import BertConfig, BertModel
from sklearn.model_selection import train_test_split
class TextCNN(nn.Module):
    """Three parallel 1-D convolution branches, each max-pooled over the sequence axis.

    Args:
        embedding_dim: Number of input channels of every convolution.
        cnn_output_dim: Number of output channels produced by every convolution.
        kernel_size: Indexable collection whose first three entries are the
            window widths of the three branches.
        padding: Padding forwarded unchanged to every convolution.

    Because the layers are ``nn.Conv1d`` modules with
    ``in_channels=embedding_dim``, ``forward`` expects the channel axis before
    the sequence axis, i.e. ``(batch, embedding_dim, seq_len)``.
    """

    def __init__(self, embedding_dim, cnn_output_dim, kernel_size, padding):
        super().__init__()

        def make_branch(width):
            # The branches share channel sizes and padding; only the window width differs.
            return nn.Conv1d(
                in_channels=embedding_dim,
                out_channels=cnn_output_dim,
                kernel_size=width,
                padding=padding,
            )

        # Attribute names are kept as conv1..conv3 so state_dict keys stay the same.
        self.conv1 = make_branch(kernel_size[0])
        self.conv2 = make_branch(kernel_size[1])
        self.conv3 = make_branch(kernel_size[2])
        self.global_pool = nn.AdaptiveMaxPool1d(output_size=1)
        self.dropout = nn.Dropout(0.2)

    def forward(self, inputs):
        """Return the concatenated, dropout-regularised features of all three branches.

        Each branch applies convolution -> ReLU -> global max pool and drops the
        pooled length-1 axis; the three results are joined along the last axis,
        giving ``3 * cnn_output_dim`` features per example.
        """
        pooled = [
            self.global_pool(torch.relu(branch(inputs))).squeeze(-1)
            for branch in (self.conv1, self.conv2, self.conv3)
        ]
        return self.dropout(torch.cat(pooled, dim=-1))

class BertTextCNN(nn.Module):
    """BERT encoder whose first-token vector is combined with TextCNN features for classification.

    Args:
        config_path: Location passed to ``BertConfig.from_pretrained``.
        checkpoint_path: Location passed to ``BertModel.from_pretrained``.
        class_nums: Number of output classes (width of the last linear layer).
        cnn_output_dim: Output channels of each of the three TextCNN branches.
        kernel_size: Window widths of the three TextCNN branches; only the
            first three entries are read.
        padding: Padding forwarded to every TextCNN convolution.
    """

    def __init__(self, config_path, checkpoint_path, class_nums, cnn_output_dim=256, kernel_size=(3, 4, 5), padding=0):
        super(BertTextCNN, self).__init__()
        bert_config = BertConfig.from_pretrained(config_path)
        self.bert = BertModel.from_pretrained(checkpoint_path, config=bert_config)
        self.textcnn = TextCNN(embedding_dim=bert_config.hidden_size, cnn_output_dim=cnn_output_dim, kernel_size=kernel_size, padding=padding)
        # First-token vector (hidden_size) + three pooled CNN branches (cnn_output_dim each).
        self.fc1 = nn.Linear(bert_config.hidden_size + cnn_output_dim * 3, 512)
        self.fc2 = nn.Linear(512, class_nums)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, inputs):
        """Classify a batch of tokenised sequences.

        Args:
            inputs: Mapping with the keys ``'input_ids'``, ``'attention_mask'``
                and ``'token_type_ids'``, each forwarded to the BERT encoder.

        Returns:
            Softmax-normalised class scores, one row per input sequence.
        """
        bert_output = self.bert(inputs['input_ids'], attention_mask=inputs['attention_mask'], token_type_ids=inputs['token_type_ids'])
        # Element 0 of the encoder output is indexed as (batch, position, hidden) below.
        sequence_output = bert_output[0]
        cls_features = sequence_output[:, 0]
        # Drop the first and last positions, then move the hidden axis in front of
        # the position axis: the TextCNN branches are nn.Conv1d layers built with
        # in_channels=hidden_size and therefore need (batch, hidden, positions).
        # Without the transpose the convolution receives the position axis as its
        # channel axis and fails with a channel-count mismatch.
        # NOTE(review): with right-padded batches the last position is a padding
        # token rather than [SEP], so [SEP] and padding still reach the CNN —
        # confirm this is intended.
        all_token_embedding = sequence_output[:, 1:-1].transpose(1, 2)
        cnn_features = self.textcnn(all_token_embedding)
        concat_features = torch.cat((cls_features, cnn_features), dim=-1)
        output = self.fc1(concat_features)
        output = nn.functional.relu(output)
        output = self.fc2(output)
        # NOTE(review): the model returns probabilities, not logits. A loss that
        # applies its own (log-)softmax, such as nn.CrossEntropyLoss, would then
        # normalise twice — verify against the training code.
        output = self.softmax(output)
        return output
# Load the pretrained encoder from the local ./rbt3 directory.
bert = BertModel.from_pretrained('./rbt3')
# Bare expression: Jupyter only renders it when it is the last statement of a
# cell, so in the middle of a cell this line displays nothing.
bert.config
# Carve a held-out split out of the training CSV and save it as dev.csv.
df = pd.read_csv("./data/train_data.csv")
X = df['text'].tolist()
y = df['label'].tolist()
# 30% of the rows go to the held-out side; random_state pins the shuffle.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=42)
# NOTE(review): only the held-out 30% is written out. X_train / y_train are not
# saved anywhere in the visible code and train_data.csv itself is left
# unchanged, so every dev.csv row is still present in train_data.csv — confirm
# that whatever trains on train_data.csv does not also evaluate on dev.csv.
df1 = pd.DataFrame({'text':X_test,'label':y_test})
df1.to_csv('./data/dev.csv',index=False)

In [18]:
from transformers import BertTokenizer,BertModel
from MyDataSet import MyDataset
from bert_model import BertTextCNN
from torch.utils.data import DataLoader
import pandas as pd
# Sanity-check cell: tokenise the training CSV through MyDataset and print one sample.
model_name = './rbt3'
num_filter = 256
num_classes = 16
filter_sizes = [3,4,5]
# NOTE(review): the path literal repeats the value of model_name defined above;
# model_name, num_filter, num_classes and filter_sizes are only referenced by the
# commented-out sketch at the bottom of this cell.
tokenizer = BertTokenizer.from_pretrained('./rbt3')
df = pd.read_csv('./data/train_data.csv')
# Presumably 64 is the padded sequence length (the tensors printed by this cell
# have 64 entries) — confirm against MyDataset.
train_dataset = MyDataset(df,tokenizer,64)
# Disabled exploration of the raw encoder API:
# bert_model = BertModel.from_pretrained('./rbt3/')
# help(bert_model)
# Indexing the dataset yields a 3-tuple that is unpacked as (input_ids, attention_mask, labels).
input_ids, attention_mask, labels = train_dataset[8]
print(input_ids)
print(attention_mask)
# Disabled end-to-end smoke test. NOTE(review): the loop header below has no
# loop variables and would be a SyntaxError if uncommented; it presumably should
# unpack (input_ids, attention_mask, labels) like the indexing above.
# train_dataloader = DataLoader(train_dataset,batch_size=1,shuffle=True)
# model = BertTextCNN(model_name,num_filter,filter_sizes,num_classes)
# for  in train_dataloader:
#     print(input_ids)
#     model(input_ids,attention_mask)
#     break

tensor([ 101, 4583, 4555, 4638, 3780, 4545, 3175, 3791,  102,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0])
tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])


In [14]:
import pandas as pd
# Export every distinct (label_class, label) pair of the training CSV to
# ./data/label.txt, one "<label_class> <label>" line per pair.
df = pd.read_csv('./data/train_data.csv')
classes = df['label_class']
labels = df['label']
# A set already ignores repeated pairs, so no membership check is needed before adding.
a = set(zip(classes,labels))
with open('./data/label.txt','w',encoding='utf-8') as f:
    # Iteration order of a set holding strings is not stable across interpreter
    # runs, so write the pairs sorted by label (ties broken by class name) to make
    # the file reproducible.
    for label_class,label in sorted(a,key=lambda pair:(pair[1],str(pair[0]))):
        # str() on the class name keeps a non-string value (e.g. a missing
        # entry read as NaN) from raising on concatenation.
        f.write(str(label_class)+" "+str(label)+'\n')

In [2]:
from bert_model import BertTextCNN
# Build the classifier from the project-local bert_model module and print its CNN head.
bert_model_name = './rbt3'
num_filters = 256
filter_sizes = [3,4,5]
num_classes = 16
# NOTE(review): BertTextCNN here is the class imported from bert_model, called
# positionally as (model name, filter count, filter sizes, class count). That
# order differs from the BertTextCNN class defined earlier in this notebook,
# whose leading parameters are (config_path, checkpoint_path, class_nums) —
# verify which definition is meant to be used.
model = BertTextCNN(bert_model_name,num_filters,filter_sizes,num_classes)
# The imported model exposes its CNN head as `text_cnn`; the notebook-defined
# class stores it as `textcnn`, so this line only works with the imported one.
print(model.text_cnn)

Some weights of the model checkpoint at ./rbt3 were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


TextCNN(
  (convs): ModuleList(
    (0): Conv2d(1, 256, kernel_size=(3, 768), stride=(1, 1))
    (1): Conv2d(1, 256, kernel_size=(4, 768), stride=(1, 1))
    (2): Conv2d(1, 256, kernel_size=(5, 768), stride=(1, 1))
  )
  (fc): Linear(in_features=768, out_features=16, bias=True)
)
