<a href="https://colab.research.google.com/github/Jace-Yang/Multiclass_Sentiment_Classification_Chinese/blob/main/Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Set up

### Set up for Colab

In [None]:
# For runing notebook in colab
from google.colab import drive
drive.mount('/content/drive')
import os
root_of_repository = '/content/drive/MyDrive/ADL/Project/'
os.chdir(root_of_repository)

Mounted at /content/drive


In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 15.4 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 80.3 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 64.3 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.1 tokenizers-0.13.2 transformers-4.25.1


### Packages

In [62]:
import torch
from transformers import BertModel, BertTokenizer
from transformers import logging
logging.set_verbosity_error()
import torch.nn as nn
from tqdm.notebook import tqdm
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, RandomSampler

import numpy as np
import pandas as pd
import json
import copy
import time
import gc
from sklearn.model_selection import train_test_split

## Data Preprocessing

In [63]:
SEQ_LENGTH = 128
BATCH_SIZE = 8
LABEL_DICT = {'fear':0, 'neutral':1, 'sad':2, 'surprise':3, 'angry':4, 'happy':5} # Mapping label code and meaning
TOKENIZER = BertTokenizer.from_pretrained("chinese_wwm_ext_pytorch") # Hugging face BertTokenizer to load pretrain model

#tokenizer = BertTokenizer.from_pretrained("hfl/chinese-bert-wwm-ext")
#model = BertModel.from_pretrained("hfl/chinese-bert-wwm-ext")


DEVELOPMENT_SET_PATH = 'data/usual_train.txt'
TEST_SET_PATH = 'data/usual_test_labeled.txt'

def convert_text_to_token(tokenizer, sentence, seq_length):
    """Tokenize sentence

    Args:
        tokenizer (PreTrainedTokenizer): a pretrained tokenizer with special token set to 
            {'unk_token': '[UNK]', 'sep_token': '[SEP]', 
             'pad_token': '[PAD]', 'cls_token': '[CLS]', 
             'mask_token': '[MASK]'}
        sentence (str): 
        seq_length (int): length of maximum input sentence accepted
    
    Returns: tuple(word_ids, segments, attention_masks)
        word_ids (list): tokenized sentence
        segments (list): label segmentation of original sentence and padding
        attention_masks (list): label whether the word is masked
    """ 
    tokens = tokenizer.tokenize(sentence) # Tokenize the sentence
    tokens = ["[CLS]"] + tokens + ["[SEP]"] # Add [CLS] before token and [SEP] after token
    word_ids = tokenizer.convert_tokens_to_ids(tokens) # Generate list of word id
    segments = [0] * len(word_ids) # Label whether it is segmented
    attention_masks = [1] * len(word_ids) # Label whether the word is masked
    # Chop or pad the sentence into a single length - seq_length
    if len(word_ids) < seq_length: # Padding
        length_to_pad = seq_length - len(word_ids)
        word_ids += [0] * length_to_pad # [0] is the index of word "PAD" in the vocabulary table
        segments += [1] * length_to_pad # [1] denotes that this part of words are PAD
        attention_masks += [0] * length_to_pad # Change attention mask of PAD part as [0]
    else: # Chopping
        word_ids = word_ids[:seq_length]
        segments = segments[:seq_length]
        attention_masks = attention_masks[:seq_length]
    assert len(word_ids) == len(segments) == len(attention_masks)
    return word_ids, segments, attention_masks

In [65]:
def genDataLoader(data_type):
    '''Construct dataset loader

    Args:
        data_type (str): 'train' in training, 'val' in validating, 'test' in testing
    '''
    if data_type == 'test':
        with open(TEST_SET_PATH, encoding='utf8') as file:
            data = json.load(file)
    else:
        with open(DEVELOPMENT_SET_PATH, encoding='utf8') as file:
            data = json.load(file)
            # TESTING_STAGE
            if TESTING:
                dev_set, _ = train_test_split(data, train_size=320, random_state=4995)
                train_set, val_set = train_test_split(dev_set, test_size=0.2, random_state=4995)
            else:
                train_set, val_set = train_test_split(data, test_size=0.2, random_state=4995)
            data = train_set if data_type == 'train' else val_set
    ids_pool = []
    segments_pool = []
    masks_pool = []
    target_pool = []
    count = 0
    # Process all the sentences
    for each in data:
        cur_ids, cur_type, cur_mask = convert_text_to_token(TOKENIZER, each['content'], seq_length = SEQ_LENGTH)
        ids_pool.append(cur_ids)
        segments_pool.append(cur_type)
        masks_pool.append(cur_mask)
        cur_target = LABEL_DICT[each['label']]
        target_pool.append([cur_target])
        count += 1
        if count % 2000 == 0:
            print(f'Processed {count} sentences for {data_type}')
    # Construct Data Generater
    data_gen = TensorDataset(torch.LongTensor(np.array(ids_pool)),
                             torch.LongTensor(np.array(segments_pool)),
                             torch.LongTensor(np.array(masks_pool)),
                             torch.LongTensor(np.array(target_pool)))
    sampler = RandomSampler(data_gen)
    loader = DataLoader(data_gen, sampler=sampler, batch_size=BATCH_SIZE)
    return loader

In [66]:
TESTING = True
train_datagen = genDataLoader('train')
val_datagen = genDataLoader('val')
test_datagen = genDataLoader('test')

Processed 2000 sentences for test
Processed 4000 sentences for test


## Modeling

In [67]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DEVICE

device(type='cuda')


### Load Pretrain Model

- `wwm` means whole word masking pretrained upon EXT dataset

In [68]:
# All pretrain models in chinese
MODELS_PATHS_UNITS = {
    'BERT': ('bert-base-chinese', 768),
    'BERT-wwm': ('hfl/chinese-bert-wwm-ext', 768),
    'RoBERTa': ('uer/chinese_roberta_L-12_H-768', 768),
    'RoBERTa-wwm': ('hfl/chinese-roberta-wwm-ext', 768),
    'RoBERTa-wwm-large': ('hfl/chinese-roberta-wwm-ext-large', 1024),
    'Re-trained RoBERTa-wwm': ('hfl/rbt3', 768),
    'Re-trained RoBERTa-wwm-large': ('hfl/rbtl3', 1024),
}

In [69]:
class Model(nn.Module):
    def __init__(self, num_classes, model_name):
        super(Model, self).__init__()
        self.bert = BertModel.from_pretrained(MODELS_PATHS_UNITS[model_name][0], return_dict=False)  # /roberta-wwm-ext pretrain/
        for param in self.bert.parameters():
            param.requires_grad = True  # Allow all parameters to be updated
            
        self.fc = nn.Linear(MODELS_PATHS_UNITS[model_name][1], num_classes)   # A layer to calculate logits of 6 ouput classes from 768 (hidden size of BERT)
            # Note: We are going to use Cross-EntropyLoss with a softmax “embedded”.
    def forward(self, x, token_type_ids, attention_mask):
        context = x  # Input sentence
        segments = token_type_ids
        mask = attention_mask  # Only mask the padding part
        _, pooled = self.bert(context, token_type_ids=segments, attention_mask=mask)
        logits = self.fc(pooled) # probability of 6 classes
        return logits

### Fine-tuning

In [70]:
def train(model, model_name, train_loader, test_loader, optimizer, device=DEVICE):
    '''Train the model
    '''

    model.train()
    best_acc = 0.0
    training_loss = []
    training_acc = []
    validation_loss = []
    validation_acc = []
    time_usage = []
    epochs = list(range(1, NUM_EPOCHS + 1))
    for epoch in tqdm(epochs):
        batch_idx = 0
        running_loss = 0
        running_correct = 0
        training_start_time = time.time()
        for (word_ids, token_types, attention_masks, y) in tqdm(train_loader):
            word_ids, token_types, attention_masks, y = word_ids.to(device), token_types.to(device), attention_masks.to(device), y.to(device)
            y_pred = model(word_ids, token_type_ids=token_types, attention_mask=attention_masks)
            optimizer.zero_grad()
            loss = F.cross_entropy(y_pred, y.squeeze()) # Calculate Loss
            loss.backward()
            optimizer.step()
            # Logging the loss and accuracy
            running_loss += loss.item()
            pred = y_pred.argmax(dim=1) # Get the maximum probability
            running_correct += (pred == y.view_as(pred)).sum().item()
            batch_idx += 1
            # Print Every 500 batch
            if(batch_idx + 1) % 500 == 0:
                print('Epoch: {} [{}/{} ({:.2f}%)]\tBatch Loss: {:.6f}\tAvg Loss: {:.6f}\t'.format(
                    epoch, 
                    (batch_idx+1) * len(word_ids),
                    len(train_loader.dataset),
                    100. * batch_idx / len(train_loader),
                    loss.item(),
                    running_loss / batch_idx)
                    )
        # Compute time cost
        time_cost = time.time() - training_start_time
        time_usage.append(time_cost)
        print(f'Epoch {epoch} finished, took {time_cost:.1f}s')

        # Logging loss and accuracy, average on every updates(batches) in the training stage
        training_loss.append(running_loss / len(train_loader))
        training_acc.append(running_correct / len(train_loader.dataset))
        
        # Evaluate performance on testset
        val_loss, val_acc = test(model, test_loader) 
        validation_loss.append(val_loss)
        validation_acc.append(val_acc)

        # Keep Best model
        if best_acc < val_acc:
            model_path = f'{BEST_MODEL_FOLDER}best_{model_name}.pth' if not TESTING else f'{BEST_MODEL_FOLDER}best_testing_{model_name}.pth'
            torch.save(model.state_dict(), model_path)
            best_acc = val_acc
    # Output logs after all epoches
    progress_log = pd.DataFrame({'Model': model_name,
                                 'Epoch': epochs,
                                 'training_loss': training_loss,
                                 'training_acc': training_acc,
                                 'validation_loss': validation_loss,
                                 'validation_acc': validation_acc,
                                 'time_usage': time_usage
                                 })
    return progress_log

def test(model, test_loader, device=DEVICE):
    '''Evaluate the model
    '''
    model.eval()
    test_loss = 0.0
    correct = 0
    for (word_ids, token_types, attention_masks, y) in test_loader:
        word_ids, token_types, attention_masks, y = word_ids.to(device), token_types.to(device), attention_masks.to(device), y.to(device)
        with torch.no_grad():
            y_ = model(word_ids, token_type_ids=token_types, attention_mask=attention_masks)
        test_loss += F.cross_entropy(y_, y.squeeze()).item()
        pred = y_.max(-1, keepdim=True)[1] # Obtain the maximum probability
        correct += pred.eq(y.view_as(pred)).sum().item()
    test_loss /= len(test_loader)
    test_acc = correct / len(test_loader.dataset)
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.format(
          test_loss, correct, len(test_loader.dataset),
          100. * test_acc))
    return test_loss, test_acc

In [72]:
# Setting up
REPLACE_EXIST = True
NUM_EPOCHS = 2
BEST_MODEL_FOLDER = 'result/model/'  # Path to save best model
TRAINING_LOGS_FOLDER = 'result/training/'  # Path to save training logs

# Fine-tune each pretrain model
for model_name in tqdm(MODELS_PATHS_UNITS.keys()):
    print('-'*10, model_name, '-'*10)
    model_path = f'{BEST_MODEL_FOLDER}best_{model_name}.pth' if not TESTING else f'{BEST_MODEL_FOLDER}best_testing_{model_name}.pth'
    log_path = f'{TRAINING_LOGS_FOLDER}{model_name}.pickle'
    if not os.path.exists(model_path) or not os.path.exists(log_path) or REPLACE_EXIST:
        pretrained_model_ = Model(num_classes=6, model_name=model_name)
        sentiment_classifier = pretrained_model_.to(DEVICE)
        gc.collect()
        optimizer = torch.optim.Adam(sentiment_classifier.parameters(), lr=2e-5)
        training_log = train(sentiment_classifier, model_name, train_datagen, val_datagen, optimizer)
        training_log.to_pickle(log_path)

  0%|          | 0/7 [00:00<?, ?it/s]

---------- BERT ----------


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 1 finished, took 7.0s
Test set: Average loss: 1.3235, Accuracy: 34/64 (53.12%)


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 2 finished, took 6.6s
Test set: Average loss: 0.9741, Accuracy: 43/64 (67.19%)
---------- BERT-wwm ----------


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 1 finished, took 6.6s
Test set: Average loss: 1.5525, Accuracy: 30/64 (46.88%)


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 2 finished, took 6.3s
Test set: Average loss: 1.0749, Accuracy: 43/64 (67.19%)
---------- RoBERTa ----------


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 1 finished, took 6.4s
Test set: Average loss: 1.2852, Accuracy: 37/64 (57.81%)


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 2 finished, took 6.4s
Test set: Average loss: 1.0342, Accuracy: 44/64 (68.75%)
---------- RoBERTa-wwm ----------


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 1 finished, took 6.5s
Test set: Average loss: 1.3326, Accuracy: 30/64 (46.88%)


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 2 finished, took 6.5s
Test set: Average loss: 0.9722, Accuracy: 40/64 (62.50%)
---------- RoBERTa-wwm-large ----------


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 1 finished, took 22.3s
Test set: Average loss: 1.0631, Accuracy: 40/64 (62.50%)


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 2 finished, took 21.8s
Test set: Average loss: 0.8701, Accuracy: 44/64 (68.75%)
---------- Re-trained RoBERTa-wwm ----------


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 1 finished, took 1.8s
Test set: Average loss: 1.4306, Accuracy: 30/64 (46.88%)


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 2 finished, took 1.7s
Test set: Average loss: 1.2841, Accuracy: 32/64 (50.00%)
---------- Re-trained RoBERTa-wwm-large ----------


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 1 finished, took 3.0s
Test set: Average loss: 1.3024, Accuracy: 38/64 (59.38%)


  0%|          | 0/32 [00:00<?, ?it/s]

Epoch 2 finished, took 3.2s
Test set: Average loss: 1.0039, Accuracy: 42/64 (65.62%)


In [73]:
results = []
for model_name in tqdm(MODELS_PATHS_UNITS.keys()):
    log_path = f'{TRAINING_LOGS_FOLDER}{model_name}.pickle'
    if os.path.exists(log_path):
        training_log = pd.read_pickle(log_path)
        results.append(training_log)
pd.concat(results, axis=0)  

  0%|          | 0/7 [00:00<?, ?it/s]

Unnamed: 0,Model,Epoch,training_loss,training_acc,validation_loss,validation_acc,time_usage
0,BERT,1,1.557147,0.457031,1.323485,0.53125,6.954685
1,BERT,2,1.036218,0.722656,0.974068,0.671875,6.567722
0,BERT-wwm,1,1.703167,0.308594,1.552531,0.46875,6.597634
1,BERT-wwm,2,1.260415,0.621094,1.074913,0.671875,6.328618
0,RoBERTa,1,1.583706,0.40625,1.285154,0.578125,6.449116
1,RoBERTa,2,1.044225,0.648438,1.034226,0.6875,6.383154
0,RoBERTa-wwm,1,1.587301,0.371094,1.332632,0.46875,6.503706
1,RoBERTa-wwm,2,1.014825,0.683594,0.97223,0.625,6.464192
0,RoBERTa-wwm-large,1,1.511524,0.375,1.063066,0.625,22.287153
1,RoBERTa-wwm-large,2,0.701687,0.78125,0.870093,0.6875,21.830596


## Evaluating

In [None]:
model_names = []
losses = []
accuracys = []
# Fine-tune each pretrain model
for model_name in tqdm(MODELS_PATHS_UNITS.keys()):
    # Initialize a model
    sentiment_classifier = Model(num_classes=6, model_name=model_name).to(DEVICE)

    # Load model parameters
    best_model_path = f'{BEST_MODEL_FOLDER}best_{model_name}.pth'
    sentiment_classifier.load_state_dict(torch.load(best_model_path))

    # Evaluate on testset
    loss, accuracy = test(sentiment_classifier, test_datagen)

    # Logging
    model_names.append(model_name)
    losses.append(loss)
    accuracys = accuracys.append(accuracy)

test_result = pd.DataFrame({
    'model_names': model_names,
    'loss': losses,
    'accuracy': accuracys,
    }
)
test_result

  0%|          | 0/7 [00:00<?, ?it/s]

KeyboardInterrupt: ignored

In [None]:
from build_data import genDataLoader, convert_text_to_token
import torch
from transformers import BertTokenizer, BertModel
import torch.nn as nn
import json
import numpy as np

# 复用模型结构
class Model(nn.Module):
    def __init__(self, num_classes):
        super(Model, self).__init__()
        self.bert = BertModel.from_pretrained('chinese_wwm_ext_pytorch', return_dict=False)  # /roberta-wwm-ext pretrain/
        for param in self.bert.parameters():
            param.requires_grad = True  # 所有参数求梯度
        self.fc = nn.Linear(768, num_classes)   # 768 -> 6
    def forward(self, x, token_type_ids, attention_mask):
        context = x  # 输入的句子
        segments = token_type_ids
        mask = attention_mask  # 对padding部分进行mask，和句子相同size，padding部分用0表示，如：[1, 1, 1, 1, 0, 0]
        _, pooled = self.bert(context, token_type_ids=segments, attention_mask=mask)
        out = self.fc(pooled)   # 得到6分类概率
        return out


LABEL_DICT = {0:'fear', 1:'neutral', 2:'sad', 3:'surprise', 4:'angry', 5:'happy'} # 标签映射表
SEQ_LENGTH = 128
TOKENIZER = BertTokenizer.from_pretrained("chinese_wwm_ext_pytorch") #模型[roberta-wwm-ext]所在的目录名称
# 加载模型
MODEL = Model(num_classes=6)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MODEL = MODEL.to(DEVICE)

Some weights of the model checkpoint at chinese_wwm_ext_pytorch were not used when initializing BertModel: ['cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


模型加载完毕


In [None]:
sentiment_classifier = copy.deepcopy(pretrained_model)
sentiment_classifier.load_state_dict(torch.load(BEST_MODEL_PATH))
print('模型加载完毕')

def pred(word, model):
    cur_ids, cur_type, cur_mask = convert_text_to_token(TOKENIZER, word, seq_length=SEQ_LENGTH)
    cur_ids, cur_type, cur_mask = torch.LongTensor(np.array([cur_ids])).to(DEVICE), torch.LongTensor(np.array([cur_type])).to(DEVICE), torch.LongTensor(np.array([cur_mask])).to(DEVICE) # 数据构造成tensor形式
    with torch.no_grad():
        y_ = model(cur_ids, token_type_ids=cur_type, attention_mask=cur_mask)
        pred = y_.max(-1, keepdim=True)[1]  # 取最大值
        # cur_pre = LABEL_DICT[int(pred[0][0].cuda().data.cpu().numpy())] # 预测的情绪
        cur_pre = LABEL_DICT[int(pred[0][0].data.cpu().numpy())] # 预测的情绪
        print(cur_pre)


In [None]:
pred('草泥马好可爱', MODEL)

angry


In [None]:
pred('草泥马是什么神仙物种', MODEL)

angry


In [None]:
pred('草！我爱死你了！！！！！！！！！！！', MODEL)

angry


In [None]:
pred('世界上五大最可爱动物:草泥马第二', MODEL)

happy


In [None]:




if __name__ == '__main__':
    test(MODEL)

[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [None]:
### Weighted Stacking