__final

In [None]:
import numpy as np
import pandas as pd
import json
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from transformers import BertTokenizer, BertModel

# Tokenizer shared by every dataset built below; it is bound as the default
# `tokenizer` argument of getloader().
tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')

# Locations of the JSON splits that are loaded with read_json() further down.
TRAIN_DATA_PATH = '/kaggle/input/nuaddataset/NQuAD_train.json'
TEST_DATA_PATH = '/kaggle/input/nuaddataset/NQuAD_test.json'
VAL_DATA_PATH = '/kaggle/input/validation-set/validation_split.json'


def read_json(file):
    """Parse the UTF-8 encoded JSON document at path ``file`` and return it."""
    with open(file, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)


# Parsed JSON for each split, loaded eagerly at import time.
TRAIN_DATA = read_json(TRAIN_DATA_PATH)
VALIDATION_DATA = read_json(VAL_DATA_PATH)
TEST_DATA = read_json(TEST_DATA_PATH)

# Kept as a plain string ("cuda" / "cpu"); passed to .to(...) throughout.
device = "cuda" if torch.cuda.is_available() else "cpu"

LEARNING_RATE = 0.05  # step size handed to the SGD optimizer
MAX_LEN = 32  # tokenizer max_length: tokens kept per sentence / question
BATCH_SIZE = 20  # default batch size of getloader()
SEQ_LEN = 20  # sentence rows per example; also sizes the model's fc1 input


def cm_encoding(answer_options):
    """Encode numeric answer options as per-character one-hot grids.

    Each option is rendered with ``str()`` and written into its own
    ``(10, 12)`` grid: row ``j`` is the one-hot code of the ``j``-th
    character, where columns 0-9 are the decimal digits, column 10 is the
    decimal point and column 11 is the minus sign.  The last row additionally
    gets a 1 in column ``len(text) - 1`` as a length ("magnitude") marker; it
    shares that row with the tenth character when there is one.

    Text that does not fit the grid is dropped instead of raising: characters
    past the tenth are truncated, symbols outside the alphabet (for example
    the ``e``/``+`` of scientific notation) are skipped, and the magnitude
    marker is clamped to the last column.  Numerals of at most ten characters
    made of digits, ``.`` and ``-`` encode exactly as they always did.

    Args:
        answer_options: Iterable of at most four numbers.

    Returns:
        ``torch.float32`` tensor of shape ``(4, 10, 12)``.
    """
    num_options, max_chars, alphabet_size = 4, 10, 12
    point_col, minus_col = 10, 11

    encoding = np.zeros((num_options, max_chars, alphabet_size))
    for i, option in enumerate(answer_options):
        numeral = str(option)
        # Only the first `max_chars` characters have a row to live in.
        for j, char in enumerate(numeral[:max_chars]):
            if char == '-':
                encoding[i, j, minus_col] = 1
            elif char == '.':
                encoding[i, j, point_col] = 1
            elif '0' <= char <= '9':
                encoding[i, j, int(char)] = 1
            # Anything else (e.g. 'e', '+', 'n', 'a') has no column: skip it.

        # Length marker in the last row, clamped so long numerals stay in range.
        magnitude = min(len(numeral) - 1, alphabet_size - 1)
        encoding[i, -1, magnitude] = 1
    return torch.tensor(encoding, dtype=torch.float32)


class Chinese_Article_Data(Dataset):
    """Dataset that turns question records into fixed-size tensors.

    Every record is a mapping that provides the keys
    ``'sentences_containing_the_numeral_in_answer_options'`` (a list of
    sentence lists, one per answer option), ``'question_stem'``,
    ``'answer_options'`` (numeric strings, possibly with thousands commas)
    and ``'ans'`` (index of the correct option).

    Args:
        file: Already-loaded sequence of records (not a path).
        tokenizer: Tokenizer exposing ``encode_plus``.
        max_len: Number of tokens every sentence is padded/truncated to.
        seq_len: Number of sentence rows emitted per example.
    """

    NUM_OPTIONS = 4  # answer options per question
    PAD_SENTENCE = 'NULL'  # text used to fill unused sentence rows

    def __init__(self, file, tokenizer, max_len, seq_len):
        self.data = file
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.seq_len = seq_len

    def __len__(self):
        return len(self.data)

    def _encode(self, text):
        """Tokenize ``text`` to exactly ``max_len`` tokens.

        Returns the ``(input_ids, attention_mask)`` pair, each of shape
        ``(1, max_len)``.  ``padding='max_length'`` already forces the
        padding, so the deprecated ``pad_to_max_length`` flag is not passed.
        """
        encoding = self.tokenizer.encode_plus(
            text,
            max_length=self.max_len,
            truncation=True,
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt',
        )
        return encoding['input_ids'], encoding['attention_mask']

    def __getitem__(self, idx):
        """Build the model inputs for record ``idx``.

        Returns:
            Tuple ``(news_article_input_ids, news_article_attention_list,
            question_input_ids, question_attention_mask, answer,
            answer_option)`` where the first two are float tensors of shape
            ``(seq_len, max_len)``, the question tensors have shape
            ``(1, max_len)``, ``answer`` is a one-hot float vector of length 4
            and ``answer_option`` is the ``cm_encoding`` of the options.
        """
        record = self.data[idx]

        # Flatten the per-option sentence lists (at most NUM_OPTIONS groups;
        # records with fewer groups no longer raise) and keep what fits.
        groups = record['sentences_containing_the_numeral_in_answer_options']
        sentences = [
            sentence
            for group in groups[:self.NUM_OPTIONS]
            for sentence in group
        ][:self.seq_len]

        # Every row is written below, so uninitialised storage is fine here.
        news_article_input_ids = torch.empty((self.seq_len, self.max_len))
        news_article_attention_list = torch.empty((self.seq_len, self.max_len))
        for row, sentence in enumerate(sentences):
            ids, mask = self._encode(sentence)
            news_article_input_ids[row] = ids
            news_article_attention_list[row] = mask

        # Fill the remaining rows with one shared encoding of the pad text
        # instead of re-tokenizing it for every row.
        if len(sentences) < self.seq_len:
            pad_ids, pad_mask = self._encode(self.PAD_SENTENCE)
            news_article_input_ids[len(sentences):] = pad_ids
            news_article_attention_list[len(sentences):] = pad_mask

        question_input_ids, question_attention_mask = self._encode(record['question_stem'])

        # Options arrive as strings such as "1,234.5"; strip thousands commas.
        answer_option = cm_encoding(
            [float(x.replace(',', '')) for x in record['answer_options']]
        )
        answer = torch.tensor(
            [1.0 if x == record['ans'] else 0.0 for x in range(self.NUM_OPTIONS)]
        )
        return (
            news_article_input_ids,
            news_article_attention_list,
            question_input_ids,
            question_attention_mask,
            answer,
            answer_option,
        )


def getloader(file=None, tokenizer=tokenizer, max_len=MAX_LEN, batch_size=BATCH_SIZE, seq_len=SEQ_LEN):
    """Return a sequential (non-shuffling) DataLoader over the records in ``file``.

    The records are wrapped in ``Chinese_Article_Data``; batches are produced
    by four worker processes into pinned memory.
    """
    return DataLoader(
        dataset=Chinese_Article_Data(file, tokenizer, max_len, seq_len),
        batch_size=batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
    )






class Chinese_answer_Model(nn.Module):
    """BERT-based classifier that picks one of four numeric answer options.

    The pooled BERT vectors of ``SEQ_LEN`` article sentences and of the
    question are concatenated and projected to four features (``fc1``); each
    encoded answer option is reduced to one score (``linear_transform``); the
    eight values are mixed by ``out`` and normalised with softmax.
    """

    def __init__(self):
        super().__init__()
        self.bert = BertModel.from_pretrained("bert-base-chinese", return_dict=True)
        hidden_size = self.bert.config.hidden_size  # 768 for bert-base-chinese
        self.fc1 = nn.Linear(hidden_size * (SEQ_LEN + 1), 4)
        self.out = nn.Linear(8, 4)
        self.linear_transform = nn.Linear(10 * 12, 1)  # one score per option

    def forward(self, news_article_input_ids, news_article_attention_mask, question_input_ids, question_attention_mask, answer_option):
        """Score the four options for every example in the batch.

        Args:
            news_article_input_ids: ``(batch, SEQ_LEN, max_len)`` token ids.
            news_article_attention_mask: Mask matching the article ids.
            question_input_ids: ``(batch, 1, max_len)`` token ids.
            question_attention_mask: Mask matching the question ids.
            answer_option: ``(batch, 4, 10, 12)`` encoded answer options.

        Returns:
            ``(batch, 4)`` tensor of softmax probabilities, on the same device
            as ``news_article_input_ids``.
        """
        batch_size = news_article_input_ids.shape[0]
        max_len = news_article_input_ids.shape[-1]
        # Follow the inputs rather than a module-level global so the model
        # works unchanged on CPU, a single GPU or inside DataParallel.
        target_device = news_article_input_ids.device

        # Encode all sentences of the whole batch in one BERT call instead of
        # one call per example; sequences are independent of each other.
        article_out = self.bert(
            input_ids=news_article_input_ids.reshape(-1, max_len).long(),
            attention_mask=news_article_attention_mask.reshape(-1, max_len),
        )
        question_out = self.bert(
            input_ids=question_input_ids.reshape(batch_size, -1).long(),
            attention_mask=question_attention_mask.reshape(batch_size, -1),
        )

        # (batch, SEQ_LEN * hidden) followed by the question's hidden vector,
        # the same layout the per-example concatenation produced.
        article_features = article_out.pooler_output.reshape(batch_size, -1)
        features = torch.cat((article_features, question_out.pooler_output), dim=1)
        text_features = torch.relu(self.fc1(features))

        # Flatten each option grid and reduce it to a single score.
        options = answer_option.to(target_device).reshape(batch_size, -1, 10 * 12)
        option_scores = self.linear_transform(options).squeeze(-1)

        combined = torch.cat((text_features, option_scores), dim=1)
        return torch.softmax(self.out(combined), dim=1)





# answer_option[i].view(-1)

# One loader per split, all built with the getloader() defaults.
# NOTE(review): getloader() creates every loader with shuffle=False, so the
# training batches are visited in file order on every epoch.
train_data_loader = getloader(TRAIN_DATA)
validation_data_loader = getloader(VALIDATION_DATA)
test_data_loader = getloader(TEST_DATA)

# Resume from the previous round's checkpoint.  map_location lets a
# checkpoint that was saved from GPU load on a CPU-only host as well.
# (To train from scratch instead, skip the load_state_dict call.)
model = Chinese_answer_Model()
model.load_state_dict(
    torch.load('/kaggle/input/model1/Ayush_NLP_R2_final.pth', map_location=device)
)
model.to(device)
model = nn.DataParallel(model)

def loss_fn(outputs, targets):
    """Cross-entropy between predicted probabilities and the targets.

    ``Chinese_answer_Model.forward`` already applies softmax, so ``outputs``
    are probabilities.  ``nn.CrossEntropyLoss`` expects unnormalised logits
    and would apply ``log_softmax`` a second time, which flattens the
    distribution and weakens the gradient; the loss is therefore computed
    directly from the probabilities.

    Args:
        outputs: ``(batch, classes)`` tensor of probabilities.
        targets: Either a float ``(batch, classes)`` tensor of target
            distributions (e.g. one-hot rows) or an integer ``(batch,)``
            tensor of class indices.

    Returns:
        Scalar tensor with the mean loss over the batch.
    """
    # Clamp before the log so exact zeros give a finite value, not -inf/NaN.
    log_probs = torch.log(outputs.clamp_min(1e-12))
    if not torch.is_floating_point(targets):
        return nn.NLLLoss()(log_probs, targets.long())
    return -(targets * log_probs).sum(dim=1).mean()


# Plain SGD over every parameter of the wrapped model (BERT included).
optimizer = torch.optim.SGD(params=model.parameters(), lr=LEARNING_RATE)

# Number of passes train() makes over the training loader.
EPOCH = 5


def _batch_loss(batch):
    """Run the global ``model`` on one loader batch and return its loss tensor."""
    (article_ids, article_mask, question_ids, question_mask, answer, answer_option) = batch
    outputs = model(
        article_ids.to(device),
        article_mask.to(device),
        question_ids.to(device),
        question_mask.to(device),
        answer_option,
    )
    return loss_fn(outputs, answer.to(device))


def train(data_loader, val_loader):
    """Train the global ``model`` for ``EPOCH`` epochs with per-epoch validation.

    Args:
        data_loader: Loader yielding the training batches.
        val_loader: Loader yielding the validation batches.

    Returns:
        ``(train_loss_list, val_loss_list)``: the mean training and mean
        validation loss of every epoch, in order.
    """
    train_loss_list, val_loss_list = [], []
    for epochs in range(EPOCH):
        train_loss = 0
        val_loss = 0

        # Optimisation pass.
        model.train()
        for _, data in tqdm(enumerate(data_loader, 0), unit="batch", total=len(data_loader)):
            optimizer.zero_grad()
            loss = _batch_loss(data)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Validation pass: no gradients, no parameter updates.
        model.eval()
        with torch.no_grad():
            for _, data in tqdm(enumerate(val_loader, 0), unit="batch", total=len(val_loader)):
                val_loss += _batch_loss(data).item()

        mean_train = train_loss / len(data_loader)
        mean_val = val_loss / len(val_loader)
        print(f"[{epochs + 1}/{EPOCH}], Training Loss: {mean_train} Validation_loss: {mean_val}")
        train_loss_list.append(mean_train)
        val_loss_list.append(mean_val)
    return train_loss_list, val_loss_list


# Run the training loop; each list holds one mean loss value per epoch.
trainLoss, valLoss = train(train_data_loader, validation_data_loader)


In [None]:
# `model` is the nn.DataParallel wrapper; save the wrapped module's weights so
# the file can be loaded into a bare Chinese_answer_Model later.
torch.save(model.module.state_dict(), 'Ayush_NLP_R3_final.pth')

To Load and Test

In [27]:
import numpy as np
import pandas as pd
import json
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from transformers import BertTokenizer, BertModel
def read_json(file):
    """Parse the UTF-8 encoded JSON document at path ``file`` and return it."""
    with open(file, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

# Kept as a plain string ("cuda" / "cpu"); passed to .to(...) throughout.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Tokenizer bound as the default `tokenizer` argument of getloader().
tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
# LEARNING_RATE = 0.5
MAX_LEN = 32  # tokenizer max_length: tokens kept per sentence / question
BATCH_SIZE = 1  # default batch size of getloader() in this cell
SEQ_LEN = 20  # sentence rows per example; also sizes the model's fc1 input
def cm_encoding(answer_options):
    """Encode numeric answer options as per-character one-hot grids.

    Each option is rendered with ``str()`` and written into its own
    ``(10, 12)`` grid: row ``j`` is the one-hot code of the ``j``-th
    character, where columns 0-9 are the decimal digits, column 10 is the
    decimal point and column 11 is the minus sign.  The last row additionally
    gets a 1 in column ``len(text) - 1`` as a length ("magnitude") marker; it
    shares that row with the tenth character when there is one.

    Text that does not fit the grid is dropped instead of raising: characters
    past the tenth are truncated, symbols outside the alphabet (for example
    the ``e``/``+`` of scientific notation) are skipped, and the magnitude
    marker is clamped to the last column.  Numerals of at most ten characters
    made of digits, ``.`` and ``-`` encode exactly as they always did.

    Args:
        answer_options: Iterable of at most four numbers.

    Returns:
        ``torch.float32`` tensor of shape ``(4, 10, 12)``.
    """
    num_options, max_chars, alphabet_size = 4, 10, 12
    point_col, minus_col = 10, 11

    encoding = np.zeros((num_options, max_chars, alphabet_size))
    for i, option in enumerate(answer_options):
        numeral = str(option)
        # Only the first `max_chars` characters have a row to live in.
        for j, char in enumerate(numeral[:max_chars]):
            if char == '-':
                encoding[i, j, minus_col] = 1
            elif char == '.':
                encoding[i, j, point_col] = 1
            elif '0' <= char <= '9':
                encoding[i, j, int(char)] = 1
            # Anything else (e.g. 'e', '+', 'n', 'a') has no column: skip it.

        # Length marker in the last row, clamped so long numerals stay in range.
        magnitude = min(len(numeral) - 1, alphabet_size - 1)
        encoding[i, -1, magnitude] = 1
    return torch.tensor(encoding, dtype=torch.float32)


class Chinese_Article_Data(Dataset):
    """Dataset that turns question records into fixed-size tensors.

    Every record is a mapping that provides the keys
    ``'sentences_containing_the_numeral_in_answer_options'`` (a list of
    sentence lists, one per answer option), ``'question_stem'``,
    ``'answer_options'`` (numeric strings, possibly with thousands commas)
    and ``'ans'`` (index of the correct option).

    Args:
        file: Already-loaded sequence of records (not a path).
        tokenizer: Tokenizer exposing ``encode_plus``.
        max_len: Number of tokens every sentence is padded/truncated to.
        seq_len: Number of sentence rows emitted per example.
    """

    NUM_OPTIONS = 4  # answer options per question
    PAD_SENTENCE = 'NULL'  # text used to fill unused sentence rows

    def __init__(self, file, tokenizer, max_len, seq_len):
        self.data = file
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.seq_len = seq_len

    def __len__(self):
        return len(self.data)

    def _encode(self, text):
        """Tokenize ``text`` to exactly ``max_len`` tokens.

        Returns the ``(input_ids, attention_mask)`` pair, each of shape
        ``(1, max_len)``.  ``padding='max_length'`` already forces the
        padding, so the deprecated ``pad_to_max_length`` flag is not passed.
        """
        encoding = self.tokenizer.encode_plus(
            text,
            max_length=self.max_len,
            truncation=True,
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt',
        )
        return encoding['input_ids'], encoding['attention_mask']

    def __getitem__(self, idx):
        """Build the model inputs for record ``idx``.

        Returns:
            Tuple ``(news_article_input_ids, news_article_attention_list,
            question_input_ids, question_attention_mask, answer,
            answer_option)`` where the first two are float tensors of shape
            ``(seq_len, max_len)``, the question tensors have shape
            ``(1, max_len)``, ``answer`` is a one-hot float vector of length 4
            and ``answer_option`` is the ``cm_encoding`` of the options.
        """
        record = self.data[idx]

        # Flatten the per-option sentence lists (at most NUM_OPTIONS groups;
        # records with fewer groups no longer raise) and keep what fits.
        groups = record['sentences_containing_the_numeral_in_answer_options']
        sentences = [
            sentence
            for group in groups[:self.NUM_OPTIONS]
            for sentence in group
        ][:self.seq_len]

        # Every row is written below, so uninitialised storage is fine here.
        news_article_input_ids = torch.empty((self.seq_len, self.max_len))
        news_article_attention_list = torch.empty((self.seq_len, self.max_len))
        for row, sentence in enumerate(sentences):
            ids, mask = self._encode(sentence)
            news_article_input_ids[row] = ids
            news_article_attention_list[row] = mask

        # Fill the remaining rows with one shared encoding of the pad text
        # instead of re-tokenizing it for every row.
        if len(sentences) < self.seq_len:
            pad_ids, pad_mask = self._encode(self.PAD_SENTENCE)
            news_article_input_ids[len(sentences):] = pad_ids
            news_article_attention_list[len(sentences):] = pad_mask

        question_input_ids, question_attention_mask = self._encode(record['question_stem'])

        # Options arrive as strings such as "1,234.5"; strip thousands commas.
        answer_option = cm_encoding(
            [float(x.replace(',', '')) for x in record['answer_options']]
        )
        answer = torch.tensor(
            [1.0 if x == record['ans'] else 0.0 for x in range(self.NUM_OPTIONS)]
        )
        return (
            news_article_input_ids,
            news_article_attention_list,
            question_input_ids,
            question_attention_mask,
            answer,
            answer_option,
        )


def getloader(file=None, tokenizer=tokenizer, max_len=MAX_LEN, batch_size=BATCH_SIZE, seq_len=SEQ_LEN):
    """Return a sequential (non-shuffling) DataLoader over the records in ``file``.

    The records are wrapped in ``Chinese_Article_Data``; batches are produced
    by four worker processes into pinned memory.
    """
    return DataLoader(
        dataset=Chinese_Article_Data(file, tokenizer, max_len, seq_len),
        batch_size=batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
    )
class Chinese_answer_Model(nn.Module):
    """BERT-based classifier that picks one of four numeric answer options.

    The pooled BERT vectors of ``SEQ_LEN`` article sentences and of the
    question are concatenated and projected to four features (``fc1``); each
    encoded answer option is reduced to one score (``linear_transform``); the
    eight values are mixed by ``out`` and normalised with softmax.
    """

    def __init__(self):
        super().__init__()
        self.bert = BertModel.from_pretrained("bert-base-chinese", return_dict=True)
        hidden_size = self.bert.config.hidden_size  # 768 for bert-base-chinese
        self.fc1 = nn.Linear(hidden_size * (SEQ_LEN + 1), 4)
        self.out = nn.Linear(8, 4)
        self.linear_transform = nn.Linear(10 * 12, 1)  # one score per option

    def forward(self, news_article_input_ids, news_article_attention_mask, question_input_ids, question_attention_mask, answer_option):
        """Score the four options for every example in the batch.

        Args:
            news_article_input_ids: ``(batch, SEQ_LEN, max_len)`` token ids.
            news_article_attention_mask: Mask matching the article ids.
            question_input_ids: ``(batch, 1, max_len)`` token ids.
            question_attention_mask: Mask matching the question ids.
            answer_option: ``(batch, 4, 10, 12)`` encoded answer options.

        Returns:
            ``(batch, 4)`` tensor of softmax probabilities, on the same device
            as ``news_article_input_ids``.
        """
        batch_size = news_article_input_ids.shape[0]
        max_len = news_article_input_ids.shape[-1]
        # Follow the inputs rather than a module-level global so the model
        # works unchanged on CPU, a single GPU or inside DataParallel.
        target_device = news_article_input_ids.device

        # Encode all sentences of the whole batch in one BERT call instead of
        # one call per example; sequences are independent of each other.
        article_out = self.bert(
            input_ids=news_article_input_ids.reshape(-1, max_len).long(),
            attention_mask=news_article_attention_mask.reshape(-1, max_len),
        )
        question_out = self.bert(
            input_ids=question_input_ids.reshape(batch_size, -1).long(),
            attention_mask=question_attention_mask.reshape(batch_size, -1),
        )

        # (batch, SEQ_LEN * hidden) followed by the question's hidden vector,
        # the same layout the per-example concatenation produced.
        article_features = article_out.pooler_output.reshape(batch_size, -1)
        features = torch.cat((article_features, question_out.pooler_output), dim=1)
        text_features = torch.relu(self.fc1(features))

        # Flatten each option grid and reduce it to a single score.
        options = answer_option.to(target_device).reshape(batch_size, -1, 10 * 12)
        option_scores = self.linear_transform(options).squeeze(-1)

        combined = torch.cat((text_features, option_scores), dim=1)
        return torch.softmax(self.out(combined), dim=1)
def loss_fn(outputs, targets):
    """Cross-entropy between predicted probabilities and the targets.

    ``Chinese_answer_Model.forward`` already applies softmax, so ``outputs``
    are probabilities.  ``nn.CrossEntropyLoss`` expects unnormalised logits
    and would apply ``log_softmax`` a second time, which flattens the
    distribution and weakens the gradient; the loss is therefore computed
    directly from the probabilities.

    Args:
        outputs: ``(batch, classes)`` tensor of probabilities.
        targets: Either a float ``(batch, classes)`` tensor of target
            distributions (e.g. one-hot rows) or an integer ``(batch,)``
            tensor of class indices.

    Returns:
        Scalar tensor with the mean loss over the batch.
    """
    # Clamp before the log so exact zeros give a finite value, not -inf/NaN.
    log_probs = torch.log(outputs.clamp_min(1e-12))
    if not torch.is_floating_point(targets):
        return nn.NLLLoss()(log_probs, targets.long())
    return -(targets * log_probs).sum(dim=1).mean()
from sklearn.metrics import f1_score, accuracy_score,  classification_report
def evaluate(file_path,model):
    """Print the accuracy of ``model`` on the records stored at ``file_path``.

    The JSON file is read as UTF-8, wrapped with ``getloader`` and scored in
    inference mode (dropout disabled, no autograd graph), so repeated runs
    give the same number.  The model's previous train/eval mode is restored
    afterwards.

    Args:
        file_path: Path of a JSON file holding the list of records.
        model: Model called with the five input tensors of every batch; must
            return one row of option scores per example.
    """
    # Same encoding as read_json(): the records contain Chinese text.
    with open(file_path, 'r', encoding='utf-8') as f:
        file = json.load(f)

    data = getloader(file)

    num_correct = 0
    num_wrong = 0
    was_training = model.training
    model.eval()
    with torch.no_grad():
        for d in data:
            news_article_input_ids, news_article_attention_mask, question_input_ids, question_attention_mask, answer, answer_option = d
            news_article_input_ids = news_article_input_ids.to(device)
            news_article_attention_mask = news_article_attention_mask.to(device)
            question_input_ids = question_input_ids.to(device)
            question_attention_mask = question_attention_mask.to(device)
            answer = answer.to(device)
            outputs = model(news_article_input_ids, news_article_attention_mask, question_input_ids, question_attention_mask, answer_option)

            # `answer` is one-hot, so argmax recovers the correct option index.
            target_indices = torch.argmax(answer, dim=1)
            prediction_indices = torch.argmax(outputs, dim=1)
            num_correct += torch.sum(target_indices == prediction_indices).item()
            num_wrong += torch.sum(target_indices != prediction_indices).item()
    model.train(was_training)

    total = num_correct + num_wrong
    if total == 0:
        # Nothing was scored; avoid dividing by zero.
        print("Accuracy : undefined (no samples evaluated)")
        return
    print (f"Accuracy : {num_correct/total}")

# Rebuild the network and restore the trained weights.  map_location lets a
# checkpoint that was saved from GPU load on a CPU-only host as well.
load_model = Chinese_answer_Model()
load_model.load_state_dict(
    torch.load('/kaggle/input/model2/Ayush_NLP_R3_final.pth', map_location=device)
)
load_model.to(device)
load_model = nn.DataParallel(load_model)
file_path = "/kaggle/working/test_file.json"
evaluate(file_path, load_model)

Accuracy : 0.621


Demo code

In [None]:
import numpy as np
import pandas as pd
import json
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from transformers import BertTokenizer, BertModel

def menu():
    """Print a horizontal rule of underscores followed by an empty line."""
    separator = "____________________________________________________________________________________________________________________________________________________"
    print(separator, end="\n\n")
def read_json(file):
    """Parse the UTF-8 encoded JSON document at path ``file`` and return it."""
    with open(file, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

# Kept as a plain string ("cuda" / "cpu"); passed to .to(...) throughout.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Tokenizer bound as the default `tokenizer` argument of getloader().
tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
# LEARNING_RATE = 0.5
MAX_LEN = 32  # tokenizer max_length: tokens kept per sentence / question
BATCH_SIZE = 1  # default batch size of getloader() in this cell
SEQ_LEN = 20  # sentence rows per example; also sizes the model's fc1 input
def cm_encoding(answer_options):
    """Encode numeric answer options as per-character one-hot grids.

    Each option is rendered with ``str()`` and written into its own
    ``(10, 12)`` grid: row ``j`` is the one-hot code of the ``j``-th
    character, where columns 0-9 are the decimal digits, column 10 is the
    decimal point and column 11 is the minus sign.  The last row additionally
    gets a 1 in column ``len(text) - 1`` as a length ("magnitude") marker; it
    shares that row with the tenth character when there is one.

    Text that does not fit the grid is dropped instead of raising: characters
    past the tenth are truncated, symbols outside the alphabet (for example
    the ``e``/``+`` of scientific notation) are skipped, and the magnitude
    marker is clamped to the last column.  Numerals of at most ten characters
    made of digits, ``.`` and ``-`` encode exactly as they always did.

    Args:
        answer_options: Iterable of at most four numbers.

    Returns:
        ``torch.float32`` tensor of shape ``(4, 10, 12)``.
    """
    num_options, max_chars, alphabet_size = 4, 10, 12
    point_col, minus_col = 10, 11

    encoding = np.zeros((num_options, max_chars, alphabet_size))
    for i, option in enumerate(answer_options):
        numeral = str(option)
        # Only the first `max_chars` characters have a row to live in.
        for j, char in enumerate(numeral[:max_chars]):
            if char == '-':
                encoding[i, j, minus_col] = 1
            elif char == '.':
                encoding[i, j, point_col] = 1
            elif '0' <= char <= '9':
                encoding[i, j, int(char)] = 1
            # Anything else (e.g. 'e', '+', 'n', 'a') has no column: skip it.

        # Length marker in the last row, clamped so long numerals stay in range.
        magnitude = min(len(numeral) - 1, alphabet_size - 1)
        encoding[i, -1, magnitude] = 1
    return torch.tensor(encoding, dtype=torch.float32)


class Chinese_Article_Data(Dataset):
    """Dataset that turns question records into fixed-size tensors.

    Every record is a mapping that provides the keys
    ``'sentences_containing_the_numeral_in_answer_options'`` (a list of
    sentence lists, one per answer option), ``'question_stem'``,
    ``'answer_options'`` (numeric strings, possibly with thousands commas)
    and ``'ans'`` (index of the correct option).

    Args:
        file: Already-loaded sequence of records (not a path).
        tokenizer: Tokenizer exposing ``encode_plus``.
        max_len: Number of tokens every sentence is padded/truncated to.
        seq_len: Number of sentence rows emitted per example.
    """

    NUM_OPTIONS = 4  # answer options per question
    PAD_SENTENCE = 'NULL'  # text used to fill unused sentence rows

    def __init__(self, file, tokenizer, max_len, seq_len):
        self.data = file
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.seq_len = seq_len

    def __len__(self):
        return len(self.data)

    def _encode(self, text):
        """Tokenize ``text`` to exactly ``max_len`` tokens.

        Returns the ``(input_ids, attention_mask)`` pair, each of shape
        ``(1, max_len)``.  ``padding='max_length'`` already forces the
        padding, so the deprecated ``pad_to_max_length`` flag is not passed.
        """
        encoding = self.tokenizer.encode_plus(
            text,
            max_length=self.max_len,
            truncation=True,
            padding='max_length',
            return_attention_mask=True,
            return_tensors='pt',
        )
        return encoding['input_ids'], encoding['attention_mask']

    def __getitem__(self, idx):
        """Build the model inputs for record ``idx``.

        Returns:
            Tuple ``(news_article_input_ids, news_article_attention_list,
            question_input_ids, question_attention_mask, answer,
            answer_option)`` where the first two are float tensors of shape
            ``(seq_len, max_len)``, the question tensors have shape
            ``(1, max_len)``, ``answer`` is a one-hot float vector of length 4
            and ``answer_option`` is the ``cm_encoding`` of the options.
        """
        record = self.data[idx]

        # Flatten the per-option sentence lists (at most NUM_OPTIONS groups;
        # records with fewer groups no longer raise) and keep what fits.
        groups = record['sentences_containing_the_numeral_in_answer_options']
        sentences = [
            sentence
            for group in groups[:self.NUM_OPTIONS]
            for sentence in group
        ][:self.seq_len]

        # Every row is written below, so uninitialised storage is fine here.
        news_article_input_ids = torch.empty((self.seq_len, self.max_len))
        news_article_attention_list = torch.empty((self.seq_len, self.max_len))
        for row, sentence in enumerate(sentences):
            ids, mask = self._encode(sentence)
            news_article_input_ids[row] = ids
            news_article_attention_list[row] = mask

        # Fill the remaining rows with one shared encoding of the pad text
        # instead of re-tokenizing it for every row.
        if len(sentences) < self.seq_len:
            pad_ids, pad_mask = self._encode(self.PAD_SENTENCE)
            news_article_input_ids[len(sentences):] = pad_ids
            news_article_attention_list[len(sentences):] = pad_mask

        question_input_ids, question_attention_mask = self._encode(record['question_stem'])

        # Options arrive as strings such as "1,234.5"; strip thousands commas.
        answer_option = cm_encoding(
            [float(x.replace(',', '')) for x in record['answer_options']]
        )
        answer = torch.tensor(
            [1.0 if x == record['ans'] else 0.0 for x in range(self.NUM_OPTIONS)]
        )
        return (
            news_article_input_ids,
            news_article_attention_list,
            question_input_ids,
            question_attention_mask,
            answer,
            answer_option,
        )


def getloader(file=None, tokenizer=tokenizer, max_len=MAX_LEN, batch_size=BATCH_SIZE, seq_len=SEQ_LEN):
    """Return a sequential (non-shuffling) DataLoader over the records in ``file``.

    The records are wrapped in ``Chinese_Article_Data``; batches are produced
    by four worker processes into pinned memory.
    """
    return DataLoader(
        dataset=Chinese_Article_Data(file, tokenizer, max_len, seq_len),
        batch_size=batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
    )
class Chinese_answer_Model(nn.Module):
    """BERT-based classifier that picks one of four numeric answer options.

    The pooled BERT vectors of ``SEQ_LEN`` article sentences and of the
    question are concatenated and projected to four features (``fc1``); each
    encoded answer option is reduced to one score (``linear_transform``); the
    eight values are mixed by ``out`` and normalised with softmax.
    """

    def __init__(self):
        super().__init__()
        self.bert = BertModel.from_pretrained("bert-base-chinese", return_dict=True)
        hidden_size = self.bert.config.hidden_size  # 768 for bert-base-chinese
        self.fc1 = nn.Linear(hidden_size * (SEQ_LEN + 1), 4)
        self.out = nn.Linear(8, 4)
        self.linear_transform = nn.Linear(10 * 12, 1)  # one score per option

    def forward(self, news_article_input_ids, news_article_attention_mask, question_input_ids, question_attention_mask, answer_option):
        """Score the four options for every example in the batch.

        Args:
            news_article_input_ids: ``(batch, SEQ_LEN, max_len)`` token ids.
            news_article_attention_mask: Mask matching the article ids.
            question_input_ids: ``(batch, 1, max_len)`` token ids.
            question_attention_mask: Mask matching the question ids.
            answer_option: ``(batch, 4, 10, 12)`` encoded answer options.

        Returns:
            ``(batch, 4)`` tensor of softmax probabilities, on the same device
            as ``news_article_input_ids``.
        """
        batch_size = news_article_input_ids.shape[0]
        max_len = news_article_input_ids.shape[-1]
        # Follow the inputs rather than a module-level global so the model
        # works unchanged on CPU, a single GPU or inside DataParallel.
        target_device = news_article_input_ids.device

        # Encode all sentences of the whole batch in one BERT call instead of
        # one call per example; sequences are independent of each other.
        article_out = self.bert(
            input_ids=news_article_input_ids.reshape(-1, max_len).long(),
            attention_mask=news_article_attention_mask.reshape(-1, max_len),
        )
        question_out = self.bert(
            input_ids=question_input_ids.reshape(batch_size, -1).long(),
            attention_mask=question_attention_mask.reshape(batch_size, -1),
        )

        # (batch, SEQ_LEN * hidden) followed by the question's hidden vector,
        # the same layout the per-example concatenation produced.
        article_features = article_out.pooler_output.reshape(batch_size, -1)
        features = torch.cat((article_features, question_out.pooler_output), dim=1)
        text_features = torch.relu(self.fc1(features))

        # Flatten each option grid and reduce it to a single score.
        options = answer_option.to(target_device).reshape(batch_size, -1, 10 * 12)
        option_scores = self.linear_transform(options).squeeze(-1)

        combined = torch.cat((text_features, option_scores), dim=1)
        return torch.softmax(self.out(combined), dim=1)
def loss_fn(outputs, targets):
    """Cross-entropy between predicted probabilities and the targets.

    ``Chinese_answer_Model.forward`` already applies softmax, so ``outputs``
    are probabilities.  ``nn.CrossEntropyLoss`` expects unnormalised logits
    and would apply ``log_softmax`` a second time, which flattens the
    distribution and weakens the gradient; the loss is therefore computed
    directly from the probabilities.

    Args:
        outputs: ``(batch, classes)`` tensor of probabilities.
        targets: Either a float ``(batch, classes)`` tensor of target
            distributions (e.g. one-hot rows) or an integer ``(batch,)``
            tensor of class indices.

    Returns:
        Scalar tensor with the mean loss over the batch.
    """
    # Clamp before the log so exact zeros give a finite value, not -inf/NaN.
    log_probs = torch.log(outputs.clamp_min(1e-12))
    if not torch.is_floating_point(targets):
        return nn.NLLLoss()(log_probs, targets.long())
    return -(targets * log_probs).sum(dim=1).mean()
from sklearn.metrics import f1_score, accuracy_score,  classification_report
def evaluate(file_path,model):
    """Run ``model`` over a demo JSON file, print every sample and report accuracy.

    For each sample the Chinese article/question (from the JSON file), the
    English counterpart (from the module-level ``arr``), the four answer
    options, and the predicted vs. actual option (1-based) are printed.
    ``menu()`` is called after every batch, and the overall accuracy is
    printed at the end.

    Relies on names defined elsewhere in this file: ``read_json``,
    ``getloader``, ``menu``, ``arr`` and ``device``.

    Args:
        file_path: path to a JSON list of samples with the keys
            ``news_article``, ``question_stem`` and ``answer_options``.
        model: the network to evaluate; called with the four id/mask tensors
            and the answer-option encodings.

    Returns:
        None. Results are only printed.
    """
    # read_json opens the file as UTF-8; the default locale encoding can
    # garble or reject the Chinese text on some platforms.
    samples = read_json(file_path)
    data = getloader(samples)

    num_correct = 0
    num_wrong = 0
    # Running index into `samples` / `arr`.
    # NOTE(review): assumes the loader yields samples in file order (the
    # visible loader is built with shuffle=False) -- confirm.
    c = 0

    # Inference must run without dropout and without building autograd
    # graphs; the previous train/eval mode is restored afterwards so a
    # surrounding training loop is not affected.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for d in data:
                news_article_input_ids, news_article_attention_mask,question_input_ids, question_attention_mask, answer, answer_option = d
                news_article_input_ids = news_article_input_ids.to(device)
                news_article_attention_mask = news_article_attention_mask.to(device)
                question_input_ids = question_input_ids.to(device)
                question_attention_mask = question_attention_mask.to(device)
                answer = answer.to(device)
                if torch.is_tensor(answer_option):
                    # Needed when the model is not wrapped in DataParallel
                    # (which would otherwise scatter it to the GPU itself).
                    answer_option = answer_option.to(device)
                outputs = model(news_article_input_ids, news_article_attention_mask,question_input_ids, question_attention_mask, answer_option)

                # `answer` is one-hot over the options, so argmax recovers the
                # index of the correct option.
                target_indices = torch.argmax(answer, dim=1)
                prediction_indices = torch.argmax(outputs, dim=1)

                # Report every sample of the batch, not just the first one,
                # so the printed article always matches the printed prediction.
                for j in range(prediction_indices.shape[0]):
                    print("Article in chinese:",samples[c]['news_article'])
                    print()
                    print("Article in english:" ,arr[c]['news_article'])
                    print()
                    print("Question in chinese:", samples[c]['question_stem'])
                    print()
                    print("Question in english:", arr[c]['question_stem'])
                    print()
                    for i in range(4):
                        print(f"Option {i+1}-",samples[c]['answer_options'][i])
                    print()
                    print("Predicted option:",int(prediction_indices[j])+1)
                    print("Actual option:",int(target_indices[j])+1)
                    c+=1

                # Compare the indices to find correct and wrong predictions
                num_correct += torch.sum(target_indices == prediction_indices).item()
                num_wrong += torch.sum(target_indices != prediction_indices).item()
                menu()
    finally:
        model.train(was_training)

    total = num_correct + num_wrong
    if total == 0:
        # Empty input file: avoid dividing by zero.
        print("Accuracy : n/a (no samples evaluated)")
        return
    print (f"Accuracy : {num_correct/total}")

# Rebuild the network and restore the saved weights for the demo run.
load_model = Chinese_answer_Model()
# map_location remaps tensors saved on a GPU so the checkpoint also loads when
# `device` fell back to "cpu".
load_model.load_state_dict(torch.load('/kaggle/input/model2/Ayush_NLP_R3_final.pth', map_location=device))
load_model.to(device)
load_model = nn.DataParallel(load_model)
# Inference only: switch off dropout so predictions are deterministic.
load_model.eval()
file_path = "/kaggle/input/demofile/demofile.json"
evaluate(file_path, load_model)

In [11]:
# English text for the demo samples. evaluate() reads arr[c]['news_article'] and
# arr[c]['question_stem'] and prints them next to the Chinese originals taken
# from the demo JSON file, so `arr` must be defined before evaluate() is called
# (in this file the definition appears after the call).
# NOTE(review): entries are presumably in the same order as the samples in
# demofile.json, and there must be at least as many entries as samples,
# otherwise arr[c] raises IndexError -- verify against the demo file.
arr=[{"news_article": """HTC (2498) yesterday (21) at the shareholders' meeting, Chairman Wang Xuehong responded to the request of small shareholders and promised to personally provide each small shareholder (excluding internal shareholders of the company) who completed the registration process before the end of the shareholders' meeting and was present at the venue with a brand-new HTC flagship smartphone, the new HTC One 32GB model, with a single unit market price of about 21,900 yuan, setting a record for the highest-priced gift in the history of Taiwanese shareholders' meetings. At the same time, HTC's board of directors and supervisors were re-elected, and former TSMC (2330) general manager Cai Lixing was elected as a new director, which was seen by the outside world as an alliance with TSMC to counter Samsung Electronics' technological competition.

According to statistics, there were about 100 eligible shareholders on that day. Calculated at a single unit price of 21,900 yuan, Wang Xuehong generously gave more than 2 million yuan worth of gifts to small shareholders who were concerned about HTC's operation at the venue. After reports from various media outlets over the past two days, discussions about "HTC", "HTC", and the "new HTC One" have intensified on the internet, achieving the effect of enhancing brand and visibility. HTC's marketing capabilities can be said to have reached a new level.

HTC's shareholders' meeting this year also comprehensively conducted the election of directors and supervisors. The elected directors include Wang Xuehong, Chen Wenqi, Zhuo Huotu, Cai Lixing, and David Bruce Yoffie; independent directors are Lin Zhenguo and Josef Felder; the new supervisor is Wei Zhi Investment Co., Ltd., and Zhu Huangjie.

Except for Cai Lixing, who is a newly appointed director of HTC, the rest of the elected directors and supervisors are re-elected this time.

Cai Lixing is the former general manager of TSMC (2330) and the current chairman of TSMC Solar Energy and Solid State Lighting; therefore, it is widely interpreted that, as TSMC Chairman Zhang Zhongmou has publicly praised HTC's mobile phone products multiple times and advocated for cooperation among MediaTek (2454), Hon Hai (2317), TSMC, and HTC in their respective semiconductor ICs, panel, wafer industries, and consumer mobile phone sectors to jointly counter Samsung Electronics; Cai Lixing's election as a director of HTC yesterday is the latest development in Taiwan's technology companies' joint resistance against Korea.""","question_stem":"""Wang Xuehong gave shareholders the new __, creating the highest-priced gift in Taiwan's shareholder meetings."""},{"news_article":"""Printed circuit board giant Yu Hwa (2367) held its regular shareholders' meeting today (24), which concluded smoothly with the agenda passing as scheduled (see image). During the business report, Yu Hwa's general manager, Hsu Zhenghong, pointed out that the company's operational performance was poor last year. However, since the beginning of this year, second-quarter revenue has improved compared to the first quarter, and there has been a significant improvement in profitability. The third quarter is expected to be better than the second quarter, with at least double-digit growth. Currently, Yu Hwa's capacity is almost fully utilized.

Hsu Zhenghong stated that the outlook for the fourth quarter is still promising, but uncertain. It will depend on the sales performance of consumer products. Adjustments may still be made. He mentioned that although the overall economic environment this year is still unfavorable, due to Yu Hwa's customer adjustments and development efforts, the company's operational growth in the second half of the year is expected to be positive.

Hsu Zhenghong indicated that current capacity is still insufficient. Capital expenditure for this year is expected to be at least 1.5 billion yuan. This amount has already been committed, and there may be further increases. The main expansion will focus on arbitrary layers. In the medium to long term, observation of the development of mid-to-low-end smartphones and the growth of various brand manufacturers will continue.

Analysts estimate that the revenue distribution between the first and second halves of this year is expected to be around 45:55.

Reflecting on 2012, Yu Hwa stated that smartphones and tablets were the main global consumer products that year, and they were excessively concentrated among a few customers, leading to a severe imbalance in supply and demand and unprecedented price competition. Despite focusing on high-end HDI products, Yu Hwa experienced a significant decline in revenue and profit in 2012 due to supply and demand imbalances and a sharp decline in product prices.

The shareholders' meeting today passed the financial report. Yu Hwa's revenue in 2012 was 11.903 billion yuan, with a gross profit margin of 7.6% and a net loss of 506 million yuan, resulting in a loss per share of 0.88 yuan. No dividend distribution was approved at the shareholders' meeting.""","question_stem":"""Yu Hwa: Q___Profit Shows Significant Improvement, Q3 Expected to See at Least Double-Digit Growth"""},{"news_article":"""On the morning of the 24th, Ching Cheng Bank (2809) held its regular shareholders' meeting at its headquarters in Tainan. The bank's after-tax profit for the fiscal year 101 amounted to 3,443,895,720 yuan. Apart from setting aside 30% of the statutory surplus as legal reserves, amounting to 1,033,168,716 yuan, the bank will distribute a cash dividend of 1.5 yuan.

Ching Cheng Bank pointed out that its after-tax profit for the fiscal year 101 reached 3.44 billion yuan, with an EPS of 3.28 yuan, representing a growth from an EPS of 2.47 yuan in the fiscal year 100. As of the end of May this year, the pre-tax profit amounted to 1.88 billion yuan, marking a 16% growth compared to the same period last year. The EPS stood at 1.66 yuan, indicating robust operational and profit capabilities.

The bank also highlighted its excellent asset quality, maintaining a low non-performing loan ratio of 0.12% as of the end of May, with a loan loss reserve coverage ratio of 1230%. The net interest margin reached 2.32%, and the net wealth management fee income steadily increased. Coupled with well-controlled operating expenses, these factors serve as important supports for enhancing profit capabilities.

Ching Cheng Bank further stated that it recently received the highest rating of "A++" in the "Listed and Over-the-Counter Company Information Disclosure Evaluation," marking the fifth consecutive year it has received this highest rating. This underscores the bank's management's proactive emphasis on corporate governance, receiving unanimous recognition from various sectors.

The bank emphasized that customers are the most important foundation for its development. Ching Cheng Bank actively cultivates customer relationships, is committed to long-term public welfare efforts, and regards corporate social responsibility as one of its core values. Looking ahead, Ching Cheng Bank will continue to strengthen its ties with the local community, develop more diversified and innovative businesses based on community banking, and strive to maximize value for customers, employees, and shareholders, becoming the most important driving force for the bank's growth.""","question_stem":"""Ching Cheng Bank passed a dividend of 1.__ yuan; its profit for the previous month saw a yearly growth of 16%."""},{"news_article":"""Unison Pharmacy, one of the subsidiaries of Unison Super (2912), holds approximately 73.74% of the shares. The company benefits from the rapid growth in sales of its "My Beauty Diary" facial mask brand. Apart from securing the top market share for facial masks in Taiwan, Hong Kong, and Singapore, it has also expanded into countries such as China, Japan, South Korea, Malaysia, Thailand, the United States, and Canada. In recent years, its revenue has maintained double-digit growth, reaching 3.363 billion yuan last year. Analysts project that, given the growth rate of Unison Pharmacy's operational scale, its revenue this year is expected to exceed 4 billion yuan.

Unison Pharmacy stated that the domestic pharmaceutical and beauty care market continues to expand. Besides the entry of many international beauty and health care brands into Taiwan, many domestic brands are also gradually emerging. Currently, Unison Pharmacy manages a total of 15 brands, including the distribution of 10 European and American health care and medical beauty brands (such as La Cime, Kobayashi Pharmaceutical from Japan (fever patches), and Avène) and the development of 5 proprietary brands in the pharmaceutical and cosmetic field, including "My Beauty Diary" (facial masks), "My White Skin" (skincare products), "My Health Diary" (health supplements), and FaceQ (foot/facial masks).

General Manager Zhang Cong pointed out that Unison Pharmacy's operational performance has shown double-digit growth in recent years. The main growth drivers come from beauty care and health food products. Coupled with the company's collaboration with the entire Unison Group (1216) in channel deployment, its total revenue last year reached 3.363 billion yuan, with a profit of 413 million yuan. The company's capital is approximately 300 million yuan, with an EPS of 13.75 yuan.

Zhang Cong mentioned that in terms of revenue proportion, due to the larger revenue scale of "My Beauty Diary," the revenue proportion of proprietary brands is higher than that of distributed brands. The company will not engage in new brand distribution this year. The current focus is on expanding existing brands. For example, "My White Skin" is expected to be further promoted in the Chinese market in the second half of this year. Moreover, in China, the market channels will transition from physical to virtual online platforms. Currently, FaceQ has established a flagship store on the Tmall Mall of Taobao.

According to the financial report released by Unison Super for the first quarter of this year, Unison Pharmacy's profit in the first quarter reached 135 million yuan, a year-on-year increase of 53.1%. It contributed 99.284 million yuan to Unison Super's profit, second only to Cosmed (of which Unison Super holds 100%), which contributed 101 million yuan. Considering the current growth rate of the company's operations, its revenue is expected to exceed 4 billion yuan this year.""","question_stem":"""Unison Pharmacy's operations at home and abroad have been fruitful, and this year's revenue is expected to exceed __ billion yuan"""}]