<a href="https://colab.research.google.com/github/0AKLEYYY/aiot/blob/main/emotion_food_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [33]:
!pip install transformers datasets peft --upgrade -q --no-cache-dir

In [34]:
import transformers
print(transformers.__version__)

4.52.4


In [35]:
import pandas as pd
df = pd.read_csv("emotion_data.csv", encoding="big5")
df.head()


Unnamed: 0,text,emotion,food_type,budget
0,我今天心情不好，想大吃一頓,傷心,吃到飽,不限
1,好累好煩，想喝熱湯或吃點熱的,疲累,不限,中
2,終於放假了！想吃壽司慶祝一下,開心,壽司,高
3,不知道吃什麼，有推薦嗎？,普通,不限,中
4,想來點甜點療癒一下,開心,甜點,中


In [36]:
from sklearn.preprocessing import LabelEncoder
import json

label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['emotion'])

# 儲存 label_map.json
label_map = {i: label for i, label in enumerate(label_encoder.classes_)}
with open("label_map.json", "w", encoding="utf-8") as f:
    json.dump(label_map, f, ensure_ascii=False)

# 最後只保留要訓練的欄位
df = df[['text', 'label']]
df.head()


Unnamed: 0,text,label
0,我今天心情不好，想大吃一頓,0
1,好累好煩，想喝熱湯或吃點熱的,3
2,終於放假了！想吃壽司慶祝一下,4
3,不知道吃什麼，有推薦嗎？,2
4,想來點甜點療癒一下,4


In [37]:
from sklearn.model_selection import train_test_split
train_texts, val_texts, train_labels, val_labels = train_test_split(
    df['text'].tolist(), df['label'].tolist(), test_size=0.2
)


In [38]:
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-chinese")
train_encodings = tokenizer(train_texts, truncation=True, padding=True)
val_encodings = tokenizer(val_texts, truncation=True, padding=True)


In [39]:
import torch

class EmotionDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels
    def __getitem__(self, idx):
        return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()} | {'labels': torch.tensor(self.labels[idx])}
    def __len__(self):
        return len(self.labels)

train_dataset = EmotionDataset(train_encodings, train_labels)
val_dataset = EmotionDataset(val_encodings, val_labels)


In [40]:
import os
os.environ["WANDB_DISABLED"] = "true"  # ✅ 關掉 wandb 強制跳出
from transformers import BertForSequenceClassification, Trainer, TrainingArguments

model = BertForSequenceClassification.from_pretrained("bert-base-chinese", num_labels=len(label_map))

training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_dir="./logs",
    report_to=None
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset
)

trainer.train()


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-chinese and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Epoch,Training Loss,Validation Loss
1,No log,1.593877
2,No log,1.522943
3,No log,1.50887


TrainOutput(global_step=6, training_loss=1.6537221272786458, metrics={'train_runtime': 70.1123, 'train_samples_per_second': 0.428, 'train_steps_per_second': 0.086, 'total_flos': 292924494540.0, 'train_loss': 1.6537221272786458, 'epoch': 3.0})

In [41]:
model.save_pretrained("model")
tokenizer.save_pretrained("model")


('model/tokenizer_config.json',
 'model/special_tokens_map.json',
 'model/vocab.txt',
 'model/added_tokens.json')