In [2]:
import random
from snownlp import SnowNLP, sentiment

# 从文件加载数据
def read_data(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        data = file.readlines()
    return [line.strip() for line in data if line.strip()]

# 将数据拆分为训练集和测试集
def split_data(data, train_ratio=0.8):
    random.shuffle(data)
    train_size = int(len(data) * train_ratio)
    train_data = data[:train_size]
    test_data = data[train_size:]
    return train_data, test_data

# 将数据保存到文件
def save_data(data, file_path):
    with open(file_path, 'w', encoding='utf-8') as file:
        for line in data:
            file.write(f"{line}\n")

# 测试模型
def test_model(test_data, label):
    if not test_data:
        return 0.0  # 如果测试数据为空，则返回0的准确率
    correct = 0
    for sentence in test_data:
        s = SnowNLP(sentence)
        prediction = s.sentiments
        if label == 'positive' and prediction > 0.5:
            correct += 1
        elif label == 'negative' and prediction <= 0.5:
            correct += 1
    accuracy = correct / len(test_data)
    return accuracy

# 加载数据
positive_data = read_data('pos.txt')
negative_data = read_data('neg.txt')

# 拆分数据
positive_train, positive_test = split_data(positive_data)
negative_train, negative_test = split_data(negative_data)

# 保存训练集和测试集数据
save_data(positive_train, 'positive_train.txt')
save_data(negative_train, 'negative_train.txt')
save_data(positive_test, 'positive_test.txt')
save_data(negative_test, 'negative_test.txt')

# 训练模型
sentiment.train('positive_train.txt', 'negative_train.txt')
sentiment.save('sentiment.marshal')


In [10]:
# 加载模型
sentiment.load('Model/Douban+B站/sentiment.marshal')

t2 = "今天天气真好啊"
s = SnowNLP(t2)
s.sentiments

0.2641523905731238

In [7]:

# 加载测试数据
positive_test_data = read_data('positive_test.txt')
negative_test_data = read_data('negative_test.txt')

# 测试模型
positive_accuracy = test_model(positive_test_data, 'positive')
negative_accuracy = test_model(negative_test_data, 'negative')

# 打印结果
print(f"Positive accuracy: {positive_accuracy * 100:.2f}%")
print(f"Negative accuracy: {negative_accuracy * 100:.2f}%")
print(f"Overall accuracy: {(positive_accuracy + negative_accuracy) / 2 * 100:.2f}%")

Positive accuracy: 1.22%
Negative accuracy: 20.55%
Overall accuracy: 10.88%
