In [1]:
import random
from snownlp import SnowNLP, sentiment

# Load data from file
def read_data(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        data = file.readlines()
    return [line.strip() for line in data if line.strip()]

# Split data into training set and test set
def split_data(data, train_ratio=0.8):
    random.shuffle(data)
    train_size = int(len(data) * train_ratio)
    train_data = data[:train_size]
    test_data = data[train_size:]
    return train_data, test_data

# Save data to file
def save_data(data, file_path):
    with open(file_path, 'w', encoding='utf-8') as file:
        for line in data:
            file.write(f"{line}\n")

# Test the model
def test_model(test_data, label):
    if not test_data:
        return 0.0  # If test data is empty, return 0.0
    correct = 0
    for sentence in test_data:
        s = SnowNLP(sentence)
        prediction = s.sentiments
        if label == 'positive' and prediction > 0.5:
            correct += 1
        elif label == 'negative' and prediction <= 0.5:
            correct += 1
    accuracy = correct / len(test_data)
    return accuracy



In [5]:

def train():
    # Load data
    positive_data = read_data('Data/pos.txt')
    negative_data = read_data('Data/neg.txt')
    # Split data
    positive_train, positive_test = split_data(positive_data)
    negative_train, negative_test = split_data(negative_data)
    # Save data
    save_data(positive_train, 'Data/positive_train.txt')
    save_data(negative_train, 'Data/negative_train.txt')
    save_data(positive_test, 'Data/positive_test.txt')
    save_data(negative_test, 'Data/negative_test.txt')
    sentiment.train('Data/negative_train.txt', 'Data/positive_train.txt')
    sentiment.save('Model/sentiment.marshal')


t2 = "今天天气真好啊"
train()
sentiment.load('Model/sentiment.marshal')
s = SnowNLP(t2)
print(s.sentiments)


0.7325370630041197


In [3]:

# Load the model
sentiment.load('Model/MovieComment.marshal')

# Read the test data
positive_test_data = read_data('Data/positive_test.txt')
negative_test_data = read_data('Data/negative_test.txt')

# Test the model
positive_accuracy = test_model(positive_test_data, 'positive')
negative_accuracy = test_model(negative_test_data, 'negative')

# Print the results
print(f"Positive accuracy: {positive_accuracy * 100:.2f}%")
print(f"Negative accuracy: {negative_accuracy * 100:.2f}%")
print(f"Overall accuracy: {(positive_accuracy + negative_accuracy) / 2 * 100:.2f}%")

Positive accuracy: 90.51%
Negative accuracy: 87.14%
Overall accuracy: 88.83%
