# 🇨🇳 Chinese Sentiment Analysis

Simple Chinese sentiment analysis in Google Colab.

## How to use:
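1. Upload an Excel file that has a text column (named `text` by default; change `text_column` in the "Analyze Sentiment" cell if yours differs)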
2. Run all cells
3. Download results

## 📦 Install Packages

In [None]:
!pip install torch transformers pandas openpyxl matplotlib seaborn wordcloud

## 📚 Import Libraries

In [None]:
import torch
import pandas as pd
import matplotlib.pyplot as plt
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from google.colab import files
from io import BytesIO
import warnings
warnings.filterwarnings('ignore')
print('✅ Libraries loaded')

## 🤖 Load Model

In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load the Chinese sentiment tokenizer and classifier, placing the model
# on the selected device
model_name = 'IDEA-CCNL/Erlangshen-RoBERTa-110M-Sentiment'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
print(f'✅ Model loaded on {device}')

## 📁 Upload File

In [None]:
# Upload Excel file
uploaded = files.upload()

# If no file was selected, `uploaded` is empty and taking its first key would
# fail with an unhelpful IndexError; stop with an actionable message instead.
if not uploaded:
    raise ValueError('No file was uploaded. Re-run this cell and select an Excel file.')

# Only the first uploaded file is read.
filename = next(iter(uploaded))
df = pd.read_excel(BytesIO(uploaded[filename]))
print(f'✅ File loaded: {len(df)} rows')
print('Columns:', list(df.columns))
df.head()

## 🧠 Analyze Sentiment

In [None]:
# Name of the column in the uploaded file that holds the text to analyze.
# Change this to match your file (the upload cell prints the available columns).
text_column = 'text'

def analyze_sentiment(text, max_length=128):
    """Classify a single piece of text as negative or positive.

    Uses the module-level ``tokenizer``, ``model`` and ``device`` objects.

    Args:
        text: The string to classify.
        max_length: Maximum number of tokens given to the model; longer
            inputs are truncated. Defaults to 128.

    Returns:
        A ``(label, confidence)`` tuple. ``label`` is ``'negative'`` or
        ``'positive'`` and ``confidence`` is the softmax probability of the
        chosen label. ``None`` and blank/whitespace-only strings return
        ``('neutral', 0.0)`` without running the model.
    """
    # There is nothing to classify in blank input, and the model would still
    # emit an arbitrary label for it, so report it as neutral instead.
    if text is None or (isinstance(text, str) and not text.strip()):
        return 'neutral', 0.0

    # Class index -> label name (assumed to match the model's output order;
    # confirm against the model card if the model is changed).
    labels = ['negative', 'positive']

    inputs = tokenizer(text, return_tensors='pt', truncation=True, max_length=max_length)
    # Input tensors must live on the same device as the model.
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.softmax(outputs.logits, dim=1)
        sentiment_id = torch.argmax(probs, dim=1).item()
        confidence = probs[0][sentiment_id].item()
    return labels[sentiment_id], confidence

# Analyze all texts
# Fail early with a readable message when the configured column is absent,
# rather than a bare KeyError from the DataFrame lookup.
if text_column not in df.columns:
    raise KeyError(
        f"Column '{text_column}' not found in the uploaded file. "
        f"Available columns: {list(df.columns)}"
    )

results = []
for text in df[text_column]:
    if pd.isna(text):
        # Missing cells are not sent to the model.
        sentiment, conf = 'neutral', 0.0
    else:
        sentiment, conf = analyze_sentiment(str(text))
    results.append({'text': text, 'sentiment': sentiment, 'confidence': conf})

# Name the columns explicitly so the frame keeps them even when the input has
# zero rows; otherwise the 'sentiment' lookup below would raise KeyError.
results_df = pd.DataFrame(results, columns=['text', 'sentiment', 'confidence'])
print('✅ Analysis complete!')
print('\nSentiment distribution:')
print(results_df['sentiment'].value_counts())
results_df.head()

## 🎨 Visualize Results

In [None]:
# Tally how many rows fall under each sentiment label
sentiment_counts = results_df['sentiment'].value_counts()
sentiment_names = sentiment_counts.index
sentiment_totals = sentiment_counts.values

# One figure with two side-by-side panels
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

# Left panel: share of each sentiment as a pie chart
ax1.pie(sentiment_totals, labels=sentiment_names, autopct='%1.1f%%')
ax1.set(title='Sentiment Distribution')

# Right panel: absolute counts as a bar chart
ax2.bar(sentiment_names, sentiment_totals)
ax2.set(title='Sentiment Counts', ylabel='Count')

fig.tight_layout()
plt.show()

## 💾 Download Results

In [None]:
# Write the results table to an Excel workbook (without the row index),
# then hand the file to the browser for download
output_file = 'chinese_sentiment_results.xlsx'
results_df.to_excel(excel_writer=output_file, index=False)
files.download(output_file)
print('✅ Results downloaded!')