# Student Sentiment Analysis - Google Colab Notebook

This notebook provides complete data cleaning, preprocessing, and visualization for student feedback sentiment analysis.

**No frontend dependencies - Standalone analysis**

## 1. Install Required Packages

In [None]:
# Install required packages
!pip install textblob wordcloud -q

# Download the NLTK corpora that TextBlob relies on.
import nltk

# 'punkt_tab' is the tokenizer resource newer NLTK releases look up in place
# of the pickled 'punkt' models; requesting both keeps old and new versions
# working.
required_corpora = ('brown', 'punkt', 'punkt_tab')

# nltk.download() reports failure through its boolean return value instead of
# raising, so collect the failures rather than announcing success blindly.
failed_corpora = [
    corpus for corpus in required_corpora
    if not nltk.download(corpus, quiet=True)
]

if failed_corpora:
    print(f"✗ Could not download NLTK corpora: {', '.join(failed_corpora)}")
else:
    print("✓ All packages installed successfully!")

## 2. Import the Sentiment Analysis Module

In [None]:
# Option 1: copy-paste the entire sentiment_analysis_colab.py content here.
# Option 2 (the code below): upload the file and import it.

# For Colab, you can upload the file using:
# (the upload has to run before the import further down)
from google.colab import files
uploaded = files.upload()

# Then import
# NOTE(review): this import presumably only resolves when the uploaded file is
# named exactly sentiment_analysis_colab.py — confirm in Colab.
from sentiment_analysis_colab import SentimentAnalysisColab

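## Alternative: Define Classes Directly (Embedded Code)

Run this cell in either case: the cells below depend on these imports (for example `pd`). If you did not upload `sentiment_analysis_colab.py` above, also paste its class definitions into this cell so that `SentimentAnalysisColab` and `SentimentVisualizer` are defined, and skip the `from sentiment_analysis_colab import ...` lines in later cells.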

In [None]:
# Import libraries
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from textblob import TextBlob
from collections import Counter
from wordcloud import WordCloud
import warnings
warnings.filterwarnings('ignore')

# Plot defaults: seaborn "whitegrid" theme and a 12x6 default figure size
sns.set_style(style="whitegrid")
plt.rcParams.update({'figure.figsize': (12, 6)})

print("✓ Libraries imported successfully!")

## 3. Upload Your Data File

In [None]:
# Upload CSV file
from google.colab import files
uploaded = files.upload()

# `uploaded` maps each uploaded filename to its content. Fail with a clear
# message when nothing was uploaded (e.g. the dialog was cancelled) instead
# of an opaque IndexError.
if not uploaded:
    raise RuntimeError("No file was uploaded. Re-run this cell and choose a CSV file.")

# Get the uploaded filename (the first one if several were selected)
filename = next(iter(uploaded))
print(f"\n✓ Uploaded: {filename}")

## 4. Load and Preview Data

In [None]:
# Read the uploaded CSV into a DataFrame
df = pd.read_csv(filename)

# Report the size and column names, then let the notebook render the first rows
print(
    f"Dataset shape: {df.shape}",
    f"\nColumns: {list(df.columns)}",
    "\nFirst 5 rows:",
    sep="\n",
)
df.head(5)

## 5. Initialize Analyzer and Run Complete Analysis

In [None]:
# Initialize analyzer
# The analyzer receives the uploaded file's name, not the `df` preview frame.
analyzer = SentimentAnalysisColab(filename)

# Run complete analysis (cleaning + visualization)
# The result is kept as `cleaned_df`, which the later cells in this notebook use.
cleaned_df = analyzer.run_complete_analysis()

## 6. View Cleaned Data

In [None]:
# Display cleaned dataframe
print("Cleaned Data Sample:")
# Trailing expression: the notebook renders the first 10 rows as the cell output.
cleaned_df.head(10)

## 7. Generate Individual Visualizations

In [None]:
# Create visualizer
# Requires sentiment_analysis_colab to be importable (uploaded in section 2).
from sentiment_analysis_colab import SentimentVisualizer
# `viz` is built from the cleaned dataframe and reused by the plotting cells below.
viz = SentimentVisualizer(cleaned_df)

# Individual plots
viz.plot_sentiment_distribution()

In [None]:
# Pie chart of sentiment classes, going by the method name — see SentimentVisualizer for details.
viz.plot_sentiment_pie()

In [None]:
# Average rating plot, going by the method name — see SentimentVisualizer for details.
viz.plot_average_ratings()

In [None]:
# Plot covering all rating columns, going by the method name — see SentimentVisualizer for details.
viz.plot_all_ratings()

In [None]:
# NOTE(review): 20 is presumably the number of keywords to show — confirm
# against SentimentVisualizer.plot_top_keywords.
viz.plot_top_keywords(20)

In [None]:
# Word cloud plot, going by the method name — see SentimentVisualizer for details.
viz.plot_wordcloud()

In [None]:
# Distribution of polarity scores, going by the method name — see SentimentVisualizer for details.
viz.plot_polarity_distribution()

## 8. Export Cleaned Data

In [None]:
# Export cleaned data
# The filename here must match the one passed to files.download() below.
analyzer.export_cleaned_data('cleaned_sentiment_data.csv')

# Download the file
# files.download() hands the exported file from the Colab runtime to the browser.
from google.colab import files
files.download('cleaned_sentiment_data.csv')

## 9. Custom Analysis

You can perform custom analysis using the cleaned dataframe:

In [None]:
# Example: keep only the rows whose comment sentiment is 'happy'
is_happy = cleaned_df['comment_sentiment'] == 'happy'
happy_feedback = cleaned_df[is_happy]
print(f"Total happy feedback: {len(happy_feedback)}")

# Show the comment text next to its polarity score for the first few matches
print("\nSample happy comments:")
preview_columns = ['comment', 'comment_polarity']
print(happy_feedback[preview_columns].head())

In [None]:
# Example: sentiment counts per semester (one row per semester, one column per sentiment)
if 'semester_id' in cleaned_df.columns:
    sentiment_by_semester = cleaned_df.groupby('semester_id')['comment_sentiment']
    # Semester/sentiment pairs that never occur are filled with 0
    semester_stats = sentiment_by_semester.value_counts().unstack(fill_value=0)
    print("Sentiment by Semester:")
    print(semester_stats)

In [None]:
# Example: rank subjects by their mean rating over every '*_numeric' column
if 'subject_name' in cleaned_df.columns:
    rating_cols = [col for col in cleaned_df.columns if col.endswith('_numeric')]
    if rating_cols:
        # Note: this adds an 'avg_rating' column to cleaned_df itself
        cleaned_df['avg_rating'] = cleaned_df[rating_cols].mean(axis=1)
        subject_means = cleaned_df.groupby('subject_name')['avg_rating'].mean()
        top_subjects = subject_means.sort_values(ascending=False).head(10)
        print("Top 10 Subjects by Average Rating:")
        print(top_subjects)

## 10. Generate Sample Data (Optional)

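If you don't have data, you can generate sample data with the cell below. It writes `sample_data.csv`; to analyze it, skip the upload in section 3, set `filename = 'sample_data.csv'`, and run the notebook from section 4 onward: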

In [None]:
# Generate sample data
import random
from datetime import datetime, timedelta

def generate_sample_data(num_rows=100, seed=None):
    """Build a synthetic student-feedback DataFrame for trying out the notebook.

    Each row is assigned a random sentiment class; its comment and all six
    ratings are then drawn from pools tied to that class, so the ratings and
    the comment always agree in tone.

    Args:
        num_rows: Number of feedback rows to generate. Zero (or a negative
            value) yields an empty frame that still carries every column.
        seed: Optional seed for reproducible output. When None, the
            module-level ``random`` generator is used, so existing callers
            (including ones that call ``random.seed`` themselves) behave as
            before.

    Returns:
        pandas.DataFrame with the six rating columns, ``comment`` and
        ``created_at`` (a ``YYYY-MM-DD`` string dated 0-180 days back).
    """
    # A private generator keeps seeded runs from disturbing global random state.
    rng = random if seed is None else random.Random(seed)

    # sentiment class -> (comment pool, rating pool)
    pools = {
        'happy': (
            ["Excellent teaching", "Great course", "Very helpful professor"],
            ['Excellent', 'Very Good'],
        ),
        'neutral': (
            ["Average course", "Okay teaching", "submitted"],
            ['Average', 'Good'],
        ),
        'bad': (
            ["Poor teaching", "Needs improvement", "Boring course"],
            ['Poor', 'Bad'],
        ),
    }
    sentiments = list(pools)
    rating_columns = (
        'teaching',
        'course_content',
        'examination',
        'lab_support',
        'teaching_method',
        'library_support',
    )

    # One reference timestamp so every row is dated relative to the same moment.
    now = datetime.now()

    data = []
    for _ in range(num_rows):
        # Draw order (sentiment, comment, ratings, date) matches the original
        # implementation so globally seeded runs produce the same rows.
        comments, ratings = pools[rng.choice(sentiments)]
        comment = rng.choice(comments)
        row = {column: rng.choice(ratings) for column in rating_columns}
        row['comment'] = comment
        days_back = rng.randint(0, 180)
        row['created_at'] = (now - timedelta(days=days_back)).strftime('%Y-%m-%d')
        data.append(row)

    # Explicit columns keep the schema intact even when no rows were generated.
    return pd.DataFrame(data, columns=[*rating_columns, 'comment', 'created_at'])

# Build 150 sample rows, write them to CSV, and preview the first rows
sample_csv_path = 'sample_data.csv'
sample_df = generate_sample_data(150)
sample_df.to_csv(sample_csv_path, index=False)
print(f"✓ Sample data generated: {sample_csv_path}")
sample_df.head()