In [None]:
# Install necessary libraries
# !pip install pandas matplotlib nltk tweepy wordcloud seaborn

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from wordcloud import WordCloud
import requests
from io import StringIO
from IPython.display import Image, display

# Download the NLTK data packages: tokenizer models, stop-word lists and
# the VADER sentiment lexicon.
for resource_name in ('punkt', 'stopwords', 'vader_lexicon'):
    nltk.download(resource_name)

# Function to perform sentiment analysis
def analyze_sentiment(text):
    """Classify *text* as 'positive', 'negative' or 'neutral' using VADER.

    The label is derived from the VADER compound score: a score of at
    least 0.05 is 'positive', a score of at most -0.05 is 'negative', and
    anything in between is 'neutral'.

    Args:
        text: The text to score. Any value that is not a ``str`` (for
            example the NaN pandas uses for a missing cell) is treated as
            'neutral' without being scored.

    Returns:
        One of the strings 'positive', 'negative' or 'neutral'.
    """
    if not isinstance(text, str):
        return 'neutral'  # Handle missing values

    # Create the analyzer once and reuse it on later calls. Constructing it
    # loads and parses the VADER lexicon, which is far too expensive to
    # repeat for every row when this function is used with Series.apply.
    sia = getattr(analyze_sentiment, '_analyzer', None)
    if sia is None:
        sia = analyze_sentiment._analyzer = SentimentIntensityAnalyzer()

    score = sia.polarity_scores(text)['compound']
    if score >= 0.05:
        return 'positive'
    if score <= -0.05:
        return 'negative'
    return 'neutral'

# Function to generate word cloud
def generate_wordcloud(text, output_path='wordcloud.png'):
    """Render a word cloud of *text* and save it as an image file.

    Args:
        text: The text whose words are drawn in the cloud.
        output_path: Where the image is written. Defaults to
            'wordcloud.png', the file name this function has always used.

    Raises:
        ValueError: If *text* is empty or contains only whitespace, since
            a word cloud needs at least one word.
    """
    # Fail early with a clear message instead of opening a figure first.
    if isinstance(text, str) and not text.strip():
        raise ValueError('Cannot generate a word cloud from empty text.')

    wordcloud = WordCloud(
        width=800,
        height=400,
        random_state=21,  # fixed seed so the layout is reproducible
        max_font_size=110,
        background_color='white',
    ).generate(text)

    fig = plt.figure(figsize=(10, 7))
    try:
        plt.imshow(wordcloud, interpolation="bilinear")
        plt.axis('off')
        fig.savefig(output_path)
    finally:
        # Close this specific figure even when drawing or saving fails, so
        # a failed call does not leave an open figure behind.
        plt.close(fig)

# Load social media data from a local CSV file
# Point local_csv_path at the CSV file you want to analyze
local_csv_path = 'twitter_training.csv'
df = pd.read_csv(local_csv_path)

# Print the first few rows of the DataFrame
print(df.head())

# Perform sentiment analysis on the 'text' column if it exists
if 'text' in df.columns:
    df['sentiment'] = df['text'].apply(analyze_sentiment)

    # Print rows with problematic text (values that are not strings, which
    # analyze_sentiment labels as 'neutral')
    problematic_rows = df[df['text'].apply(lambda x: not isinstance(x, str))]
    print("Rows with problematic text:")
    print(problematic_rows)

    # Visualize sentiment distribution using Seaborn and save the plot.
    # Bar order and colours are pinned per label: a bare colour list is
    # assigned in order of appearance in the data, which could paint
    # 'negative' green depending on which label happens to come first.
    sentiment_colors = {'positive': 'green', 'negative': 'red', 'neutral': 'gray'}
    plt.figure(figsize=(8, 6))
    sns.countplot(
        x='sentiment',
        data=df,
        order=list(sentiment_colors),
        palette=sentiment_colors,
    )
    plt.title('Sentiment Distribution in Social Media Data')
    plt.xlabel('Sentiment')
    plt.ylabel('Count')
    plt.savefig('sentiment_distribution.png')
    plt.close()

    # Generate and save word cloud for positive sentiment. Rows labelled
    # 'positive' always hold strings, so the join cannot hit a non-string.
    positive_text = ' '.join(df.loc[df['sentiment'] == 'positive', 'text'])
    # A word cloud needs at least one word; skip it when nothing was
    # classified as positive instead of aborting the whole run.
    has_positive_text = bool(positive_text.strip())
    if has_positive_text:
        generate_wordcloud(positive_text)

    # Display saved plots
    display(Image(filename='sentiment_distribution.png'))
    if has_positive_text:
        display(Image(filename='wordcloud.png'))
    else:
        print("No positive text found; skipping the word cloud.")
else:
    print("The 'text' column is not present in the DataFrame.")
