In [1]:
import pandas as pd
from textblob import TextBlob
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import seaborn as sns
from wordcloud import WordCloud
from nrclex import NRCLex


In [2]:
# Set larger font sizes for all plot elements
plt.rcParams.update({'font.size': 16})

In [3]:
# Load the dataset- a CSV file - with a 'Lemmatized_Comments' column
df = pd.read_csv(r'C:\Users\paris\OneDrive\thesis\collect data\Analysis\data processing\Cleaned_Instagram-comments3.csv')[['Lemmatized_Comments']]

In [4]:
# Safe function for sentiment analysis
def safe_get_sentiment(text):
    try:
        if isinstance(text, str):  # Ensure input is a string
            return TextBlob(text).sentiment.polarity  # Extract only polarity
    except Exception as e:
        print(f"Error processing text: {text} - {e}")
    return 0.0  # Default polarity for invalid inputs

# Function to categorize sentiment based on polarity
def categorize_sentiment(polarity):
    if polarity > 0:
        return 'positive'
    elif polarity < 0:
        return 'negative'
    else:
        return 'neutral'

# Apply sentiment analysis
df['Polarity'] = df['Lemmatized_Comments'].apply(safe_get_sentiment)
df['TextBlob_polarity'] = df['Polarity'].apply(categorize_sentiment)

# Save visualizations as PDFs
with PdfPages('TextBlob_polarity_histogram_ins_2.pdf') as pdf:
    plt.figure(figsize=(8, 6))
    plt.hist(df['Polarity'], bins=20, color='blue', edgecolor='black')
    plt.xlabel('Polarity')
    plt.ylabel('Frequency')
    plt.grid(True)
    plt.tight_layout()
    pdf.savefig()
    plt.close()

with PdfPages('TextBlob_polarity_bar_chart_ins_2.pdf') as pdf:
    plt.figure(figsize=(8, 6))
    df['TextBlob_polarity'].value_counts().plot(kind='bar', color=['green', 'red', 'gray'], edgecolor='black')
    plt.xlabel('polarity Category')
    plt.ylabel('Frequency')
    plt.tight_layout()
    plt.grid(axis='y')
    pdf.savefig()
    plt.close()

print("Two PDF files created: 'TextBlob_polarity_histogram_ins.pdf' and 'TextBlob_polarity_bar_chart_ins.pdf'")
print(df[['Lemmatized_Comments', 'Polarity', 'TextBlob_polarity']].head())



Two PDF files created: 'TextBlob_polarity_histogram_ins.pdf' and 'TextBlob_polarity_bar_chart_ins.pdf'
                        Lemmatized_Comments  Polarity TextBlob_polarity
0  look nourish perfect keep lips soft cool  0.483333          positive
1                                    pretty  0.250000          positive
2                               labour love  0.500000          positive
3                                       wow  0.100000          positive
4                           oh soooo pretty  0.250000          positive


In [5]:
# Ensure no missing values in the input column
df = df.dropna(subset=['Lemmatized_Comments'])

# Function to classify detailed emotions based on TextBlob's sentiment analysis
def TextBlob_emotion_classification(text):
    analysis = TextBlob(text)
    polarity = analysis.sentiment.polarity
    subjectivity = analysis.sentiment.subjectivity

    if polarity > 0.5:
        return 'Joyful'
    elif polarity > 0.1:
        if subjectivity > 0.5:
            return 'Trust'
        else:
            return 'Content'
    elif polarity < -0.1:
        if subjectivity > 0.5:
            return 'Disappointment'
        else:
            return 'Discontent'
    elif polarity < -0.5:
        return 'Anger'
    else:
        return 'Neutral'

# Apply the detailed emotion classification
df['TextBlob_Emotion'] = df['Lemmatized_Comments'].apply(TextBlob_emotion_classification)

# Ensure an Engagement column exists; if not, create a dummy Engagement column for visualization
if 'Engagement' not in df.columns:
    df['Engagement'] = 1  # Assign a default value of 1 for all rows

# Aggregate total engagement by detailed emotions
emotion_engagement = df.groupby('TextBlob_Emotion')['Engagement'].sum().reset_index()

# Creating the visualization
plt.figure(figsize=(12, 8))
bar_plot = sns.barplot(x='Engagement', y='TextBlob_Emotion', data=emotion_engagement, palette='viridis')


plt.xlabel('Frequency', fontsize=14)
plt.ylabel('Emotion', fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.grid(True, linestyle='--', alpha=0.6)
plt.tight_layout()

# Save the plot to a file
plt.savefig('TextBlob_emotion_plot_ins_2.png')
plt.close()  # Close the plot display to save memory

print("Plot saved successfully as 'TextBlob_emotion_plot_ins.png'.")



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  bar_plot = sns.barplot(x='Engagement', y='TextBlob_Emotion', data=emotion_engagement, palette='viridis')


Plot saved successfully as 'TextBlob_emotion_plot_ins.png'.


In [6]:
print(df[['Lemmatized_Comments', 'TextBlob_polarity', 'TextBlob_Emotion']])

                          Lemmatized_Comments TextBlob_polarity  \
0    look nourish perfect keep lips soft cool          positive   
1                                      pretty          positive   
2                                 labour love          positive   
3                                         wow          positive   
4                             oh soooo pretty          positive   
..                                        ...               ...   
411                            all best brand          positive   
412                            right ? ? love          positive   
413                                     woooo           neutral   
414                       julep gluten free ?          positive   
415         leapingbunnycertified crueltyfree           neutral   

    TextBlob_Emotion  
0              Trust  
1              Trust  
2              Trust  
3            Neutral  
4              Trust  
..               ...  
411           Joyful  
412        

In [7]:
df.to_csv(r'C:\Users\paris\OneDrive\thesis\collect data\TextBlob_polarity_Emotion_Analysis26.csv', index=False)

In [8]:
# Bar Chart for TextBlob_Emotion
plt.figure(figsize=(12, 8))
sns.barplot(x='Engagement', y='TextBlob_Emotion', data=emotion_engagement, palette='viridis')


plt.xlabel('Frequency', fontsize=14)
plt.ylabel('Emotion', fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.grid(True, linestyle='--', alpha=0.6)
plt.tight_layout()

# Save the plot to a file
plt.savefig('TextBlob_emotion_bar_chart_ins_2.png')
plt.close()

print("Bar chart saved successfully as 'TextBlob_emotion_bar_chart_iiins.png'.")



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x='Engagement', y='TextBlob_Emotion', data=emotion_engagement, palette='viridis')


Bar chart saved successfully as 'TextBlob_emotion_bar_chart_iiins.png'.


In [9]:
# Prepare data for the bar chart
emotions = emotion_engagement['TextBlob_Emotion']
counts = emotion_engagement['Engagement']

# Create the bar chart
plt.figure(figsize=(10, 6))
plt.bar(emotions, counts, color='skyblue')
plt.title('TextBlob Emotion Analysis on Instagram', fontsize=14)
plt.xlabel('Emotions', fontsize=14)
plt.ylabel('Frequency', fontsize=14)
plt.xticks(rotation=45, ha='right', fontsize=14)  # Rotate x-axis labels
plt.tight_layout()

# Save the chart to a file
plt.savefig('TextBlob_emotion_bar_chart_ins_2.png')
plt.close()

print("Bar chart saved successfully as 'TextBlob_emotion_bar_chart_ins.png'.")


Bar chart saved successfully as 'TextBlob_emotion_bar_chart_ins.png'.


In [17]:
# Pie Chart for TextBlob_Emotion
# Set larger font sizes for all plot elements
plt.rcParams.update({'font.size': 14})

# Custom colors
custom_colors = {
    'Joyful': 'gold',
    'Trust': 'purple',
    'Content': 'pink',
    'Neutral': 'skyblue',
    'Discontent': 'gray',
    'Disappointment': '#D2B48C',  # Light brown (hex code for "tan")
    'Anger': 'red'
}

 #Map colors to emotions
colors = [custom_colors[emotion] for emotion in emotion_engagement['TextBlob_Emotion']]

plt.figure(figsize=(8, 8))
plt.pie(
    emotion_engagement['Engagement'],
    labels=emotion_engagement['TextBlob_Emotion'],
    #autopct='%1.1f%%',
    #startangle=176,
    colors= colors,  
)

# Add title
plt.title('TextBlob Emotion Analysis on Instagram', fontsize=14)

# Save the plot
plt.savefig('TextBlob_emotion_pie_chart_Instagram_2.png')
plt.close()

print("Pie chart saved successfully as 'TextBlob_emotion_pie_chart_Instagram.png'.")


Pie chart saved successfully as 'TextBlob_emotion_pie_chart_Instagram.png'.


In [11]:
# Generate text data for word cloud
text = ' '.join(df['TextBlob_Emotion'])

# Create the word cloud
wordcloud = WordCloud(
    width=800, height=400, background_color='white', colormap='cool'
).generate(text)

# Plot the word cloud
plt.figure(figsize=(10, 6))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')  # Turn off axis
plt.title('Word Cloud: TextBlob Emotions', fontsize=16)

# Save the word cloud to a file
plt.savefig('TextBlob_emotion_wordcloud_ins.png')
plt.close()

print("Word cloud saved successfully as 'TextBlob_emotion_wordcloud_ins.png'.")


Word cloud saved successfully as 'TextBlob_emotion_wordcloud_ins.png'.


In [12]:
# Define your keywords
keywords = [
    'refillablecosmetics', 'earthsustainable', 'sustainablemakeup', 'waste', 'economiacircular', 'ecofriendlymakeup', 'sustainableliving', 
    'plasticfree', 'planet', 'sustainability', 'consciousconsumer', 'ecofriendlyproducts', 'fairtradeskincare', 'zerowastemakeup', 'tree conservation',
    'ingredient', 'climate', 'health', 'ecofriendlyliving', 'ethicalcompany', 'refillable', 'refillablemakeup', 'environmental', 'sustainablelivingblog',
    'verify', 'ethicalskincare', 'compostable', 'wastefree', 'environmentally', 'zerowasteliving', 'ecofriendlyskincare', 'tree planting', 'sustainable',
    'ecofriendly', 'compostablelipbalm', 'organiclipbalm', 'reuse', 'refill', 'test', 'fairewholesale', 'testimony', 'preservativefree', 
    'ecomindedchildhood', 'recycledmaterials', 'sustainablebusiness', 'naturalbeauty', 'reduce', 'zerowastecosmetics', 'natural', 'refillrevolution', 
    'sustainableskincare', 'circularity', 'zero', 'fairly', 'refillablebeauty', 'gogreen', 'crueltyfreeskincare', 'planting', 'circulardesign', 
    'veganbeauty', 'plastic', 'sustainabilitymatters', 'conscious', 'organicskincare', 'eco', 'preservative', 'chemical', 'alternative', 'vegan', 
    'plant', 'gluten free', 'recycle', 'compostableskincare', 'circularbeauty', 'ethical', 'sustainablebrand', 'green', 'ecobeauty', 'sustainableclothing',
    'packaging', 'earth', 'leapingbunnycertified', 'zerowastelifestyle', 'zerowaste', 'reusable', 'cruelty', 'sustainablebeauty', 'cleanbeauty', 'safe',
    'environment', 'palmoilfreeskincare', 'refillableskincare', 'crueltyfree', 'fragrance', 'leap bunny', 'zerowasteskincare', 'zerowastestore', 'wasted',
    'none plastic', 'sustainablelivingblogger', 'harmful', 'sustainablepackaging'
]

# Initialize a dictionary to store results
keyword_emotions = {keyword: [] for keyword in keywords}

# Function to find keywords in a specific column and analyze emotions
def analyze_caption_emotions(caption):
    for keyword in keywords:
        if keyword in caption.lower():  # Check if the keyword is in the caption
            text_object = NRCLex(caption)
            emotion_dict = text_object.raw_emotion_scores
            if emotion_dict:  # If emotions are found, add them to the dictionary
                keyword_emotions[keyword].append(emotion_dict)

# Apply the function to input column
df['Lemmatized_Comments'].apply(analyze_caption_emotions)

# Prepare data for visualization
data_for_visualization = {
    keyword: pd.DataFrame(emotions).mean().to_dict()
    for keyword, emotions in keyword_emotions.items() if emotions
}

# Convert to DataFrame for easier manipulation and visualization
emotion_df = pd.DataFrame(data_for_visualization).fillna(0).T  # Transpose to get keywords as rows
emotion_df.reset_index(inplace=True)
emotion_df.rename(columns={'index': 'Keyword'}, inplace=True)

# Plotting heatmap for all keywords
with PdfPages('emotion_heatmap_results_instagram.pdf') as pdf:
    plt.figure(figsize=(15, 12))
    sns.heatmap(
        emotion_df.iloc[:, 1:],  # Exclude keyword column for heatmap
        annot=True, fmt=".2f", cmap='coolwarm', 
        xticklabels=emotion_df.columns[1:], 
        yticklabels=emotion_df['Keyword']
    )
    plt.xlabel('Emotions', fontsize=14)
    plt.ylabel('Keywords', fontsize=14)
    plt.tight_layout()
    pdf.savefig()  # Save the heatmap to a PDF file
    plt.close()

print("PDF file with the heatmap of emotion analysis results has been created.")


PDF file with the heatmap of emotion analysis results has been created.
