In [1]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

## Load TH and US Mask Wearing Datasets

In [2]:
thailand_df = pd.read_csv('stance_emotion_th_mask.csv')
usa_df = pd.read_csv('stance_emotion_us_mask.csv')

In [3]:
thailand_df['country'] = 'Thailand'
usa_df['country'] = 'USA'

In [4]:
combined_comments = pd.concat([thailand_df, usa_df], axis=0)
combined_comments['event_date'] = pd.to_datetime(combined_comments['event_date'])
combined_comments['year'] = combined_comments['event_date'].dt.to_period('Y').astype(str)
combined_comments['month'] = combined_comments['event_date'].dt.to_period('M')

thailand_comments = combined_comments[combined_comments['country'] == 'Thailand']
usa_comments = combined_comments[combined_comments['country'] == 'USA']

combined_comments

Unnamed: 0,instruction,input,news_publisher,event_date,english_comment_text,comment_text,anger_intensity,fear_intensity,joy_intensity,emotional_classification,pred_stance,country,year,month
0,You are a helpful assistant tasked with analyz...,Identify the stance of tweet: `It's not strang...,TNN,2021-05-04,It's not strange. We've known for a long time ...,มันไม่แปลกหรอก เราก็รู้กันมาตั้งนานแล้วว่ามันแ...,0.438,0.393,0.333,surprise.,Favorable,Thailand,2021,2021-05
1,You are a helpful assistant tasked with analyz...,Identify the stance of tweet: `I see a lot of ...,TNN,2021-02-03,I see a lot of people in bars and cabaret show...,เห็นตามบาร์คาบาร์เร่ ตามสนามบอลคนเติมเพียบน่าก...,0.438,0.479,0.271,"joy, optimism.",Favorable,Thailand,2021,2021-02
2,You are a helpful assistant tasked with analyz...,Identify the stance of tweet: `If masks really...,TNN,2021-04-11,"If masks really worked, everyone at the meetin...",ถ้าหน้ากากป้องกันได้จริง..คนเข้าประชุมทุกตนใส่...,0.521,0.562,0.231,"anticipation, disgust, fear, sadness, surprise.",Neutral,Thailand,2021,2021-04
3,You are a helpful assistant tasked with analyz...,Identify the stance of tweet: `I wear two mask...,TNN,2021-07-30,I wear two masks. If I go shopping and meet ot...,ใส่แมสสองชั้นค่ะ ถ้าไปซื้อของเจอคนอื่น เดินผ่า...,0.292,0.292,0.292,"anticipation, joy, sadness.",Favorable,Thailand,2021,2021-07
4,You are a helpful assistant tasked with analyz...,Identify the stance of tweet: `If you're going...,TNN,2021-06-29,"If you're going to wear a mask like that, you'...",ถ้าจะมาติดหน้ากากแบบนี้ติดอยู่ดีเวลาถอดมันก็เข...,0.438,0.520,0.214,"anticipation, disgust, fear, pessimism.",Favorable,Thailand,2021,2021-06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9133,You are a helpful assistant tasked with analyz...,identify the stance of tweet: 'i refuse to wea...,Fox News,2020-09-02,,i refuse to wear a commie mask\nand if you don...,0.562,0.333,0.233,"anger, disgust.",Against,USA,2020,2020-09
9134,You are a helpful assistant tasked with analyz...,"identify the stance of tweet: 'its true, my jo...",Fox News,2021-09-23,,"its true, my job is opening back up. they are ...",0.433,0.438,0.320,"anticipation, fear, joy, sadness.",Against,USA,2021,2021-09
9135,You are a helpful assistant tasked with analyz...,identify the stance of tweet: 'i just wonder w...,Fox News,2022-04-08,,i just wonder why he didn't wear a mask when a...,0.479,0.479,0.271,"anticipation, disgust, surprise.",Favorable,USA,2022,2022-04
9136,You are a helpful assistant tasked with analyz...,identify the stance of tweet: 'so because it w...,Fox News,2022-04-08,,so because it was a black woman she doesn't ha...,0.625,0.520,0.214,"anger, disgust.",Favorable,USA,2022,2022-04


## News Publisher Distribution for Thailand and USA

In [5]:
def plot_news_publisher_distribution(df, title, width=800, height=400):
    fig = px.pie(df, names='news_publisher', title=title, hole=0)
    fig.update_traces(textinfo='label+percent+value')
    fig.update_layout(width=width, height=height)
    fig.show()

# Plot the news publisher distribution for Thai comments
plot_news_publisher_distribution(thailand_comments, 'News Publisher Distribution (Thailand)')

# Plot the news publisher distribution for English comments
plot_news_publisher_distribution(usa_comments, 'News Publisher Distribution (USA)')

## Comments Distribution by Year for Thailand and USA

In [6]:
def plot_comment_distribution_by_year(df, country, title, width=600, height=400):
  df_yearly = df.groupby('year').size().reset_index(name='count')
  fig = px.bar(df_yearly, x='year', y='count', title=title, labels={'year': 'Year', 'count': 'Number of Comments'})
  fig.update_layout(width=width, height=height)

  # Add text annotations for count inside the bars
  fig.update_traces(textposition='inside', text=df_yearly['count'].astype(str))

  fig.show()

plot_comment_distribution_by_year(thailand_comments, 'Thailand', 'Comment Distribution by Year (Thailand)')
plot_comment_distribution_by_year(usa_comments, 'USA', 'Comment Distribution by Year (USA)')

## Emotions Comparison Thailand and USA

In [7]:
def plot_emotion_distribution(df, title):
    emotions = ['anger_intensity', 'fear_intensity', 'joy_intensity']
    df_melted = df.melt(id_vars=['country'], value_vars=emotions, var_name='emotion', value_name='intensity')

    fig = px.box(df_melted, x='emotion', y='intensity', color='country', title=title)
    fig.update_layout(width=800, height=400)
    fig.show()

plot_emotion_distribution(combined_comments, 'Emotion Intensity Distribution between Thailand and USA')

In [8]:
average_fear_intensity = combined_comments.groupby('country')['fear_intensity'].mean().reset_index()

# Plot the average fear intensity comparison
fig = px.bar(average_fear_intensity, x='country', y='fear_intensity', 
             title='Average Fear Intensity Comparison between Thailand and USA',
             labels={'country': 'Country', 'fear_intensity': 'Average Fear Intensity'},
             text='fear_intensity')

# Adjust the layout
fig.update_layout(width=600, height=400)

# Update the text position and format
fig.update_traces(texttemplate='%{text:.2f}', textposition='inside', textfont_size=12)

# Show the plot
fig.show()

In [9]:
average_anger_intensity = combined_comments.groupby('country')['anger_intensity'].mean().reset_index()

# Plot the average anger intensity comparison
fig = px.bar(average_anger_intensity, x='country', y='anger_intensity', 
             title='Average Anger Intensity Comparison between Thailand and USA',
             labels={'country': 'Country', 'anger_intensity': 'Average Anger Intensity'},
             text='anger_intensity',
             color='country',
             color_discrete_sequence=['red', 'red'])

# Adjust the layout
fig.update_layout(width=600, height=400, showlegend=False)

# Update the text position and format
fig.update_traces(texttemplate='%{text:.2f}', textposition='inside', textfont_size=12)

# Show the plot
fig.show()

In [10]:
average_fear_intensity = combined_comments.groupby('country')['joy_intensity'].mean().reset_index()

# Plot the average fear intensity comparison
fig = px.bar(average_fear_intensity, x='country', y='joy_intensity', 
             title='Average Joy Intensity Comparison between Thailand and USA',
             labels={'country': 'Country', 'joy_intensity': 'Average Joy Intensity'},
             text='joy_intensity')

# Adjust the layout
fig.update_layout(width=600, height=400)

# Update the text position and format
fig.update_traces(texttemplate='%{text:.2f}', textposition='inside', textfont_size=12)

# Show the plot
fig.show()

In [11]:
def plot_mean_emotion_comparison(df, title):
    emotions = ['anger_intensity', 'fear_intensity', 'joy_intensity']
    df_melted = df.melt(id_vars=['year', 'country'], value_vars=emotions, var_name='emotion', value_name='intensity')

    # Ensure the year column is treated as a categorical variable and ordered
    df_melted['year'] = pd.Categorical(df_melted['year'], ordered=True)

    # Create subplots for mean comparisons
    fig = make_subplots(rows=1, cols=len(emotions), subplot_titles=[f'{emotion} Mean Comparison' for emotion in emotions])

    # Calculate the mean intensity
    mean_df = df_melted.groupby(['year', 'country', 'emotion'])['intensity'].mean().reset_index()

    # Add mean comparisons
    colors = {'Thailand': 'red', 'USA': 'green'}
    for i, emotion in enumerate(emotions):
        for country in df_melted['country'].unique():
            country_mean_df = mean_df[(mean_df['emotion'] == emotion) & (mean_df['country'] == country)]
            fig.add_trace(go.Scatter(x=country_mean_df['year'], y=country_mean_df['intensity'], mode='markers+lines',
                                     name=f'{country} Mean - {emotion}', legendgroup=country, showlegend=(i == 0),
                                     marker_color=colors[country]),
                          row=1, col=i+1)

    # Update layout
    fig.update_layout(height=400, width=1200, title_text=title)
    fig.show()

# Plot the mean emotion comparison
plot_mean_emotion_comparison(combined_comments, 'Average Emotion Intensity Comparison between Thailand and USA by Year')





In [12]:
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import pandas as pd

def plot_mean_emotion_comparison(df, title):
    emotions = ['anger_intensity', 'fear_intensity', 'joy_intensity']
    df_melted = df.melt(id_vars=['year', 'country'], value_vars=emotions, var_name='emotion', value_name='intensity')

    # Ensure the year column is treated as a categorical variable and ordered
    df_melted['year'] = pd.Categorical(df_melted['year'], ordered=True)

    # Create subplots for Thailand
    fig_thailand = make_subplots(rows=1, cols=len(emotions), subplot_titles=[f'{emotion} Mean Comparison' for emotion in emotions])

    # Create subplots for USA
    fig_usa = make_subplots(rows=1, cols=len(emotions), subplot_titles=[f'{emotion} Mean Comparison' for emotion in emotions])

    # Calculate the mean intensity
    mean_df = df_melted.groupby(['year', 'country', 'emotion'])['intensity'].mean().reset_index()

    # Add mean comparisons for Thailand
    for i, emotion in enumerate(emotions):
        country_mean_df = mean_df[(mean_df['emotion'] == emotion) & (mean_df['country'] == 'Thailand')]
        fig_thailand.add_trace(go.Scatter(x=country_mean_df['year'], y=country_mean_df['intensity'], mode='markers+lines',
                                          name=f'Thailand Average - {emotion}', marker_color='red'),
                               row=1, col=i+1)

    # Add mean comparisons for USA
    for i, emotion in enumerate(emotions):
        country_mean_df = mean_df[(mean_df['emotion'] == emotion) & (mean_df['country'] == 'USA')]
        fig_usa.add_trace(go.Scatter(x=country_mean_df['year'], y=country_mean_df['intensity'], mode='markers+lines',
                                     name=f'USA Average - {emotion}', marker_color='green'),
                          row=1, col=i+1)

    # Update layout for Thailand chart
    fig_thailand.update_layout(height=400, width=1200, title_text=f'{title} (Thailand)')

    # Update layout for USA chart
    fig_usa.update_layout(height=400, width=1200, title_text=f'{title} (USA)')

    # Show the plots
    fig_thailand.show()
    fig_usa.show()

# Plot the mean emotion comparison
plot_mean_emotion_comparison(combined_comments, 'Average Emotion Intensity by Year')





In [13]:
import plotly.express as px
import pandas as pd

def plot_emotion_distribution_by_year(df, title, plot_height=600):  # Add plot_height parameter
    emotions = ['anger_intensity', 'fear_intensity', 'joy_intensity']
    df['year'] = df['year'].astype(int)
    df = df.sort_values('year')
    df_melted = df.melt(id_vars=['year'], value_vars=emotions, var_name='emotion', value_name='intensity')
    num_years = len(df['year'].unique())

    fig = px.box(df_melted, x='year', y='intensity', color='emotion', title=title,
                 labels={'intensity': 'Intensity', 'year': 'Year'},
                 category_orders={"year": sorted(df['year'].unique())},
                 boxmode='group')

    fig.update_traces(boxmean=True)
    fig.update_layout(
        height=plot_height,  # Set plot height
        yaxis_title='Intensity',
        xaxis_title='Year',
        boxgap=0.3,  # Adjust spacing between boxes
        boxgroupgap=0.5,  # Adjust spacing between groups of boxes
        width = num_years * 400 # set width based on number of years 
    )
    fig.show()

# Example usage with adjusted height
plot_emotion_distribution_by_year(combined_comments.copy(), 'Emotion Distribution by Year (All Comments)', plot_height=600)
plot_emotion_distribution_by_year(thailand_comments.copy(), 'Emotion Distribution by Year (Thailand)', plot_height=600)
plot_emotion_distribution_by_year(usa_comments.copy(), 'Emotion Distribution by Year (USA)', plot_height=600)


## How do the mean intensities and yearly trends of fear, anger, and joy related to mask-wearing compare between Thailand and the USA?

### Mean Intensity Comparison:
- **Anger and Joy**: Similar levels in both countries suggest comparable emotional responses to mask-wearing.
- **Fear**: Higher in Thailand, indicating greater anxiety about the pandemic.

### Yearly Trend Analysis:
- **Anger**: Peaked in 2020 for both countries, then declined, reflecting initial frustration with mask mandates that eased over time.
- **Joy**: Lowest in 2020, increasing from 2021 to 2022, likely due to vaccine rollouts and easing restrictions.
- **Fear**:
  - **USA**: Decreased from 2020 to 2022, indicating growing confidence and normalcy.
  - **Thailand**: Remained constant, suggesting ongoing concerns and possibly less effective public health communication.

## Emotion Distribution Monthly

In [14]:
def plot_emotion_distribution_by_month(df, title):
    emotions = ['anger_intensity', 'fear_intensity', 'joy_intensity']
    df_monthly = df.groupby(['year', 'month'])[emotions].mean().reset_index()

    # Convert the month to a string format for better plotting
    df_monthly['month'] = df_monthly['month'].astype(str)

    # Create subplots
    fig = make_subplots(rows=len(emotions), cols=1, shared_xaxes=True, vertical_spacing=0.1,
                        subplot_titles=[f'{emotion.capitalize()} Intensity' for emotion in emotions])

    # Add traces for each emotion
    for i, emotion in enumerate(emotions):
        fig.add_trace(go.Scatter(x=df_monthly['month'], y=df_monthly[emotion], mode='lines', name=f'{emotion.capitalize()}',
                                 line=dict(width=2)),
                      row=i+1, col=1)

    # Update layout
    fig.update_layout(height=900, width=800, title_text=title)
    fig.update_xaxes(tickformat="%b\n%Y")

    fig.show()

# Plot emotion distribution by month for Thailand and USA
plot_emotion_distribution_by_month(thailand_comments, 'Emotion Distribution by Month (Thailand)')
plot_emotion_distribution_by_month(usa_comments, 'Emotion Distribution by Month (USA)')

### Analysis of Anger Intensity in July and August 2022
During July and August 2022, there was a noticeable peak in anger intensity regarding mask-wearing in Thailand. Key themes from the comments during this period include:

- **Anger towards authorities for lifting mask mandates**:
    - On July 1st, Thai officials relaxed COVID-19 measures, allowing Thai people to not wear masks in non-crowded areas.
    - Example: “But why announce that they should take off their masks? They shouldn't take off their masks, they should wear them.“

- **Influence of Foreign Practices**:
    - There was also anger directed towards tourists who were not wearing masks while locals were still required to do so. 


### Analysis of Fear Intensity in September and October 2020
During September and October 2020, there was a noticeable rise in fear intensity regarding mask-wearing in Thailand. Key themes from the comments during this period include:

- **Concern Over a Second Wave**:
    - Example: “There will definitely be a second wave of the outbreak. Everyone should wear a mask when they leave the house.”
- **Inconsistent Mask Usage:**
    - Observations of people not wearing masks, leading to increased anxiety about the spread of COVID-19.
	- Example: “I still see a lot of people walking around without masks.”
-  **Impact of Foreigners and Migrants:**
	- Concerns about mask-wearing habits of foreigners and migrant workers contributing to the spread of the virus.
	- Example: “It’s difficult for Burmese people to wear masks because they have to chew betel nut and spit it out all the time.”

## Mask Wearing Stance Analysis

In [15]:
color_mapping = {
    'Against': 'red',
    'Favorable': 'green',
    'Neutral': 'gray'
}

def plot_stance_distribution_by_year_proportion(df, title, height=300, width=800):
    df['count'] = 1
    yearly_totals = df.groupby(['year', 'pred_stance']).count().reset_index()
    yearly_totals['percent'] = yearly_totals.groupby('year')['count'].transform(lambda x: x / x.sum() * 100)
    
    # Plotting
    fig = px.bar(yearly_totals, x='year', y='percent', color='pred_stance', barmode='stack', title=title,
                 color_discrete_map=color_mapping, height=height, width=width,
                 category_orders={'year': sorted(df['year'].unique())})
    fig.update_layout(yaxis_title='Percentage', xaxis_title='Year')
    fig.show()


plot_stance_distribution_by_year_proportion(thailand_comments, 'Percentage Distribution of Mask Wearing Stance Over the Years (Thailand)', height=300, width=800)
plot_stance_distribution_by_year_proportion(usa_comments, 'Percentage Distribution of Mask Wearing Stance Over the Years (USA)', height=300, width=800)
plot_stance_distribution_by_year_proportion(combined_comments, 'Percentage Distribution of Mask Wearing Stance Over the Years (All)', height=300, width=800)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [16]:
def plot_favorable_stance_distribution_by_year_proportion(df, title, height=300, width=800):
    # Group by year and stance to count the occurrences
    yearly_totals = df.groupby(['year', 'pred_stance']).size().reset_index(name='count')
    
    # Calculate the total count for each year
    total_counts = yearly_totals.groupby('year')['count'].transform('sum')
    
    # Calculate the percentage for each stance within each year
    yearly_totals['percent'] = (yearly_totals['count'] / total_counts) * 100
    
    # Filter to include only favorable stance
    favorable_df = yearly_totals[yearly_totals['pred_stance'] == 'Favorable']
    
    # Ensure the year column is treated as a categorical variable and ordered
    favorable_df['year'] = pd.Categorical(favorable_df['year'], ordered=True)
    
    fig = px.bar(favorable_df, x='year', y='percent', color='pred_stance', title=title,
                 color_discrete_map={'Favorable': 'green'}, height=height, width=width,
                 category_orders={'year': sorted(favorable_df['year'].unique())},
                 text='percent')
    
    # Update layout to show the percentage values inside the bars
    fig.update_traces(texttemplate='%{text:.2f}%', textposition='inside')
    fig.update_layout(yaxis_title='Percentage', xaxis_title='Year')
    fig.show()
# Plot the favorable stance distribution by year for Thai comments in terms of proportion
plot_favorable_stance_distribution_by_year_proportion(thailand_comments, 'Proportion of Favorable Stance Distribution Over the Years (Thailand)', height=400, width=800)

# Plot the favorable stance distribution by year for USA comments in terms of proportion
plot_favorable_stance_distribution_by_year_proportion(usa_comments, 'Proportion of Favorable Stance Distribution Over the Years (USA)', height=400, width=800)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [17]:
def plot_against_stance_distribution_by_year_proportion(df, title, height=300, width=800):
    # Group by year and stance to count the occurrences
    yearly_totals = df.groupby(['year', 'pred_stance']).size().reset_index(name='count')
    
    # Calculate the total count for each year
    total_counts = yearly_totals.groupby('year')['count'].transform('sum')
    
    # Calculate the percentage for each stance within each year
    yearly_totals['percent'] = (yearly_totals['count'] / total_counts) * 100
    
    # Filter to include only favorable stance
    favorable_df = yearly_totals[yearly_totals['pred_stance'] == 'Against']
    
    # Ensure the year column is treated as a categorical variable and ordered
    favorable_df['year'] = pd.Categorical(favorable_df['year'], ordered=True)
    
    fig = px.bar(favorable_df, x='year', y='percent', color='pred_stance', title=title,
                 color_discrete_map={'Against': 'red'}, height=height, width=width,
                 category_orders={'year': sorted(favorable_df['year'].unique())},
                 text='percent')
    
    # Update layout to show the percentage values inside the bars
    fig.update_traces(texttemplate='%{text:.2f}%', textposition='inside')
    fig.update_layout(yaxis_title='Percentage', xaxis_title='Year')
    fig.show()
# Plot the favorable stance distribution by year for Thai comments in terms of proportion
plot_against_stance_distribution_by_year_proportion(thailand_comments, 'Proportion of Against Stance Distribution Over the Years (Thailand)', height=400, width=800)

# Plot the favorable stance distribution by year for USA comments in terms of proportion
plot_against_stance_distribution_by_year_proportion(usa_comments, 'Proportion of Against Stance Distribution Over the Years (USA)', height=400, width=800)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



1.	Thailand:
	- Favorable stance peaked in 2021 and declined in 2022.
	- Against stance proportion peaked in 2021.
2.	USA:
	- Favorable stance continuously declined after 2020.
	- Against stance proportion peaked in 2021.


In [18]:
def calculate_stance_percentages(df):
    # Count the number of each stance by year and month
    stance_counts = df.groupby(['year', 'month', 'pred_stance']).size().reset_index(name='count')

    # Calculate the total number of comments per month
    total_counts = df.groupby(['year', 'month']).size().reset_index(name='total_count')

    # Merge the two DataFrames to calculate percentages
    stance_percentages = pd.merge(stance_counts, total_counts, on=['year', 'month'])
    stance_percentages['percentage'] = (stance_percentages['count'] / stance_percentages['total_count']) * 100

    # Ensure 'month' is a string format that Plotly can handle
    stance_percentages['month'] = stance_percentages['month'].dt.strftime('%Y-%m')

    return stance_percentages

thai_stance_percentages = calculate_stance_percentages(thailand_comments)
english_stance_percentages = calculate_stance_percentages(usa_comments)

def plot_stance_distribution_by_month(df, title):
    # Define custom color mapping
    color_mapping = {
        'Favorable': 'green',
        'Neutral': 'gray',
        'Against': 'red'
    }

    fig = px.line(df, x='month', y='percentage', color='pred_stance', line_group='year', title=title,
                  labels={'month': 'Month', 'percentage': 'Percentage (%)', 'pred_stance': 'Stance'},
                  color_discrete_map=color_mapping)
    fig.update_xaxes(tickformat="%b\n%Y")  # Update x-axis format
    fig.show()

# Plot the stance distribution by month for Thai comments
plot_stance_distribution_by_month(thai_stance_percentages, 'Stance Distribution by Month (Thailand)')

# Plot the stance distribution by month for English comments
plot_stance_distribution_by_month(english_stance_percentages, 'Stance Distribution by Month (USA)')

In [19]:
comments_2021 = combined_comments[combined_comments['event_date'].dt.year == 2021]
def calculate_daily_stance_percentages(df):
    # Count the number of each stance by day
    stance_counts = df.groupby([df['event_date'].dt.date, 'pred_stance']).size().reset_index(name='count')

    # Calculate the total number of comments per day
    total_counts = df.groupby(df['event_date'].dt.date).size().reset_index(name='total_count')

    # Merge the two DataFrames to calculate percentages
    stance_percentages = pd.merge(stance_counts, total_counts, left_on='event_date', right_on='event_date')
    stance_percentages['percentage'] = (stance_percentages['count'] / stance_percentages['total_count']) * 100

    return stance_percentages

# Calculate daily stance percentages for both Thai and English comments
thai_daily_percentages = calculate_daily_stance_percentages(comments_2021[comments_2021['country'] == 'Thailand'])
english_daily_percentages = calculate_daily_stance_percentages(comments_2021[comments_2021['country'] == 'USA'])
def plot_daily_stance_percentages(df, title):
    # Define custom color mapping
    color_mapping = {
        'Favorable': 'green',
        'Neutral': 'gray',
        'Against': 'red'
    }

    fig = px.line(df, x='event_date', y='percentage', color='pred_stance', title=title,
                  labels={'event_date': 'Date', 'percentage': 'Percentage (%)', 'pred_stance': 'Stance'},
                  color_discrete_map=color_mapping)
    fig.update_xaxes(tickformat="%b %d\n%Y")  # Update x-axis format
    fig.show()

# Plot the daily stance percentage distribution for English comments
plot_daily_stance_percentages(english_daily_percentages, 'Daily Stance Percentage Distribution (USA, 2021)')

In [20]:
comments_2020 = combined_comments[combined_comments['event_date'].dt.year == 2020]
def calculate_daily_stance_percentages(df):
    # Count the number of each stance by day
    stance_counts = df.groupby([df['event_date'].dt.date, 'pred_stance']).size().reset_index(name='count')

    # Calculate the total number of comments per day
    total_counts = df.groupby(df['event_date'].dt.date).size().reset_index(name='total_count')

    # Merge the two DataFrames to calculate percentages
    stance_percentages = pd.merge(stance_counts, total_counts, left_on='event_date', right_on='event_date')
    stance_percentages['percentage'] = (stance_percentages['count'] / stance_percentages['total_count']) * 100

    return stance_percentages

# Calculate daily stance percentages for both Thai and English comments
thai_daily_percentages = calculate_daily_stance_percentages(comments_2020[comments_2020['country'] == 'Thailand'])
english_daily_percentages = calculate_daily_stance_percentages(comments_2020[comments_2020['country'] == 'USA'])
def plot_daily_stance_percentages(df, title):
    # Define custom color mapping
    color_mapping = {
        'Favorable': 'green',
        'Neutral': 'gray',
        'Against': 'red'
    }

    fig = px.line(df, x='event_date', y='percentage', color='pred_stance', title=title,
                  labels={'event_date': 'Date', 'percentage': 'Percentage (%)', 'pred_stance': 'Stance'},
                  color_discrete_map=color_mapping)
    fig.update_xaxes(tickformat="%b %d\n%Y")  # Update x-axis format
    fig.show()

# Plot the daily stance percentage distribution for English comments
plot_daily_stance_percentages(english_daily_percentages, 'Daily Stance Percentage Distribution (USA, 2020)')

## Sentiment Analysis by News Publisher

In [21]:
color_mapping = {
    'Against': 'red',
    'Favorable': 'green',
    'Neutral': 'gray'
}

def plot_stance_distribution_by_news_publisher(df, title, height=400, width=800):
    # Group by news publisher and stance to get the counts
    stance_counts = df.groupby(['news_publisher', 'pred_stance']).size().reset_index(name='count')
    
    # Calculate the total counts for each news publisher
    total_counts = stance_counts.groupby('news_publisher')['count'].transform('sum')
    
    # Calculate the percentage for each stance within each news publisher
    stance_counts['percent'] = (stance_counts['count'] / total_counts) * 100
    
    # Create the histogram plot
    fig = px.bar(stance_counts, x='news_publisher', y='percent', color='pred_stance', barmode='group', title=title,
                 color_discrete_map=color_mapping, height=height, width=width,
                 category_orders={'news_publisher': sorted(df['news_publisher'].unique())})
    
    # Update the x-axis to sort categories by total percentage descending
    fig.update_xaxes(categoryorder='total descending')
    
    # Add percentage labels on the bars
    fig.update_traces(texttemplate='%{y:.2f}%', textposition='inside')
    
    fig.show()

# Plot stance distribution by news publisher for USA comments
plot_stance_distribution_by_news_publisher(combined_comments[combined_comments['country'] == 'Thailand'].copy(), 'Proportion of Stance Distribution by News Publisher (Thailand)', height=400, width=800)

In [22]:
color_mapping = {
    'Against': 'red',
    'Favorable': 'green',
    'Neutral': 'gray'
}

def plot_stance_distribution_by_news_publisher(df, title, height=400, width=800):
    # Group by news publisher and stance to get the counts
    stance_counts = df.groupby(['news_publisher', 'pred_stance']).size().reset_index(name='count')
    
    # Calculate the total counts for each news publisher
    total_counts = stance_counts.groupby('news_publisher')['count'].transform('sum')
    
    # Calculate the percentage for each stance within each news publisher
    stance_counts['percent'] = (stance_counts['count'] / total_counts) * 100
    
    # Create the histogram plot
    fig = px.bar(stance_counts, x='news_publisher', y='percent', color='pred_stance', barmode='group', title=title,
                 color_discrete_map=color_mapping, height=height, width=width,
                 category_orders={'news_publisher': sorted(df['news_publisher'].unique())})
    
    # Update the x-axis to sort categories by total percentage descending
    fig.update_xaxes(categoryorder='total descending')
    
    # Add percentage labels on the bars
    fig.update_traces(texttemplate='%{y:.2f}%', textposition='inside')
    
    fig.show()

# Filter for USA comments
usa_comments = combined_comments[combined_comments['country'] == 'USA'].copy()

# Plot stance distribution by news publisher for USA comments
plot_stance_distribution_by_news_publisher(usa_comments, 'Proportion of Stance Distribution by News Publisher (USA)', height=400, width=800)

- For Fox News, the favorable stance has the lowest proportion.
- In contrast, CNN has the highest proportion of favorable stance counts.
- CNN’s favorable stance counts are the highest among the news publishers analyzed, while Fox News’s are the lowest.

This makes sense because CNN is considered left-leaning and tends to favor the Democratic Party, whereas Fox News is right-leaning and supportive of the Republican Party and President Trump. This reflects the observation that Democrats are more likely to favor mask-wearing compared to Republicans.

Reference:
- https://www.google.com/search?q=fox+news+vs+CNN&client=firefox-b-d&sca_esv=4d286f4f3872d725&ei=T7ucZr2tEfbT2roPkdPhqQ0&ved=0ahUKEwj9kPrI0LeHAxX2qVYBHZFpONUQ4dUDCA8&uact=5&oq=fox+news+vs+CNN&gs_lp=Egxnd3Mtd2l6LXNlcnAiD2ZveCBuZXdzIHZzIENOTjIFEAAYgAQyBRAAGIAEMgUQABiABDIFEAAYgAQyBRAAGIAEMgUQABiABDIFEAAYgAQyBRAAGIAEMgUQABiABDIFEAAYgARIkRxQ5wJYyhpwAngBkAEBmAH1AaAB7g-qAQYyLjEzLjG4AQPIAQD4AQGYAhGgAtEOqAIFwgIUEAAYgAQYkQIYtAIYigUY6gLYAQHCAhQQABiABBjjBBi0AhjpBBjqAtgBAcICCxAAGIAEGJECGIoFwgIQEC4YgAQY0QMYQxjHARiKBcICBRAuGIAEwgIKEC4YgAQYQxiKBcICChAAGIAEGEMYigXCAhEQLhiABBiRAhjRAxjHARiKBcICCxAuGIAEGMcBGK8BwgILEC4YgAQY0QMYxwHCAggQLhiABBjUAsICDhAAGIAEGJECGMcDGIoFwgIOEC4YgAQY0QMY1AIYxwHCAggQABiABBjHA5gDBboGBAgBGAeSBwQzLjE0oAePkgE&sclient=gws-wiz-serp
- https://www.pewresearch.org/short-reads/2020/04/01/americans-main-sources-for-political-news-vary-by-party-and-age/

In [23]:
def plot_emotion_intensity_distribution_by_news_publisher(df, title, height=400, width=800):
    emotions = ['anger_intensity', 'fear_intensity', 'joy_intensity']
    df_melted = df.melt(id_vars=['news_publisher'], value_vars=emotions, var_name='emotion', value_name='intensity')

    fig = px.box(df_melted, x='news_publisher', y='intensity', color='emotion', title=title,
                 height=height, width=width, category_orders={'news_publisher': sorted(df['news_publisher'].unique())})
    fig.update_xaxes(categoryorder='total descending')
    fig.show()

# Plot emotion intensity distribution by news publisher for USA comments
plot_emotion_intensity_distribution_by_news_publisher(usa_comments, 'Emotion Intensity Distribution by News Publisher (USA)', height=400, width=800)

In [24]:
color_mapping = {
    'Against': 'red',
    'Favorable': 'green',
    'Neutral': 'gray'
}

def plot_stance_distribution_by_news_publisher(df, title, height=400, width=800):
    fig = px.histogram(df, x='news_publisher', color='pred_stance', barmode='group', title=title,
                       color_discrete_map=color_mapping, height=height, width=width,
                       category_orders={'news_publisher': sorted(df['news_publisher'].unique())},
                       histnorm='percent')  # Plot in terms of proportion
    fig.update_xaxes(categoryorder='total descending')
    fig.show()

# Filter for USA comments
usa_comments = combined_comments[combined_comments['country'] == 'Thailand'].copy()

# Plot stance distribution by news publisher for USA comments
plot_stance_distribution_by_news_publisher(usa_comments, 'Proportion of Stance Distribution by News Publisher (USA)', height=400, width=800)