# This notebook focuses on visualizing sentiment data related to 'Drag Race' and 'Love Island'. It imports necessary datasets, performs preliminary data analysis, and utilizes various plotting techniques to provide visual insights into the sentiment distribution, frequency of unique values, and other relevant information.

Importing necessary libraries and modules for the analysis.

In [None]:
import pandas as pd

Reading the two new datasets (Drag Race and Love Island) from CSV files into pandas DataFrames.

In [None]:
# Load the Drag Race and Love Island CSV files, one DataFrame per file.
drag_new, love_new = (
    pd.read_csv(csv_path)
    for csv_path in ('Notebook_4_new_drag.csv', 'Notebook_4_new_loveisland.csv')
)

In [None]:
# Stack the two frames row-wise. The index is not reset, so index labels
# may repeat across the Drag Race and Love Island rows.
tweets_new = pd.concat(objs=[drag_new, love_new], axis='index')

Calculating the frequency of unique values in a DataFrame column.

In [None]:
# Number of rows per distinct value of the 'Label' column
tweets_new.loc[:, 'Label'].value_counts()

<s>        4827
nothate    4763
hate        237
<pad>       173
Name: Label, dtype: int64

Renaming the labels

In [None]:
# Map the raw '<s>' / '<pad>' values onto the 'nothate' / 'hate' classes.
# The result is assigned back to the column instead of calling
# replace(inplace=True) on the column selection: with pandas Copy-on-Write
# that in-place call operates on a temporary object and leaves tweets_new
# unchanged.
# NOTE(review): the '<s>' -> 'nothate' and '<pad>' -> 'hate' mapping is taken
# as given; confirm it against the model's label encoding.
tweets_new['Label'] = tweets_new['Label'].replace({'<s>': 'nothate', '<pad>': 'hate'})

In [None]:
# Keep only the 'Label' column, as a one-column DataFrame
selected_columns = tweets_new.loc[:, ['Label']]
# Attach it to tweets_df side by side (rows are matched on index labels).
# NOTE(review): tweets_df is not created in the cells shown here; confirm it
# is defined before this cell runs on a fresh kernel.
tweets_analysis = pd.concat(objs=[tweets_df, selected_columns], axis='columns')

Renaming columns

In [None]:
# Give both prediction columns names that say whether the prediction was
# made before or after '#' removal.
prediction_column_names = {
    'Predicted_label': 'Pred_before_#_removal',
    'Label': 'Pred_after_#_removal',
}
tweets_analysis.rename(columns=prediction_column_names, inplace=True)

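Importing the plotting library, computing the percentage of tweets classified as hate in each dataset before and after hashtag removal, and plotting the comparison as a grouped bar chart.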

In [None]:
import pandas as pd
import plotly.express as px

# Compare, per dataset, the percentage of tweets classified as "hate" before
# and after hashtag removal, and show it as a grouped bar chart.

# Total number of tweets in each 'dataset' group (denominator of the percentages)
total_counts = tweets_analysis.groupby('dataset').size().reset_index(name='total_count')

# Number of tweets labelled "hate" per dataset, for each prediction column
hate_counts = (
    tweets_analysis[tweets_analysis['Pred_before_#_removal'] == 'hate']
    .groupby('dataset')
    .size()
    .reset_index(name='hate_count_before')
)
hate_counts_after = (
    tweets_analysis[tweets_analysis['Pred_after_#_removal'] == 'hate']
    .groupby('dataset')
    .size()
    .reset_index(name='hate_count_after')
)

# Left-merge onto the totals so a dataset without any "hate" row is kept;
# its missing count becomes 0. Only the two count columns are filled.
counts_df = (
    total_counts
    .merge(hate_counts, on='dataset', how='left')
    .merge(hate_counts_after, on='dataset', how='left')
    .fillna({'hate_count_before': 0, 'hate_count_after': 0})
)

# Calculate the percentages
counts_df['Before_#_Removal'] = (counts_df['hate_count_before'] / counts_df['total_count']) * 100
counts_df['After_#_Removal'] = (counts_df['hate_count_after'] / counts_df['total_count']) * 100

# Melt to long format: one row per (dataset, prediction type) for the grouped bars
melted_df = counts_df.melt(id_vars='dataset', value_vars=['Before_#_Removal', 'After_#_Removal'])

# Create the grouped bar chart. The figure title is set once, in update_layout
# below, so no title is passed here.
fig = px.bar(melted_df,
             x='dataset',
             y='value',
             color='variable',
             barmode='group',
             labels={'value': 'Percentage of Hate', 'variable': 'Prediction Type'},
             color_discrete_map={'Before_#_Removal': '#fdee99', 'After_#_Removal': '#51b27c'})

# Display the percentage values on the bars
fig.update_traces(texttemplate='%{y:.2f}%', textposition='outside')

# Upper bound of the y-axis: at least 10, and otherwise 15% above the tallest
# bar so that neither the bar nor its outside label is clipped.
y_axis_max = max(10, float(melted_df['value'].max()) * 1.15)

# Larger font (size 16), centred title, y-axis range, legend at the top right,
# and white plot / paper backgrounds
fig.update_layout(
    title={'text': 'Comparison of Tweets Classified as Hate Before and After Hashtag(#) Removal', 'x': 0.5},
    xaxis_title="Dataset",
    yaxis=dict(title="Percentage of Hate", range=[0, y_axis_max]),
    font=dict(size=16),
    legend=dict(y=1, x=1, xanchor='right'),  # Place the legend at the top right
    plot_bgcolor='white',  # Set the plot background color to white
    paper_bgcolor='white'  # Set the paper (outer) background color to white
)

# Show the plot
fig.show()
