In [None]:
import pandas as pd

# Imports for plotting
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator

# sentiment discovery imports
from textblob import TextBlob

# utilities
from tqdm.auto import tqdm
import datetime

In [None]:
# Load the DataFrame from a local pickle file; later cells read its
# 'cleaned_tweets', 'created_at' and 'language' columns.
# NOTE(review): unpickling can execute arbitrary code — only load this file
# if it comes from a trusted source.
df = pd.read_pickle('pre-processed-data.pkl')

In [None]:
# Perform sentiment analysis using TextBlob.
# Does ~4k iterations per second
def _summed_polarity(text):
    """Return the sum of the TextBlob polarity scores of every sentence in `text`."""
    total = 0
    for part in TextBlob(text).sentences:
        total += part.sentiment.polarity
    return total


polarity = []
sentiment = []
for tweet in tqdm(df['cleaned_tweets']):
    pol = _summed_polarity(tweet)
    polarity.append(pol)
    # NOTE(review): a tweet is labelled 'positive' only when its summed
    # polarity exceeds 0.5; everything else (including neutral text) is
    # labelled 'negative' — confirm this threshold is intended.
    if pol > 0.5:
        sentiment.append('positive')
    else:
        sentiment.append('negative')

In [None]:
# Convert every entry of 'created_at' to a numeric timestamp via its
# .timestamp() method, so posting time can be used as a plot axis.
posting_times = [created.timestamp() for created in df['created_at']]

In [None]:
# Add the computed per-tweet values to the DataFrame as new columns.
# These assignments modify `df` in place; each list was built by iterating
# over a column of `df`, so it has one entry per row.
# Summed sentence polarity of each tweet.
df['polarity'] = polarity
# 'positive' / 'negative' label derived from the polarity.
df['sentiment'] = sentiment
# Numeric posting time obtained from 'created_at' via .timestamp().
df['timestamps'] = posting_times

In [None]:
sns.set_theme(style="whitegrid")

# Tick geometry in seconds, matching the numeric 'timestamps' column on the x axis.
SECONDS_PER_DAY = 86400
SECONDS_PER_HOUR = 3600
FIRST_DAY_START = 1618963200  # 2021-04-21 00:00:00 UTC
DAY_LABELS = ['21-04-21', '21-04-22', '21-04-23', '21-04-24']

fig, ax = plt.subplots()

# One horizontal violin per language, split by sentiment label:
# red half = 'positive', blue half = 'negative'.
sns.violinplot(data=df, x='timestamps', y='language', hue='sentiment', split=True,
               orient='h', inner=None, palette={"positive": "r", "negative": "b"}, ax=ax)

# Major ticks: one per day, labelled with the date.
ax.set_xticks([FIRST_DAY_START + day * SECONDS_PER_DAY for day in range(len(DAY_LABELS))])
ax.set_xticklabels(DAY_LABELS)

# Minor ticks: every quarter day (6 hours), labelled with the hour of day.
ax.xaxis.set_minor_locator(MultipleLocator(SECONDS_PER_DAY / 4))
ax.xaxis.set_minor_formatter(
    lambda value, _pos: str(int((value % SECONDS_PER_DAY) // SECONDS_PER_HOUR)) + ':00')
ax.tick_params(which='minor', labelsize=7)
# Longer major ticks so the date labels sit below the hour labels.
ax.tick_params(which='major', length=12)

# Axis labels and title, set through the Axes object rather than the pyplot
# state machine so the cell uses a single, explicit plotting interface.
ax.set_ylabel('Language')
ax.set_xlabel('Time (UTC)')
ax.set_title('Activity and sentiment in different languages');