In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import glob

In [None]:
# Read one DataFrame per party CSV found in the working directory.
# glob returns paths in arbitrary, filesystem-dependent order, but later cells
# pair the frames with `labels`/`colors` purely by position, so the paths are
# sorted to make that pairing deterministic.
# NOTE(review): assumes the alphabetical file order matches the order of
# `labels` - confirm against the actual file names.
csv_paths = sorted(glob.glob('*.csv'), key=str.lower)

# Cut off everything from the 12th of May 2021 onwards.
# The previous loop only rebound its loop variable (`frame = frame[...]`), so
# the frames stored in the list were never actually filtered.
# NOTE(review): 'date' is compared as a string; this presumes ISO-style
# 'YYYY-MM-DD...' values - confirm against the CSV contents.
dataframes = []
for path in csv_paths:
    frame = pd.read_csv(path)
    dataframes.append(frame[frame['date'] < '2021-05-12'])

In [None]:
# Display name and plot colour per party; both lists are matched by position
# with the frames in `dataframes`.
# Fixed legend typos: 'Afd' -> 'AfD', 'B90/GÜRNE' -> 'B90/GRÜNE'.
labels = ['AfD', 'CDU', 'CSU', 'FDP', 'B90/GRÜNE', 'LINKE', 'SPD']
colors = ['#3D3BFB', '#6F6F6F', '#73F0F8', '#F3FF56', '#52B11F', '#9E49C0', '#E93333']

In [None]:
# Number of tweets for each party, drawn as a donut chart.
weight = [len(df) for df in dataframes]
total = sum(weight)

fig1, ax1 = plt.subplots(figsize=(10, 10))
ax1.pie(
    weight,
    labels=labels,
    colors=colors,
    # one offset per wedge; derived from the data instead of a hard-coded 7
    explode=[0.05] * len(weight),
    shadow=True,
    startangle=45,
    # autopct receives the wedge's share in percent; convert it back to the
    # absolute tweet count for the annotation
    autopct=lambda p: f'{p*total/100:.0f}',
    pctdistance=0.8,
);

# A white disc over the centre turns the pie into a donut. It is added to the
# axes created above rather than to whatever pyplot considers "current".
centre_circle = plt.Circle((0, 0), 0.70, fc='white')
ax1.add_artist(centre_circle);

fig = fig1  # alias kept in case a later cell still refers to `fig`
# NOTE(review): assumes the 'img/' directory already exists - confirm.
fig1.savefig('img/pie_number_of_tweets.png', facecolor='white')

In [None]:
# Month boundaries from 01.10.2017 to 01.05.2021 (44 month starts).
# "MS" (month start) yields the same timestamps as the former
# month-end + 1 day construction.
months = pd.date_range("2017-10-01", periods=44, freq="MS")

# Count tweets per party for every half-open interval [month, next month).
# Rows are collected first and the frame is built once: DataFrame.append was
# removed in pandas 2.0, and growing a frame row by row is quadratic anyway.
monthly_rows = []
month_names = []
for month_start, month_end in zip(months[:-1], months[1:]):
    # 'date' is compared as a string against the string form of the bounds.
    # NOTE(review): presumes ISO-style date strings in the CSVs - confirm.
    lower, upper = str(month_start), str(month_end)
    monthly_rows.append({
        label: int(((party['date'] >= lower) & (party['date'] < upper)).sum())
        for label, party in zip(labels, dataframes)
    })
    month_names.append(f'{month_start.month}. {month_start.year}')

# One row per month (index like '10. 2017'), one column per party.
monthly_df = pd.DataFrame(monthly_rows, index=month_names, columns=labels)

In [None]:
# Stacked bar chart: one bar per month, one coloured segment per party.
monthly_ax = monthly_df.plot(kind='bar', stacked=True, figsize=(12, 12), color=colors)
monthly_ax.set_title("Number of Tweets per Month from October 2017 to May 2021")
monthly_ax.set_xlabel("Month")
monthly_ax.set_ylabel("Number of Tweets");
# Save the figure that owns the axes pandas just created.
monthly_ax.figure.savefig('img/monthly_tweets.png', facecolor='white')