In [1]:
import pandas as pd
from datetime import datetime

First we load the speeches.

In [2]:
speeches = pd.read_feather('../speeches_2020_with_sentiments_senioren.feather')

And group according to sentiments.

In [3]:
speeches_pos = speeches[speeches.sentiment == 'positive']
speeches_pos.shape

(0, 14)

In [4]:
speeches_neg = speeches[speeches.sentiment == 'negative']
speeches_neg.shape

(18, 14)

Then merge speeches with politicians to analyse the professions.

In [5]:
people = pd.read_feather('politicians_cleaned_eng.feather')

In [6]:
speeches_neg = pd.merge(speeches_neg, people, left_on='politicianId', right_on="id")

In [7]:
speeches_neg.cleanedProfession.value_counts()

Law                5
Economy            4
No Match           3
Social Science     3
None               1
Ambiguous          1
Natural Science    1
Name: cleanedProfession, dtype: int64

Next we analyse the age of the politicians.

In [8]:
def get_age(born, speech_date):
    if born:
        born = datetime.strptime(born, '%Y-%m-%d')
        today = speech_date
        return today.year - born.year - ((today.month, today.day) < (born.month, born.day))
    return None

In [9]:
speeches_neg['age'] = speeches_neg.apply(lambda x: get_age(x.birthDate, x.date), axis=1)

In [10]:
speeches_neg.age.max(), speeches_neg.age.min()

(76.0, 33.0)

Then merge speeches with factions to analyse the factions of the politicians.

In [11]:
factions = pd.read_feather('factions.feather')

In [12]:
speeches_neg = pd.merge(speeches_neg, factions, left_on='factionId', right_on="id")

In [13]:
speeches_neg.abbreviation.value_counts()

DIE LINKE.    6
AfD           4
FDP           3
CDU/CSU       2
Grüne         1
not found     1
SPD           1
Name: abbreviation, dtype: int64

Finally we analyse the gender.

In [14]:
speeches_neg.gender.value_counts()

weiblich    11
männlich     6
Name: gender, dtype: int64