In [1]:
import pandas as pd
from datetime import datetime

First we load the speeches.

In [2]:
# Load the speeches table; its `sentiment` column drives the split below.
speeches = pd.read_feather(
    '../speeches_2020_with_sentiments_health.feather'
)

Then we split the speeches by sentiment.

In [3]:
# Keep only the rows whose sentiment label is 'positive', then show (rows, columns).
speeches_pos = speeches.loc[speeches['sentiment'] == 'positive']
speeches_pos.shape

(1, 14)

In [4]:
# Keep only the rows whose sentiment label is 'negative', then show (rows, columns).
speeches_neg = speeches.loc[speeches['sentiment'] == 'negative']
speeches_neg.shape

(16, 14)

Then we merge the speeches with the politicians to analyse the speakers' professions.

In [5]:
# Load the politicians table that is joined onto the speeches below.
people = pd.read_feather(
    'politicians_cleaned_eng.feather'
)

In [6]:
# Join every negative speech with its politician's record (politicianId -> id).
# The left side is rebuilt from `speeches` rather than taken from the current
# `speeches_neg`, so re-running this cell does not merge `people` a second
# time (which would produce suffixed duplicates such as cleanedProfession_x/_y
# and break the attribute access in the following cells).
speeches_neg = pd.merge(
    speeches[speeches.sentiment == 'negative'],
    people,
    left_on='politicianId',
    right_on='id',
)

In [7]:
# Number of negative speeches per cleaned profession of the speaker.
speeches_neg['cleanedProfession'].value_counts()

Law                4
Social Science     3
No Match           3
Economy            2
Ambiguous          2
Natural Science    2
Name: cleanedProfession, dtype: int64

In [8]:
# Join every positive speech with its politician's record (politicianId -> id).
# The left side is rebuilt from `speeches` rather than taken from the current
# `speeches_pos`, so re-running this cell does not merge `people` a second
# time (which would produce suffixed duplicates such as cleanedProfession_x/_y
# and break the attribute access in the following cells).
speeches_pos = pd.merge(
    speeches[speeches.sentiment == 'positive'],
    people,
    left_on='politicianId',
    right_on='id',
)

In [9]:
# Number of positive speeches per cleaned profession of the speaker.
speeches_pos['cleanedProfession'].value_counts()

Natural Science    1
Name: cleanedProfession, dtype: int64

Next we analyse the age of the politicians.

In [10]:
def get_age(born, speech_date):
    """Return the speaker's age in completed years on the day of the speech.

    Parameters
    ----------
    born : str, date-like or missing
        Birth date, either as a ``'%Y-%m-%d'`` string or as an object that
        exposes ``year``/``month``/``day`` (``date``, ``datetime``,
        ``pandas.Timestamp``). ``None``, NaN/NaT and falsy values such as
        ``''`` are treated as "unknown".
    speech_date : date-like or missing
        Day of the speech; must expose ``year``, ``month`` and ``day``.

    Returns
    -------
    int or None
        The age in whole years, or ``None`` when either date is missing.

    Raises
    ------
    ValueError
        If ``born`` is a non-empty string that does not match ``'%Y-%m-%d'``.
    """
    # NaN and NaT are the only values that compare unequal to themselves.
    # They are truthy, so a plain `if born:` would let them through and
    # crash inside strptime.
    if born is None or born != born or not born:
        return None
    if speech_date is None or speech_date != speech_date:
        return None

    # Only strings need parsing; date-like objects are used as they are.
    if isinstance(born, str):
        born = datetime.strptime(born, '%Y-%m-%d')

    # Subtract one year when the birthday has not yet occurred in the speech year.
    birthday_pending = (speech_date.month, speech_date.day) < (born.month, born.day)
    return speech_date.year - born.year - birthday_pending

In [11]:
# Age of each speaker on the day of the speech, computed row by row.
speeches_neg['age'] = speeches_neg.apply(
    lambda row: get_age(row['birthDate'], row['date']),
    axis='columns',
)

In [12]:
# Oldest and youngest speaker among the negative speeches.
(speeches_neg['age'].max(), speeches_neg['age'].min())

(70, 33)

In [13]:
# Age of each speaker on the day of the speech, computed row by row.
speeches_pos['age'] = speeches_pos.apply(
    lambda row: get_age(row['birthDate'], row['date']),
    axis='columns',
)

In [14]:
# Oldest and youngest speaker among the positive speeches.
(speeches_pos['age'].max(), speeches_pos['age'].min())

(38, 38)

Then merge speeches with factions to analyse the factions of the politicians.

In [15]:
# Load the factions table that is joined onto the speeches below.
factions = pd.read_feather(
    'factions.feather'
)

In [16]:
# Attach the faction record to every negative speech (factionId -> id).
# NOTE(review): this cell overwrites its own input, so running it twice
# merges `factions` a second time.
speeches_neg = speeches_neg.merge(
    factions,
    left_on='factionId',
    right_on='id',
)

In [17]:
# Number of negative speeches per faction abbreviation.
speeches_neg['abbreviation'].value_counts()

DIE LINKE.    5
FDP           3
CDU/CSU       3
SPD           2
AfD           1
Grüne         1
not found     1
Name: abbreviation, dtype: int64

In [18]:
# Attach the faction record to every positive speech (factionId -> id).
# NOTE(review): this cell overwrites its own input, so running it twice
# merges `factions` a second time.
speeches_pos = speeches_pos.merge(
    factions,
    left_on='factionId',
    right_on='id',
)

In [19]:
# Number of positive speeches per faction abbreviation.
speeches_pos['abbreviation'].value_counts()

CDU/CSU    1
Name: abbreviation, dtype: int64

Finally we analyse the gender.

In [20]:
# Number of positive speeches per speaker gender.
speeches_pos['gender'].value_counts()

weiblich    1
Name: gender, dtype: int64

In [21]:
# Number of negative speeches per speaker gender.
speeches_neg['gender'].value_counts()

männlich    9
weiblich    7
Name: gender, dtype: int64