<H1>Check Sentiment Analysis</H1>

In [1]:
import csv
import numpy as np
import pandas as pd
import re
import datetime
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
import statsmodels.stats.diagnostic as dg
from statsmodels.stats.diagnostic import het_white

from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

<h2>Laden der Datensätze</h2>

In [2]:
#Load German Sentiment CSV file.
df_de = pd.read_csv("/Users/tobias/Dev/FOM/Master_Thesis/data/Sentiment_Analysis/Dataset_DE_Bert_Vader.csv", sep=";",
                parse_dates=["created_at"])

#Change the date field
df_de["Date"] = pd.to_datetime(df_de.created_at).apply(lambda x: x.date())

#Change the values of sentiment bert field and add a new field for a numeric value sentiment value 
df_de["sentiment_bert"] = df_de["sentiment_bert"].replace(["neutral", "negative", "positive"], ["NEU", "NEG", "POS"])
df_de["sentiment_bert_value"] = df_de["sentiment_bert"].replace({"NEU": 0, "NEG": -1, "POS": 1})

#Create a new field for a numeric sentiment value
df_de["sentiment_vader_value"] = [1 if i >= 0.05 else -1 if i <= -0.05 else 0 for i in df_de["sentiment_vader"]]

#Create a df for adding the year and week to the df
df_de_week_year = pd.DataFrame([(i.isocalendar().week, i.isocalendar().year, i.isocalendar().weekday) for i in df_de["Date"]],
                               columns=["Week", "Year", "Weekday"])

#Recalulate the week
for each in range(len(df_de_week_year)):
    if ((df_de_week_year.iloc[each]["Weekday"] > 5) & (df_de_week_year.iloc[each]["Week"] != 1)):
        df_de_week_year.iloc[each]["Week"] = df_de_week_year.iloc[each]["Week"] - 1
    elif ((df_de_week_year.iloc[each]["Weekday"] > 5) & (df_de_week_year.iloc[each]["Week"] == 1)):
        df_de_week_year.iloc[each]["Week"] = 52

#Adding the week and year information to the df
df_de[["Week", "Year"]] = df_de_week_year[["Week", "Year"]]

tweet_id_friday_list = []
for each in range(len(df_de)):
    if (df_de["Date"][each].isocalendar().year == 2016) & (df_de["Date"][each].isocalendar().week == 12) & (df_de["Date"][each].isocalendar().weekday == 5):
        tweet_id_friday_list.append(each)
    elif (df_de["Date"][each].isocalendar().year == 2017) & (df_de["Date"][each].isocalendar().week == 15) & (df_de["Date"][each].isocalendar().weekday == 5):
        tweet_id_friday_list.append(each)
    elif (df_de["Date"][each].isocalendar().year == 2018) & (df_de["Date"][each].isocalendar().week == 13) & (df_de["Date"][each].isocalendar().weekday == 5):
        tweet_id_friday_list.append(each)
    elif (df_de["Date"][each].isocalendar().year == 2019) & (df_de["Date"][each].isocalendar().week == 16) & (df_de["Date"][each].isocalendar().weekday == 5):
        tweet_id_friday_list.append(each)
    elif (df_de["Date"][each].isocalendar().year == 2020) & (df_de["Date"][each].isocalendar().week == 15) & (df_de["Date"][each].isocalendar().weekday == 5):
        tweet_id_friday_list.append(each)
    elif (df_de["Date"][each].isocalendar().year == 2020) & (df_de["Date"][each].isocalendar().week == 18) & (df_de["Date"][each].isocalendar().weekday == 5):
        tweet_id_friday_list.append(each)

for each in tweet_id_friday_list:
    df_de["Week"][each] = df_de["Week"][each] + 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_de["Week"][each] = df_de["Week"][each] + 1


In [3]:
#Load English Sentiment CSV file.
df_en = pd.read_csv("/Users/tobias/Dev/FOM/Master_Thesis/data/Sentiment_Analysis/Dataset_Bert_Vader.csv", sep=";",
                parse_dates=["created_at"])

#Change the date field
df_en["Date"] = pd.to_datetime(df_en.created_at).apply(lambda x: x.date())

#Add a new column for a numeric sentiment bert value
df_en["sentiment_bert_value"] = df_en["sentiment_bert"].replace({"NEU": 0, "NEG": -1, "POS": 1})

#Add a new column for a numeric sentiment vader value
df_en["sentiment_vader_value"] = [1 if i >= 0.05 else -1 if i <= -0.05 else 0 for i in df_en["sentiment_vader"]]

#Create a df for adding the year and week to the df
df_en_week_year = pd.DataFrame([(i.isocalendar().week, i.isocalendar().year, i.isocalendar().weekday) for i in df_en["Date"]],
                               columns=["Week", "Year", "Weekday"])

#Recalulate the week
for each in range(len(df_en_week_year)):
    if ((df_en_week_year.iloc[each]["Weekday"] > 5) & (df_en_week_year.iloc[each]["Week"] != 1)):
        df_en_week_year.iloc[each]["Week"] = df_en_week_year.iloc[each]["Week"] - 1
    elif ((df_en_week_year.iloc[each]["Weekday"] > 5) & (df_en_week_year.iloc[each]["Week"] == 1)):
        df_en_week_year.iloc[each]["Week"] = 52
        
#Adding the week and year information to the df
df_en[["Week", "Year"]] = df_en_week_year[["Week", "Year"]]
        
tweet_id_friday_list = []
for each in range(len(df_en)):
    if (df_en["Date"][each].isocalendar().year == 2016) & (df_en["Date"][each].isocalendar().week == 12) & (df_en["Date"][each].isocalendar().weekday == 5):
        tweet_id_friday_list.append(each)
    elif (df_en["Date"][each].isocalendar().year == 2017) & (df_en["Date"][each].isocalendar().week == 15) & (df_en["Date"][each].isocalendar().weekday == 5):
        tweet_id_friday_list.append(each)
    elif (df_en["Date"][each].isocalendar().year == 2018) & (df_en["Date"][each].isocalendar().week == 13) & (df_en["Date"][each].isocalendar().weekday == 5):
        tweet_id_friday_list.append(each)
    elif (df_en["Date"][each].isocalendar().year == 2019) & (df_en["Date"][each].isocalendar().week == 16) & (df_en["Date"][each].isocalendar().weekday == 5):
        tweet_id_friday_list.append(each)
    elif (df_en["Date"][each].isocalendar().year == 2020) & (df_en["Date"][each].isocalendar().week == 15) & (df_en["Date"][each].isocalendar().weekday == 5):
        tweet_id_friday_list.append(each)
    elif (df_en["Date"][each].isocalendar().year == 2020) & (df_en["Date"][each].isocalendar().week == 18) & (df_en["Date"][each].isocalendar().weekday == 5):
        tweet_id_friday_list.append(each)

for each in tweet_id_friday_list:
    df_en["Week"][each] = df_en["Week"][each] + 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_en["Week"][each] = df_en["Week"][each] + 1


In [4]:
#Combine the different DFs
df = pd.concat([df_en, df_de]).reset_index(drop=True)

In [5]:
#Change the values from sentiment_bert_value from text to a num value
df["sentiment_bert_value"] = df["sentiment_bert"].replace({"NEU": 0, "NEG": -1, "POS": 1})

#Create a df for adding the year and week to the df
df_week_year = pd.DataFrame([(i.isocalendar().week, i.isocalendar().year) for i in df["Date"]], columns=["Week", "Year"])
#Adding the week and year information to the df
df[["Week", "Year"]] = df_week_year[["Week", "Year"]]

In [6]:
#Load Wirecard finance CSV file.
df_wdi = pd.read_csv("/Users/tobias/Dev/FOM/Master_Thesis/data/WDI.HM-2.csv", parse_dates=["Date"])
df_wdi = df_wdi.rename(columns={"Adj Close": "Adj_Close"})

#Load Euro Stoxx 50 CSV file.
df_stoxx = pd.read_csv("/Users/tobias/Dev/FOM/Master_Thesis/data/^STOXX50E.csv", parse_dates=["Date"])
#df_stoxx = pd.read_csv("/Users/tobias/Dev/FOM/Master_Thesis/data/^GDAXI.csv", parse_dates=["Date"])
df_stoxx = df_stoxx.rename(columns={"Adj Close": "Adj_Close"})

#Load Visa finance CSV file.
df_visa = pd.read_csv("/Users/tobias/Dev/FOM/Master_Thesis/data/V.csv", parse_dates=["Date"])
df_visa = df_visa.rename(columns={"Adj Close": "Adj_Close"})

#Load Master Card finance CSV file.
df_ma = pd.read_csv("/Users/tobias/Dev/FOM/Master_Thesis/data/MA.csv", parse_dates=["Date"])
df_ma = df_ma.rename(columns={"Adj Close": "Adj_Close"})

<h2>Data Preparation</h2>

In [7]:
#Change Date für df_wdi & df
df_wdi["Date"] = pd.to_datetime(df_wdi.Date).apply(lambda x: x.date())
df_stoxx["Date"] = pd.to_datetime(df_stoxx.Date).apply(lambda x: x.date())
df_visa["Date"] = pd.to_datetime(df_visa.Date).apply(lambda x: x.date())
df_ma["Date"] = pd.to_datetime(df_ma.Date).apply(lambda x: x.date())

In [8]:
def create_finance_df(df):
    #Create df for returning
    df_week = pd.DataFrame(columns=["Date", "Week", "Year", "Adj_Close"])
    
    #Start Week
    week = 5
    volume = 0
    
    #To loop through all lines in df
    for each in range(len(df)):
        #Get the date
        each_date = df["Date"][each]
        adj_close = df["Adj_Close"][each]
        
        #Get the Volume of all transactions in a week
        volume = volume + df["Volume"][each]

        #Check if the date is the last date
        if df["Date"][each] == datetime.date(2020,7,31):
            #Get Week Number, start and end day
            week = each_date.isocalendar().week
            year = each_date.isocalendar().year
            end_day = df["Date"].iloc[-1]
            
            #Add data to df
            new_row = pd.DataFrame([[each_date, week, year, adj_close, volume]],
                                   columns=["Date", "Week", "Year", "Adj_Close", "Volume"])
            df_week = pd.concat([df_week, new_row], ignore_index=True)
        else:
            #Get the next week number for checking
            next_week = df["Date"][each+1].isocalendar().week

            #Check if the next day is in another week 
            if each_date.isocalendar().week != next_week:
                #Get week number, year and last date
                week = each_date.isocalendar().week
                year = each_date.isocalendar().year
                end_day = each_date
                
                #Add data to df
                new_row = pd.DataFrame([[each_date, week, year, adj_close, volume]],
                                       columns=["Date", "Week", "Year", "Adj_Close", "Volume"])
                df_week = pd.concat([df_week, new_row], ignore_index=True)
                
                #Set Volume back to 0
                volume = 0
    
    #Add the yield from one week to another
    df_week["returns_week"] = df_week.Adj_Close.pct_change()+1
    #df_week["returns_week"] = np.log(df_week.Adj_Close.pct_change()+1)
    #df_week["returns_week"] = df_week.Adj_Close.pct_change()
    #df_week["returns_week_log"] = pd.Series(np.log(np.float64(df_week.Adj_Close))).pct_change()
    

    
    #Return the new DF
    return df_week

In [9]:
def concat_finance_df(df_wdi = df_wdi, df_stoxx = df_stoxx, df_ma = df_ma, df_v = df_visa):
    
    #Create week dfs for all financial dfs
    df_wdi_week = create_finance_df(df_wdi)
    df_stoxx_week = create_finance_df(df_stoxx)
    df_ma_week = create_finance_df(df_ma)
    df_v_week = create_finance_df(df_v)
    
    #combine all dfs
    df_week = pd.concat([df_wdi_week, df_stoxx_week.rename(columns={"returns_week": "returns_stoxx"})["returns_stoxx"]],
                        axis=1)
    df_week = pd.concat([df_week, df_ma_week.rename(columns={"returns_week": "returns_ma"})["returns_ma"]], axis=1)
    df_week = pd.concat([df_week, df_v_week.rename(columns={"returns_week": "returns_visa"})["returns_visa"]], axis=1)
    return df_week

In [10]:
def create_final_df(df_week, df, meta_data = False):
    
    #If meta data like Retweets should be used, this will calculate it.
    if meta_data:
        df["sentiment_bert_value"] = df["sentiment_bert_value"] * ((df["retweet_count"] + 1))
        df["sentiment_vader_value"] = df["sentiment_vader_value"] * ((df["retweet_count"] + 1))
    
    df_sum_year_week_bert = pd.DataFrame(df[["Week", "Year", "sentiment_bert_value"]].
                                    groupby(by=["Year", "Week"]).sum()).reset_index()

    df_sum_year_week_vader = pd.DataFrame(df[["Week", "Year", "sentiment_vader_value"]].
                                    groupby(by=["Year", "Week"]).sum()).reset_index()
    
    df_count_tweets_year_week = pd.DataFrame(df[["Week", "Year", "conversation_id"]].
                                    groupby(by=["Year", "Week"]).
             count()).reset_index().rename(columns={"conversation_id": "count_all_tweets"})
    
    df_week = df_week.merge(df_sum_year_week_bert)
    df_week = df_week.merge(df_sum_year_week_vader)
    df_week = df_week.merge(df_count_tweets_year_week)
    
    return df_week

In [11]:
def create_df(df_twitter = df.copy()):
    df_week = concat_finance_df()
    df_week_reg = create_final_df(df_week, df_twitter)
    
    df_week_reg["dif_sentiment_bert_value"] = df_week_reg["sentiment_bert_value"].diff()
    df_week_reg["dif_sentiment_vader_value"] = df_week_reg["sentiment_vader_value"].diff()

    df_week_reg.dropna(inplace=True)
    df_week_reg = df_week_reg.reset_index(drop=True)

    return df_week_reg

<h2>Analysis</h2>

In [16]:
len(df_de)

114444

In [17]:
len(df_en)

111199

In [18]:
len(df)

225643

<p>Verteilung der Tweets auf positiv, negativ und neutral - Deutsch BERT</p>

In [38]:
df_de["sentiment_bert_value"].value_counts()/len(df_de)*100

 0    65.806858
-1    29.406522
 1     4.786621
Name: sentiment_bert_value, dtype: float64

<p>Verteilung der Tweets auf positiv, negativ und neutral - Englisch BERT</p>

In [39]:
df_en["sentiment_bert_value"].value_counts()/len(df_en)*100

 0    54.752291
-1    31.566831
 1    13.680878
Name: sentiment_bert_value, dtype: float64

<p>Verteilung der Tweets auf positiv, negativ und neutral - Deutsch und Englisch BERT</p>

In [40]:
df["sentiment_bert_value"].value_counts()/len(df)*100

 0    60.359063
-1    30.471142
 1     9.169795
Name: sentiment_bert_value, dtype: float64

<p>Verteilung der Tweets auf positiv, negativ und neutral - Deutsch VADER</p>

In [41]:
df_de["sentiment_vader_value"].value_counts()/len(df_de)*100

 1    40.764042
-1    30.322254
 0    28.913705
Name: sentiment_vader_value, dtype: float64

<p>Verteilung der Tweets auf positiv, negativ und neutral - Englisch VADER</p>

In [42]:
df_en["sentiment_vader_value"].value_counts()/len(df_en)*100

-1    34.054263
 0    33.479618
 1    32.466119
Name: sentiment_vader_value, dtype: float64

<p>Verteilung der Tweets auf positiv, negativ und neutral - Deutsch und Englisch VADER</p>

In [43]:
df["sentiment_vader_value"].value_counts()/len(df)*100

 1    36.674747
-1    32.161423
 0    31.163830
Name: sentiment_vader_value, dtype: float64

<p>Tweets nach Jahren - Deutsch und Englisch</p>

In [61]:
df.groupby(["Year"]).count()["conversation_id"]

Year
2016     15039
2017     13305
2018     18169
2019     47019
2020    132111
Name: conversation_id, dtype: int64

<p>Tweets nach Jahren - Deutsch</p>

In [62]:
df_de.groupby(["Year"]).count()["conversation_id"]

Year
2016     7162
2017     6187
2018    11191
2019    25368
2020    64536
Name: conversation_id, dtype: int64

<p>Tweets nach Jahren - Englisch</p>

In [63]:
df_en.groupby(["Year"]).count()["conversation_id"]

Year
2016     7877
2017     7118
2018     6978
2019    21651
2020    67575
Name: conversation_id, dtype: int64

<h2>Copy Data Frames</h2>

In [12]:
df_copy = df.copy()
df_de_copy = df_de.copy()
df_en_copy = df_en.copy()

In [13]:
df_copy["Month"] = [df_copy["Date"][i].month for i in range(len(df_copy))]
df_de_copy["Month"] = [df_de_copy["Date"][i].month for i in range(len(df_de_copy))]
df_en_copy["Month"] = [df_en_copy["Date"][i].month for i in range(len(df_en_copy))]
df_copy["Year_2"] = [df_copy["Date"][i].year for i in range(len(df_copy))]
df_de_copy["Year_2"] = [df_de_copy["Date"][i].year for i in range(len(df_de_copy))]
df_en_copy["Year_2"] = [df_en_copy["Date"][i].year for i in range(len(df_en_copy))]

<p>Deutsch Analyse Ergebnisse mit BERT - Positiv</p>

In [23]:
df_de_copy.loc[df_de_copy["sentiment_bert_value"] == 1].groupby(["Year_2"]).count()["conversation_id"]

Year_2
2016     371
2017     575
2018     605
2019    1146
2020    2781
Name: conversation_id, dtype: int64

<p>Deutsch Analyse Ergebnisse mit BERT - Neutral</p>

In [25]:
df_de_copy.loc[df_de_copy["sentiment_bert_value"] == 0].groupby(["Year_2"]).count()["conversation_id"]

Year_2
2016     4696
2017     3813
2018     8164
2019    17961
2020    40678
Name: conversation_id, dtype: int64

<p>Deutsch Analyse Ergebnisse mit BERT - Negativ</p>

In [26]:
df_de_copy.loc[df_de_copy["sentiment_bert_value"] == -1].groupby(["Year_2"]).count()["conversation_id"]

Year_2
2016     2091
2017     1803
2018     2422
2019     6320
2020    21018
Name: conversation_id, dtype: int64

<h3>VADER Deutsch</h3>

<p>Deutsch Analyse Ergebnisse mit VADER - Positiv</p>

In [27]:
df_de_copy.loc[df_de_copy["sentiment_vader_value"] == 1].groupby(["Year_2"]).count()["conversation_id"]

Year_2
2016     3208
2017     2593
2018     5738
2019    10968
2020    24145
Name: conversation_id, dtype: int64

<p>Deutsch Analyse Ergebnisse mit VADER - Neutral</p>

In [28]:
df_de_copy.loc[df_de_copy["sentiment_vader_value"] == 0].groupby(["Year_2"]).count()["conversation_id"]

Year_2
2016     2171
2017     1482
2018     3002
2019     7242
2020    19193
Name: conversation_id, dtype: int64

<p>Deutsch Analyse Ergebnisse mit VADER - Negativ</p>

In [29]:
df_de_copy.loc[df_de_copy["sentiment_vader_value"] == -1].groupby(["Year_2"]).count()["conversation_id"]

Year_2
2016     1779
2017     2116
2018     2451
2019     7217
2020    21139
Name: conversation_id, dtype: int64

<h3>BERT Englisch</h3>

<p>Englische Analyse Ergebnisse mit BERT - Positiv</p>

In [30]:
df_en_copy.loc[df_en_copy["sentiment_bert_value"] == 1].groupby(["Year_2"]).count()["conversation_id"]

Year_2
2016    1889
2017    1915
2018    2437
2019    3800
2020    5172
Name: conversation_id, dtype: int64

<p>Englische Analyse Ergebnisse mit BERT - Neutral</p>

In [31]:
df_en_copy.loc[df_en_copy["sentiment_bert_value"] == 0].groupby(["Year_2"]).count()["conversation_id"]

Year_2
2016     5409
2017     4902
2018     4082
2019    12036
2020    34455
Name: conversation_id, dtype: int64

<p>Englische Analyse Ergebnisse mit BERT - Negativ</p>

In [32]:
df_en_copy.loc[df_en_copy["sentiment_bert_value"] == -1].groupby(["Year_2"]).count()["conversation_id"]

Year_2
2016      572
2017      308
2018      459
2019     5875
2020    27888
Name: conversation_id, dtype: int64

<h3>VADER Englisch</h3>

<p>Englische Analyse Ergebnisse mit VADER - Positiv</p>

In [33]:
df_en_copy.loc[df_en_copy["sentiment_vader_value"] == 1].groupby(["Year_2"]).count()["conversation_id"]

Year_2
2016     2851
2017     2967
2018     3565
2019     8519
2020    18200
Name: conversation_id, dtype: int64

<p>Englische Analyse Ergebnisse mit VADER - Neutral</p>

In [34]:
df_en_copy.loc[df_en_copy["sentiment_vader_value"] == 0].groupby(["Year_2"]).count()["conversation_id"]

Year_2
2016     4020
2017     3449
2018     2840
2019     7489
2020    19431
Name: conversation_id, dtype: int64

<p>Englische Analyse Ergebnisse mit VADER - Negativ</p>

In [35]:
df_en_copy.loc[df_en_copy["sentiment_vader_value"] == -1].groupby(["Year_2"]).count()["conversation_id"]

Year_2
2016      999
2017      709
2018      573
2019     5703
2020    29884
Name: conversation_id, dtype: int64

<p>Tweets nach Jahren - Deutsch</p>

In [17]:
df_de_copy.groupby(["Year_2"]).count()["conversation_id"]

Year_2
2016     7158
2017     6191
2018    11191
2019    25427
2020    64477
Name: conversation_id, dtype: int64

<p>Tweets nach Jahren - Englisch</p>

In [39]:
df_en_copy.groupby(["Year_2"]).count()["conversation_id"]

Year_2
2016     7870
2017     7125
2018     6978
2019    21711
2020    67515
Name: conversation_id, dtype: int64

<p>Tweets nach Monaten in 2018 - Deutsch und Englisch</p>

In [64]:
df_copy.loc[(df_copy["Year_2"] == 2018)].groupby(["Month"]).count()["conversation_id"]

Month
1     1356
2      998
3      908
4      882
5      971
6     1444
7     1153
8     2146
9     2493
10    2526
11    1947
12    1345
Name: conversation_id, dtype: int64

<p>Tweets nach Monaten in 2020 - Deutsch</p>

In [71]:
df_de_copy.loc[(df_de_copy["Year_2"] == 2020)].groupby(["Month"]).count()["conversation_id"]

Month
1     1737
2     1756
3     1232
4     2944
5     5669
6    28243
7    22896
Name: conversation_id, dtype: int64

<p>Tweets nach Monaten in 2020 - Englisch</p>

In [72]:
df_en_copy.loc[(df_en_copy["Year_2"] == 2020)].groupby(["Month"]).count()["conversation_id"]

Month
1     1436
2      974
3     1033
4     1627
5     4181
6    39137
7    19127
Name: conversation_id, dtype: int64

<p>Sentiment Werte nach Monaten in 2020 - Deutsch VADER</p>

In [63]:
df_de_copy.loc[(df_de_copy["Year_2"] == 2020)].groupby(["Month"]).sum()["sentiment_vader_value"]

Month
1     434
2     408
3     272
4     456
5     994
6    1389
7    -947
Name: sentiment_vader_value, dtype: int64

<p>Sentiment Werte nach Monaten in 2020 - Deutsch und Englisch VADER</p>

In [17]:
df_copy.loc[(df_copy["Year_2"] == 2020)].groupby(["Month"]).sum()["sentiment_vader_value"]

Month
1     886
2     763
3     570
4     620
5    1548
6   -8293
7   -4772
Name: sentiment_vader_value, dtype: int64

<p>Sentiment Werte nach Monaten in 2020 - Deutsch und Englisch BERT</p>

In [18]:
df_copy.loc[(df_copy["Year_2"] == 2020)].groupby(["Month"]).sum()["sentiment_bert_value"]

Month
1     -301
2     -332
3     -187
4    -1213
5    -2716
6   -25820
7   -10384
Name: sentiment_bert_value, dtype: int64

<p>Sentiment Werte nach Jahren und Monaten - Deutsch und Englisch</p>

In [30]:
df_copy.groupby(["Year_2", "Month"]).sum()[["sentiment_vader_value", "sentiment_bert_value"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,sentiment_vader_value,sentiment_bert_value
Year_2,Month,Unnamed: 2_level_1,Unnamed: 3_level_1
2016,2,125,-232
2016,3,175,-197
2016,4,490,-104
2016,5,401,135
2016,6,154,-33
2016,7,349,5
2016,8,287,-51
2016,9,378,-13
2016,10,396,46
2016,11,327,52
