In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score #model evaluation

In [2]:
mood_data_sample1 = pd.read_csv('/home/michael/git/master_thesis/data/mood_data_sample1.tsv', sep='\t')
mood_data_sample2 = pd.read_csv('/home/michael/git/master_thesis/data/mood_data_sample2.tsv', sep='\t')

questionnaire_data = data = pd.read_csv('/home/michael/git/master_thesis/data/corrected_data_questionnaires.tsv', sep='\t')
questionnaires_sum_scores = pd.read_csv('/home/michael/git/master_thesis/data/sum_scores.tsv', sep='\t')

#  subject identifier
subj_id = ['02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15', '16', '17', '18',
           '19', '20', '21', '22', '23', '24', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37',
           '38', '39']



# experimental condition: positive first = 1; negative first = 2
order = [1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 1, 1]

# add condition and identifier to data
questionnaire_data['vpn_num'] = subj_id
questionnaire_data['order'] = order

questionnaires_sum_scores['vpn_num'] = subj_id
questionnaires_sum_scores['order'] = order


# clean-up df for further analysis
questionnaires_sum_scores = questionnaires_sum_scores.drop(['neo-extraversion_sum_score',
                                                            'neo-neuroticism_sum_score',
                                                            'neo-aggreeablness_sum_score',
                                                            'mae_sum_score'], axis=1)

## MOOD DATA

# calculate means where previous analysis indicated them as apropriate to make both samples better comparable
erfolg_stolz = mood_data_sample2.loc[: , "Erfolg":"Stolz"]
mood_data_sample2['erfolg_stolz'] = erfolg_stolz.mean(axis=1)

froh_freudig = mood_data_sample2.loc[: , "Fröhlichkeit":"Freude"]
mood_data_sample2['froh_freudig'] = froh_freudig.mean(axis=1)

# drop redundant/non-informative data
mood_data_sample2 = mood_data_sample2.drop(['Trauer', 'Erfolg', 'Stolz', 'Fröhlichkeit', 
                                            'Freude', 'Wärme', 'Frustration', 'Ärger', 
                                            'gelangweilt vs. motiviert', 'negativ vs. positiv', 
                                            'gleichgueltig vs. interesse', 'ruhig vs. nervoes'], axis=1)



mood_data_sample1 = mood_data_sample1.drop(['lustlos vs. neugierig'], axis=1)

# rename columns for data merging between samples
mood_data_sample1 = mood_data_sample1.rename(columns={'niedergeschlagen_bedrueckt_betruebt_traurig':
                                                      'Niedergeschlagenheit',
                                                      'waerme_zuneigung':'Zuneigung',
                                                      'frustration_enttaeuschung':'Enttäuschung',
                                                      'froehlich_freudig_vergnuegt_entzueckt':'froh_freudig',
                                                      'aerger_wut':'Wut',
                                                      'peinlich':'Peinlichkeit',
                                                      'erwartung':'Erwartung'
                                                     })

mood_data = pd.concat([mood_data_sample1, mood_data_sample2])

mood_data = mood_data.sort_values(['code'], ascending=True)

# exclude participants number 25 and 26, as there is no clean questionnaire data for those
mood_data = mood_data[mood_data.code != 1025]
mood_data = mood_data[mood_data.code != 2025]
mood_data = mood_data[mood_data.code != 3025]
mood_data = mood_data[mood_data.code != 1026]
mood_data = mood_data[mood_data.code != 2026]
mood_data = mood_data[mood_data.code != 3026]


# expand identifier and condition list for continous format
mood_data_order = order * 3
mood_data['order'] = mood_data_order
mood_data_vpn_num = subj_id *3
mood_data['vpn_num'] = mood_data_vpn_num

mood_data.reset_index(drop=True, inplace=True)


In [3]:
questionnaires_sum_scores

Unnamed: 0,bdi_sum_score,des_sum_score,sp_sum_score,vpn_num,order
0,8,67,10,2,1
1,3,62,7,3,2
2,0,71,3,4,1
3,0,61,11,5,2
4,8,68,11,6,1
5,7,62,4,7,2
6,6,50,2,8,1
7,6,70,6,9,2
8,7,64,13,10,1
9,4,68,0,11,1


In [3]:
mood_data_sample2.columns

Index(['Zuneigung', 'Enttäuschung', 'Wut', 'Niedergeschlagenheit',
       'Peinlichkeit', 'unangenehm vs. angenehm', 'entspannt vs. angespannt',
       'Erwartung', 'condition', 'code', 'erfolg_stolz', 'froh_freudig'],
      dtype='object')

In [4]:


with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
    display(mood_data)



Unnamed: 0,code,erfolg_stolz,Niedergeschlagenheit,Zuneigung,Enttäuschung,froh_freudig,Peinlichkeit,Wut,Erwartung,unangenehm vs. angenehm,entspannt vs. angespannt,condition,order,vpn_num
0,1002,4.0,5,6,4,4.0,1,6,6,7,4,baseline,1,2
1,1003,7.0,2,6,1,6.0,1,2,7,8,7,baseline,2,3
2,1004,7.0,1,7,1,5.0,1,1,8,8,8,baseline,1,4
3,1005,1.0,1,2,1,5.0,1,1,5,5,8,baseline,2,5
4,1006,4.0,1,5,1,6.0,3,1,6,-,-,baseline,1,6
5,1007,7.0,2,8,2,7.0,1,1,6,7,4,baseline,2,7
6,1008,5.0,6,1,1,5.0,1,1,5,5,9,baseline,1,8
7,1009,4.0,1,3,1,6.0,1,1,5,8,7,baseline,2,9
8,1010,5.0,3,3,2,5.0,2,1,6,5,6,baseline,1,10
9,1011,7.0,1,5,1,4.0,1,1,7,7,5,baseline,1,11


In [18]:
mood_data = mood_data[mood_data.code != 1006]
mood_data = mood_data[mood_data.code != 2006]
mood_data = mood_data[mood_data.code != 3006]

mood_data['unangenehm vs. angenehm'] = mood_data['unangenehm vs. angenehm'].astype(int)
mood_data['entspannt vs. angespannt'] = mood_data['entspannt vs. angespannt'].astype(int)

mood_means = mood_data.describe()
mood_means
mood_means.T.to_csv('/home/michael/git/master_thesis/data/mood_merged_descriptive_stats.tsv', sep='\t',
              encoding='utf-8')

In [6]:
questionnaires_sum_scores_expanded = questionnaires_sum_scores.append(questionnaires_sum_scores)
questionnaires_sum_scores_expanded = questionnaires_sum_scores_expanded.append(questionnaires_sum_scores)
questionnaires_sum_scores_expanded

Unnamed: 0,bdi_sum_score,des_sum_score,sp_sum_score,vpn_num,order
0,8,67,10,02,1
1,3,62,7,03,2
2,0,71,3,04,1
3,0,61,11,05,2
4,8,68,11,06,1
...,...,...,...,...,...
31,7,73,13,35,2
32,4,70,15,36,2
33,8,65,10,37,1
34,2,64,23,38,1


In [7]:
mood_only_df = mood_data.drop(['code'], axis=1)
mood_only_df = pd.melt(mood_only_df,id_vars=['vpn_num', 'condition', 'order'],var_name='emotion', value_name='value')
mood_only_df = mood_only_df.sort_values(by='emotion')

In [8]:
questionnaires_sum_scores_expanded = pd.DataFrame(np.repeat(questionnaires_sum_scores.values,3,axis=0))
questionnaires_sum_scores_expanded.columns = questionnaires_sum_scores.columns
questionnaires_sum_scores_expanded = questionnaires_sum_scores_expanded.drop(['vpn_num', 'order'], axis=1)
display(questionnaires_sum_scores_expanded)

Unnamed: 0,bdi_sum_score,des_sum_score,sp_sum_score
0,8,67,10
1,8,67,10
2,8,67,10
3,3,62,7
4,3,62,7
...,...,...,...
103,2,64,23
104,2,64,23
105,5,63,0
106,5,63,0


In [9]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(mood_only_df)

Unnamed: 0,vpn_num,condition,order,emotion,value
377,19,negative,1,Enttäuschung,3
401,7,positive,2,Enttäuschung,3
400,6,positive,1,Enttäuschung,2
399,5,positive,2,Enttäuschung,1
398,4,positive,1,Enttäuschung,1
397,3,positive,2,Enttäuschung,2
396,2,positive,1,Enttäuschung,4
395,39,negative,1,Enttäuschung,1
394,38,negative,1,Enttäuschung,3
393,37,negative,1,Enttäuschung,7


In [10]:
# create dictionary to match valence to emotion for later analysis

emotion = ['erfolg_stolz', 'Niedergeschlagenheit', 'Zuneigung',
               'Enttäuschung', 'froh_freudig', 'Peinlichkeit',
               'Wut', 'Erwartung', 'unangenehm vs. angenehm',
               'entspannt vs. angespannt']

valence = ['pos', 'neg', 'pos', 'neg', 'pos', 'neg', 'neg', 'pos', 'pos', 'neg']

# Get pairs of elements
zip_iterator = zip(emotion, valence)

# convert to dict
emot_valence_dictionary = dict(zip_iterator)


# split dataframe by condition
grouped = mood_only_df.groupby(mood_only_df.emotion)

reg_df = pd.DataFrame()

# iterate over split dataframes
for name, group in grouped:
    print(name)
    for key, value in emot_valence_dictionary.items():
        if name == key:  # match emotion in group with emotion in dict
            #print('match!')
            group['valence'] = value  # add valence to df
            group = group.sort_values(['vpn_num', 'condition'])
            group.reset_index(drop=True, inplace=True)
            questionnaires_sum_scores_expanded.reset_index(drop=True, inplace=True)
            group['bdi_sum_score'] = questionnaires_sum_scores_expanded['bdi_sum_score']  # add valence to df   
            group['des_sum_score'] = questionnaires_sum_scores_expanded['des_sum_score']           
            group['sp_sum_score'] = questionnaires_sum_scores_expanded['sp_sum_score']
            #group = pd.concat([group, questionnaires_sum_scores_expanded], axis=1)

            reg_df = reg_df.append(group)  # write to dataframe

reg_df.reset_index(drop=True, inplace=True)
reg_df = reg_df.rename(columns={'vpn_num':'id'})
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(reg_df)

Enttäuschung
Erwartung
Niedergeschlagenheit
Peinlichkeit
Wut
Zuneigung
entspannt vs. angespannt
erfolg_stolz
froh_freudig
unangenehm vs. angenehm


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,id,condition,order,emotion,value,valence,bdi_sum_score,des_sum_score,sp_sum_score
0,2,baseline,1,Enttäuschung,4,neg,8,67,10
1,2,negative,1,Enttäuschung,4,neg,8,67,10
2,2,positive,1,Enttäuschung,4,neg,8,67,10
3,3,baseline,2,Enttäuschung,1,neg,3,62,7
4,3,negative,2,Enttäuschung,6,neg,3,62,7
5,3,positive,2,Enttäuschung,2,neg,3,62,7
6,4,baseline,1,Enttäuschung,1,neg,0,71,3
7,4,negative,1,Enttäuschung,1,neg,0,71,3
8,4,positive,1,Enttäuschung,1,neg,0,71,3
9,5,baseline,2,Enttäuschung,1,neg,0,61,11


In [11]:
reg_df

Unnamed: 0,id,condition,order,emotion,value,valence,bdi_sum_score,des_sum_score,sp_sum_score
0,02,baseline,1,Enttäuschung,4,neg,8,67,10
1,02,negative,1,Enttäuschung,4,neg,8,67,10
2,02,positive,1,Enttäuschung,4,neg,8,67,10
3,03,baseline,2,Enttäuschung,1,neg,3,62,7
4,03,negative,2,Enttäuschung,6,neg,3,62,7
...,...,...,...,...,...,...,...,...,...
1075,38,negative,1,unangenehm vs. angenehm,8,pos,2,64,23
1076,38,positive,1,unangenehm vs. angenehm,8,pos,2,64,23
1077,39,baseline,1,unangenehm vs. angenehm,7,pos,5,63,0
1078,39,negative,1,unangenehm vs. angenehm,7,pos,5,63,0


In [12]:
reg_df.to_csv('/home/michael/git/master_thesis/data/mood_&_questionnaire_data_long_format.tsv', sep='\t',
              encoding='utf-8', index=False)