Documentation for the master's thesis "Fanfiction Semantics - A Quantitative Analysis of Sensitive Topics in German Fanfiction" by Julian Jacopo Häußler. Date of submission: September 19, 2022.

# 7.3 Word Embedding Model Based Sentiment Analysis B

## Overview

### - load libraries and read in data
### - define sentiment clusters

# LOAD LIBRARIES AND READ IN DATA

In [1]:
# load libraries

import numpy as np
import pickle
import pandas as pd
from itertools import cycle
from gensim.models import KeyedVectors
from sklearn.preprocessing import normalize
from sklearn.decomposition import PCA
from sklearn.cluster import AffinityPropagation as AF

In [2]:
# define paths
# NOTE: these are absolute Windows paths; adjust them before running on another machine.

# directory from which the BAWL CSV is read
path_data = r'C:\Users\Public\Data\Masterarbeit\data_bawl'
# directory from which the KeyedVectors (.kv) model files are loaded
path_models = r'C:\Users\Public\Data\Masterarbeit\models_3.0'
# directory into which the selected clusters are pickled
path_pickled = r'C:\Users\Public\Data\Masterarbeit\results\bawl_cluster'

In [3]:
# load models

# Keyed vectors stored in modelPotter2021H.kv inside the models directory
potter_vectors_file = path_models + '\\modelPotter2021H.kv'
modelPotter = KeyedVectors.load(potter_vectors_file)

In [4]:
# Keyed vectors stored in modelBiss2021H.kv inside the models directory
biss_vectors_file = path_models + '\\modelBiss2021H.kv'
modelBiss = KeyedVectors.load(biss_vectors_file)

In [5]:
# Keyed vectors stored in modelWarriorCats2021H.kv inside the models directory
warriorcats_vectors_file = path_models + '\\modelWarriorCats2021H.kv'
modelWarriorCats = KeyedVectors.load(warriorcats_vectors_file)

In [6]:
# Keyed vectors stored in modelDFFF2021H.kv inside the models directory
dfff_vectors_file = path_models + '\\modelDFFF2021H.kv'
modelDFFF = KeyedVectors.load(dfff_vectors_file)

In [7]:
# Keyed vectors stored in modelMittelerde2021H.kv inside the models directory
mittelerde_vectors_file = path_models + '\\modelMittelerde2021H.kv'
modelMittelerde = KeyedVectors.load(mittelerde_vectors_file)

In [8]:
# Keyed vectors stored in modelJackson2021H.kv inside the models directory
jackson_vectors_file = path_models + '\\modelJackson2021H.kv'
modelJackson = KeyedVectors.load(jackson_vectors_file)

In [9]:
# Keyed vectors stored in modelPanem2021H.kv inside the models directory
panem_vectors_file = path_models + '\\modelPanem2021H.kv'
modelPanem = KeyedVectors.load(panem_vectors_file)

In [10]:
# Keyed vectors stored in modelPotterOriginalsH.kv inside the models directory
potter_originals_vectors_file = path_models + '\\modelPotterOriginalsH.kv'
modelPotterOriginals = KeyedVectors.load(potter_originals_vectors_file)

# DEFINE SENTIMENT CLUSTERS

The following code blocks are taken from Brottrager et al.'s "Character Shifts in Harry Potter Fanfictions". The relevant Jupyter notebook can be found at https://github.com/jbrottrager/character-shifts-HPFFS/blob/main/scripts/09_BAWLcluster.ipynb (last viewed: 2022/09/18).

In [12]:
# Load BAWL and keep the words whose frequency is at least 50 in every corpus

bawl = pd.read_csv(path_data + '\\BAWLR_with_freqs.csv')

# Start from the first frequency column and AND in the remaining ones,
# so a word only survives if every column reaches the minimum of 50
frequent_in_all = bawl["POTTER_FREQ"] >= 50
for freq_column in ["BISS_FREQ",
                    "WARRIORCATS_FREQ",
                    "DFFF_FREQ",
                    "MITTELERDE_FREQ",
                    "JACKSON_FREQ",
                    "PANEM_FREQ",
                    "POTTERORIGINALS_FREQ"]:
    frequent_in_all = frequent_in_all & (bawl[freq_column] >= 50)

bawl_words = list(bawl[frequent_in_all]["WORD_LOWER"])

In [13]:
# Display the words that passed the frequency filter in every corpus
bawl_words

['abend',
 'achten',
 'ahnung',
 'allein',
 'angriff',
 'angst',
 'annehmen',
 'antwort',
 'antworten',
 'arbeit',
 'arbeiten',
 'aufgabe',
 'aufhalten',
 'aufhören',
 'aufnehmen',
 'auge',
 'baum',
 'beben',
 'bedeuten',
 'beenden',
 'befehl',
 'befinden',
 'befreien',
 'begegnen',
 'begeistern',
 'begleiten',
 'behandeln',
 'behaupten',
 'beispiel',
 'bekennen',
 'bekommen',
 'bereiten',
 'berichten',
 'berühren',
 'bestehen',
 'bestimmen',
 'besuch',
 'besuchen',
 'betrachten',
 'bewegen',
 'bewegung',
 'beweisen',
 'bieten',
 'bild',
 'bilden',
 'bitte',
 'bitten',
 'bleiben',
 'blick',
 'blut',
 'boden',
 'brauchen',
 'brechen',
 'brennen',
 'bruder',
 'brust',
 'buch',
 'chance',
 'dank',
 'dauern',
 'decke',
 'ding',
 'drängen',
 'dringen',
 'drohen',
 'drücken',
 'ehe',
 'ehrlich',
 'eindruck',
 'einsetzen',
 'eintreten',
 'empfinden',
 'ende',
 'enden',
 'entdecken',
 'entfernen',
 'entsetzt',
 'erde',
 'erfahren',
 'erfüllen',
 'ergreifen',
 'erhalten',
 'erheben',
 'erinnern

## 1. POTTER

### 1. High Valence

In [14]:
# Define relevant valence words by threshold value
# NOTE: this threshold-based selection is overwritten by the ranking-based
# selection directly below, so only the ranking takes effect.

emo_mean = 2
emo_std = 1

high_valence_words = list(bawl[(bawl["POTTER_FREQ"] >= 50) & (bawl["EMO_MEAN"] >= emo_mean) & (bawl["EMO_STD"] <= emo_std)]["WORD_LOWER"])

# Define relevant valence words by ranking: the top_n words with the highest
# EMO_MEAN among entries with POTTER_FREQ >= 50 and EMO_STD <= emo_std

top_n = 25
emo_std = 1

high_valence_words = list(bawl[(bawl["POTTER_FREQ"] >= 50) & (bawl["EMO_STD"] <= emo_std)].sort_values("EMO_MEAN")["WORD_LOWER"])[-top_n:]

# Get vectors for valence words

valence_words_index = [modelPotter.key_to_index[word] for word in high_valence_words]
valence_word_vectors = [modelPotter.get_vector(i) for i in valence_words_index]

# Normalize vectors to unit length (L2 norm)

valence_word_cosine = np.array(normalize(valence_word_vectors, norm='l2'))

# Cluster the normalized vectors with Affinity Propagation (no PCA is applied)

valence_cos_cluster = AF(affinity='euclidean')
valence_cos_cluster.fit(valence_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

valence_sem_groups = {}
for label, word in zip(valence_cos_cluster.labels_, high_valence_words):
    valence_sem_groups.setdefault(label, []).append(word)

print(valence_sem_groups)

# Print relevant information of clusters and compute a suggestion value per cluster

suggestion_values = []

letters = cycle('abcdefghijklmnopqrstuvwxyz')
for k, let in zip(range(len(valence_cos_cluster.cluster_centers_)), letters):
    class_members = valence_cos_cluster.labels_ == k
    members = np.array(high_valence_words)[class_members]
    print(let + ": ITEMS " + str(list(members)))

    # Similarities between all distinct members (each pair appears in both orders)
    dist_list = [modelPotter.similarity(member1, member2)
                 for member1 in members
                 for member2 in members
                 if member1 != member2]
    avg_similarity = sum(dist_list) / len(dist_list) if dist_list else None
    if avg_similarity is not None:
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    # One BAWL row per cluster word; .iloc[0] avoids the deprecated
    # float(<single-element Series>) conversion
    cluster_rows = [bawl[bawl["WORD_LOWER"] == word].iloc[0] for word in valence_sem_groups[k]]
    cluster_valence_mean, cluster_valence_std, cluster_arousal_mean, cluster_arousal_std = (
        sum(float(row[column]) for row in cluster_rows) / len(cluster_rows)
        for column in ("EMO_MEAN", "EMO_STD", "AROUSAL_MEAN", "AROUSAL_STD")
    )

    print(let + ": MEAN VALENCE " + str(cluster_valence_mean))
    print(let + ": MEAN VALENCE STD " + str(cluster_valence_std))
    print(let + ": MEAN AROUSAL " + str(cluster_arousal_mean))
    print(let + ": MEAN AROUSAL STD " + str(cluster_arousal_std))

    if avg_similarity is None:
        # A single-word cluster has no pairwise similarity and gets the score 0.
        # Any other error is no longer swallowed silently.
        suggestion_value = 0
    else:
        # similarity + |mean valence| - mean valence std, scaled up by 5% per cluster member
        suggestion_value = (avg_similarity + abs(cluster_valence_mean) - cluster_valence_std) * (1.05 ** len(members))
    suggestion_values.append((suggestion_value, members))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

# Ranking for suggestion values (highest first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, (_score, ranked_words) in enumerate(suggestion_values, start=1):
    print(str(rank) + " " + str(list(ranked_words)))

# Persist the best-ranked cluster

high_val_Potter = list(suggestion_values[0][1])
with open(path_pickled + "\\high_val_Potter.pkl", "wb") as f:
    pickle.dump(high_val_Potter, f)

{3: ['sommer', 'zuhause', 'ferien'], 5: ['wahrheit', 'freund', 'glück', 'freude', 'lieben', 'freiheit', 'liebe'], 4: ['lebendig', 'heilung', 'gesund'], 2: ['erdbeere', 'sex', 'küssen'], 0: ['idylle', 'harmonie', 'frieden', 'kreativ', 'sonne', 'paradies'], 1: ['super', 'lachen', 'prima']}
a: ITEMS ['idylle', 'harmonie', 'frieden', 'kreativ', 'sonne', 'paradies']
a: AVG SIMILARITY 0.1797077973683675
a: MEAN VALENCE 2.5882352941176467
a: MEAN VALENCE STD 0.6100949839784297
a: MEAN AROUSAL 2.221132897603486
a: MEAN AROUSAL STD 1.226280752186
a: SUGGESTION VALUE 2.8917228420018213
b: ITEMS ['super', 'lachen', 'prima']
b: AVG SIMILARITY 0.3320593883593877
b: MEAN VALENCE 2.615686274509804
b: MEAN VALENCE STD 0.5583570833575552
b: MEAN AROUSAL 2.920760233918129
b: MEAN AROUSAL STD 1.2482180075712401
b: SUGGESTION VALUE 2.766015954357159
c: ITEMS ['erdbeere', 'sex', 'küssen']
c: AVG SIMILARITY 0.26240985095500946
c: MEAN VALENCE 2.572549019607843
c: MEAN VALENCE STD 0.6767196715932847
c: MEAN 



### 2. Low Valence

In [15]:
# Define relevant valence words by threshold value
# NOTE: this threshold-based selection is overwritten by the ranking-based
# selection directly below, so only the ranking takes effect.

emo_mean = -2
emo_std = 1

low_valence_words = list(bawl[(bawl["POTTER_FREQ"] >= 50) & (bawl["EMO_MEAN"] <= emo_mean) & (bawl["EMO_STD"] <= emo_std)]["WORD_LOWER"])

# Define relevant valence words by ranking: the bot_n words with the lowest
# EMO_MEAN among entries with POTTER_FREQ >= 50 and EMO_STD <= emo_std

bot_n = 25
emo_std = 1

low_valence_words = list(bawl[(bawl["POTTER_FREQ"] >= 50) & (bawl["EMO_STD"] <= emo_std)].sort_values("EMO_MEAN")["WORD_LOWER"])[:bot_n]

# Get vectors for valence words

valence_words_index = [modelPotter.key_to_index[word] for word in low_valence_words]
valence_word_vectors = [modelPotter.get_vector(i) for i in valence_words_index]

# Normalize vectors to unit length (L2 norm)

valence_word_cosine = np.array(normalize(valence_word_vectors, norm='l2'))

# Cluster with Affinity Propagation

valence_cos_cluster = AF(affinity='euclidean')
valence_cos_cluster.fit(valence_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

valence_sem_groups = {}
for label, word in zip(valence_cos_cluster.labels_, low_valence_words):
    valence_sem_groups.setdefault(label, []).append(word)

print(valence_sem_groups)

# Print relevant information of clusters and compute a suggestion value per cluster

suggestion_values = []

letters = cycle('abcdefghijklmnopqrstuvwxyz')
for k, let in zip(range(len(valence_cos_cluster.cluster_centers_)), letters):
    class_members = valence_cos_cluster.labels_ == k
    members = np.array(low_valence_words)[class_members]
    print(let + ": ITEMS " + str(list(members)))

    # Similarities between all distinct members (each pair appears in both orders)
    dist_list = [modelPotter.similarity(member1, member2)
                 for member1 in members
                 for member2 in members
                 if member1 != member2]
    avg_similarity = sum(dist_list) / len(dist_list) if dist_list else None
    if avg_similarity is not None:
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    # One BAWL row per cluster word; .iloc[0] avoids the deprecated
    # float(<single-element Series>) conversion
    cluster_rows = [bawl[bawl["WORD_LOWER"] == word].iloc[0] for word in valence_sem_groups[k]]
    cluster_valence_mean, cluster_valence_std, cluster_arousal_mean, cluster_arousal_std = (
        sum(float(row[column]) for row in cluster_rows) / len(cluster_rows)
        for column in ("EMO_MEAN", "EMO_STD", "AROUSAL_MEAN", "AROUSAL_STD")
    )

    print(let + ": MEAN VALENCE " + str(cluster_valence_mean))
    print(let + ": MEAN VALENCE STD " + str(cluster_valence_std))
    print(let + ": MEAN AROUSAL " + str(cluster_arousal_mean))
    print(let + ": MEAN AROUSAL STD " + str(cluster_arousal_std))

    if avg_similarity is None:
        # A single-word cluster has no pairwise similarity and gets the score 0.
        # Any other error is no longer swallowed silently.
        suggestion_value = 0
    else:
        # similarity + |mean valence| - mean valence std, scaled up by 5% per cluster member.
        # The cluster size is taken from this cell's own word list (low_valence_words),
        # not from the high-valence list of another cell.
        suggestion_value = (avg_similarity + abs(cluster_valence_mean) - cluster_valence_std) * (1.05 ** len(members))
    suggestion_values.append((suggestion_value, members))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

# Ranking for suggestion values (highest first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, (_score, ranked_words) in enumerate(suggestion_values, start=1):
    print(str(rank) + " " + str(list(ranked_words)))

# Persist the best-ranked cluster

low_val_Potter = list(suggestion_values[0][1])
with open(path_pickled + "\\low_val_Potter.pkl", "wb") as f:
    pickle.dump(low_val_Potter, f)

{0: ['krieg', 'nazi', 'weltkrieg', 'massaker'], 1: ['alptraum', 'folter', 'mord', 'qual', 'gewalt'], 3: ['foltern', 'tod', 'töten', 'morden', 'tyrann', 'angst', 'zerstören', 'tot', 'unmensch', 'hassen'], 2: ['pest'], 5: ['tumor', 'tote', 'gift'], 4: ['bombe'], 6: ['erzfeind']}
a: ITEMS ['krieg', 'nazi', 'weltkrieg', 'massaker']
a: AVG SIMILARITY 0.3960659106572469
a: MEAN VALENCE -2.855882352941176
a: MEAN VALENCE STD 0.4074742589879832
a: MEAN AROUSAL 4.617063492063492
a: MEAN AROUSAL STD 0.6965397001468534
a: SUGGESTION VALUE 3.4574759305665195
b: ITEMS ['alptraum', 'folter', 'mord', 'qual', 'gewalt']
b: AVG SIMILARITY 0.35311315655708314
b: MEAN VALENCE -2.7599999999999993
b: MEAN VALENCE STD 0.5646860082200025
b: MEAN AROUSAL 4.4300260455059215
b: MEAN AROUSAL STD 0.764019512744706
b: SUGGESTION VALUE 3.252510582797069
c: ITEMS ['pest']
c: MEAN VALENCE -2.8
c: MEAN VALENCE STD 0.4216370213557839
c: MEAN AROUSAL 4.0
c: MEAN AROUSAL STD 1.0846522890932808
c: SUGGESTION VALUE 0
d: ITE



### 3. High Arousal

In [16]:
# Define relevant arousal words by threshold value
# NOTE: this threshold-based selection is overwritten by the ranking-based
# selection directly below, so only the ranking takes effect.

arousal_mean = 2
arousal_std = 1

high_arousal_words = list(bawl[(bawl["POTTER_FREQ"] >= 50) & (bawl["AROUSAL_MEAN"] >= arousal_mean) & (bawl["AROUSAL_STD"] <= arousal_std)]["WORD_LOWER"])

# Define relevant arousal words by ranking: the top_n words with the highest
# AROUSAL_MEAN among entries with POTTER_FREQ >= 50 and AROUSAL_STD <= arousal_std

top_n = 25
arousal_std = 1

high_arousal_words = list(bawl[(bawl["POTTER_FREQ"] >= 50) & (bawl["AROUSAL_STD"] <= arousal_std)].sort_values("AROUSAL_MEAN")["WORD_LOWER"])[-top_n:]

# Get vectors for arousal words

arousal_words_index = [modelPotter.key_to_index[word] for word in high_arousal_words]
arousal_word_vectors = [modelPotter.get_vector(i) for i in arousal_words_index]

# Normalize vectors to unit length (L2 norm)

arousal_word_cosine = np.array(normalize(arousal_word_vectors, norm='l2'))

# Cluster with Affinity Propagation

arousal_cos_cluster = AF(affinity='euclidean')
arousal_cos_cluster.fit(arousal_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

arousal_sem_groups = {}
for label, word in zip(arousal_cos_cluster.labels_, high_arousal_words):
    arousal_sem_groups.setdefault(label, []).append(word)

print(arousal_sem_groups)

# Print relevant information of clusters and compute a suggestion value per cluster

suggestion_values = []

letters = cycle('abcdefghijklmnopqrstuvwxyz')
for k, let in zip(range(len(arousal_cos_cluster.cluster_centers_)), letters):
    class_members = arousal_cos_cluster.labels_ == k
    members = np.array(high_arousal_words)[class_members]
    print(let + ": ITEMS " + str(list(members)))

    # Similarities between all distinct members (each pair appears in both orders)
    dist_list = [modelPotter.similarity(member1, member2)
                 for member1 in members
                 for member2 in members
                 if member1 != member2]
    avg_similarity = sum(dist_list) / len(dist_list) if dist_list else None
    if avg_similarity is not None:
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    # One BAWL row per cluster word; .iloc[0] avoids the deprecated
    # float(<single-element Series>) conversion
    cluster_rows = [bawl[bawl["WORD_LOWER"] == word].iloc[0] for word in arousal_sem_groups[k]]
    cluster_valence_mean, cluster_valence_std, cluster_arousal_mean, cluster_arousal_std = (
        sum(float(row[column]) for row in cluster_rows) / len(cluster_rows)
        for column in ("EMO_MEAN", "EMO_STD", "AROUSAL_MEAN", "AROUSAL_STD")
    )

    print(let + ": MEAN VALENCE " + str(cluster_valence_mean))
    print(let + ": MEAN VALENCE STD " + str(cluster_valence_std))
    print(let + ": MEAN AROUSAL " + str(cluster_arousal_mean))
    print(let + ": MEAN AROUSAL STD " + str(cluster_arousal_std))

    if avg_similarity is None:
        # A single-word cluster has no pairwise similarity and gets the score 0.
        # Any other error is no longer swallowed silently.
        suggestion_value = 0
    else:
        # similarity + mean arousal - mean arousal std, scaled up by 5% per cluster member
        suggestion_value = (avg_similarity + cluster_arousal_mean - cluster_arousal_std) * (1.05 ** len(members))
    suggestion_values.append((suggestion_value, members))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

# Ranking for suggestion values (highest first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, (_score, ranked_words) in enumerate(suggestion_values, start=1):
    print(str(rank) + " " + str(list(ranked_words)))

# Persist the best-ranked cluster

high_arousal_Potter = list(suggestion_values[0][1])
with open(path_pickled + "\\high_arousal_Potter.pkl", "wb") as f:
    pickle.dump(high_arousal_Potter, f)



{0: ['trauma', 'tumor', 'alptraum'], 3: ['schrei', 'alarm', 'erdbeben'], 6: ['furcht', 'atemnot', 'ekstase', 'panik'], 1: ['erotik'], 2: ['hassen', 'foltern', 'brutal', 'folter'], 7: ['mord', 'attentat'], 5: ['terror', 'unheil', 'bestie', 'krieg', 'massaker', 'weltkrieg', 'nazi'], 4: ['notfall']}
a: ITEMS ['trauma', 'tumor', 'alptraum']
a: AVG SIMILARITY 0.35044728716214496
a: MEAN VALENCE -2.5166666666666666
a: MEAN VALENCE STD 0.6764246840112254
a: MEAN AROUSAL 4.480392156862746
a: MEAN AROUSAL STD 0.769331271683298
a: SUGGESTION VALUE 4.701703398006936
b: ITEMS ['erotik']
b: MEAN VALENCE 2.3
b: MEAN VALENCE STD 0.4830458915396481
b: MEAN AROUSAL 4.4375
b: MEAN AROUSAL STD 0.8139410298049853
b: SUGGESTION VALUE 0
c: ITEMS ['hassen', 'foltern', 'brutal', 'folter']
c: AVG SIMILARITY 0.32142750918865204
c: MEAN VALENCE -2.525
c: MEAN VALENCE STD 0.58078709
c: MEAN AROUSAL 4.524853801169591
c: MEAN AROUSAL STD 0.7965188691286542
c: SUGGESTION VALUE 4.922511558329824
d: ITEMS ['schrei', '

### 4. Low Arousal

In [17]:
# Define relevant arousal words by threshold value
# NOTE: this threshold-based selection is overwritten by the ranking-based
# selection directly below, so only the ranking takes effect.

arousal_mean = 2
arousal_std = 1

low_arousal_words = list(bawl[(bawl["POTTER_FREQ"] >= 50) & (bawl["AROUSAL_MEAN"] <= arousal_mean) & (bawl["AROUSAL_STD"] <= arousal_std)]["WORD_LOWER"])

# Define relevant arousal words by ranking: the bot_n words with the lowest
# AROUSAL_MEAN among entries with POTTER_FREQ >= 50 and AROUSAL_STD <= arousal_std

bot_n = 25
arousal_std = 1

low_arousal_words = list(bawl[(bawl["POTTER_FREQ"] >= 50) & (bawl["AROUSAL_STD"] <= arousal_std)].sort_values("AROUSAL_MEAN")["WORD_LOWER"])[:bot_n]

# Get vectors for arousal words

arousal_words_index = [modelPotter.key_to_index[word] for word in low_arousal_words]
arousal_word_vectors = [modelPotter.get_vector(i) for i in arousal_words_index]

# Normalize vectors to unit length (L2 norm)

arousal_word_cosine = np.array(normalize(arousal_word_vectors, norm='l2'))

# Cluster with Affinity Propagation

arousal_cos_cluster = AF(affinity='euclidean')
arousal_cos_cluster.fit(arousal_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

arousal_sem_groups = {}
for label, word in zip(arousal_cos_cluster.labels_, low_arousal_words):
    arousal_sem_groups.setdefault(label, []).append(word)

print(arousal_sem_groups)

# Print relevant information of clusters and compute a suggestion value per cluster

suggestion_values = []

letters = cycle('abcdefghijklmnopqrstuvwxyz')
for k, let in zip(range(len(arousal_cos_cluster.cluster_centers_)), letters):
    class_members = arousal_cos_cluster.labels_ == k
    members = np.array(low_arousal_words)[class_members]
    print(let + ": ITEMS " + str(list(members)))

    # Similarities between all distinct members (each pair appears in both orders)
    dist_list = [modelPotter.similarity(member1, member2)
                 for member1 in members
                 for member2 in members
                 if member1 != member2]
    avg_similarity = sum(dist_list) / len(dist_list) if dist_list else None
    if avg_similarity is not None:
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    # One BAWL row per cluster word; .iloc[0] avoids the deprecated
    # float(<single-element Series>) conversion
    cluster_rows = [bawl[bawl["WORD_LOWER"] == word].iloc[0] for word in arousal_sem_groups[k]]
    cluster_valence_mean, cluster_valence_std, cluster_arousal_mean, cluster_arousal_std = (
        sum(float(row[column]) for row in cluster_rows) / len(cluster_rows)
        for column in ("EMO_MEAN", "EMO_STD", "AROUSAL_MEAN", "AROUSAL_STD")
    )

    print(let + ": MEAN VALENCE " + str(cluster_valence_mean))
    print(let + ": MEAN VALENCE STD " + str(cluster_valence_std))
    print(let + ": MEAN AROUSAL " + str(cluster_arousal_mean))
    print(let + ": MEAN AROUSAL STD " + str(cluster_arousal_std))

    if avg_similarity is None:
        # A single-word cluster has no pairwise similarity and gets the score 0.
        # Any other error is no longer swallowed silently.
        suggestion_value = 0
    else:
        # similarity + (4 - mean arousal) - mean arousal std, scaled up by 5% per member;
        # the (4 - mean) term makes a lower mean arousal yield a higher score
        suggestion_value = (avg_similarity + (4 - cluster_arousal_mean) - cluster_arousal_std) * (1.05 ** len(members))
    suggestion_values.append((suggestion_value, members))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

# Ranking for suggestion values (highest first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, (_score, ranked_words) in enumerate(suggestion_values, start=1):
    print(str(rank) + " " + str(list(ranked_words)))

# Persist the best-ranked cluster

low_arousal_Potter = list(suggestion_values[0][1])
with open(path_pickled + "\\low_arousal_Potter.pkl", "wb") as f:
    pickle.dump(low_arousal_Potter, f)



{0: ['schlaf', 'erholung', 'pause', 'friede'], 4: ['still', 'zahm', 'zaghaft', 'passiv', 'wenig'], 2: ['wiege', 'aquarium', 'beutel', 'schale', 'murmel', 'kamin'], 1: ['balsam', 'weich', 'seide', 'seife'], 3: ['buche', 'wiese', 'birke', 'linde', 'harfe', 'baum']}
a: ITEMS ['schlaf', 'erholung', 'pause', 'friede']
a: AVG SIMILARITY 0.29807837307453156
a: MEAN VALENCE 1.9375
a: MEAN VALENCE STD 0.9630955734305765
a: MEAN AROUSAL 1.2950832964764853
a: MEAN AROUSAL STD 0.5852768358077337
a: SUGGESTION VALUE 2.9387516324196308
b: ITEMS ['balsam', 'weich', 'seide', 'seife']
b: AVG SIMILARITY 0.2760186766584714
b: MEAN VALENCE 1.2125000000000001
b: MEAN VALENCE STD 0.7841843774999999
b: MEAN AROUSAL 1.4071207430340558
b: MEAN AROUSAL STD 0.649377090719572
b: SUGGESTION VALUE 2.697841456556106
c: ITEMS ['wiege', 'aquarium', 'beutel', 'schale', 'murmel', 'kamin']
c: AVG SIMILARITY 0.24265015721321107
c: MEAN VALENCE 0.9733333333333333
c: MEAN VALENCE STD 0.9195433433333334
c: MEAN AROUSAL 1.417

## 2. BISS

### 1. High Valence

In [18]:
# Define relevant valence words by threshold value
# NOTE: this threshold-based selection is overwritten by the ranking-based
# selection directly below, so only the ranking takes effect.

emo_mean = 2
emo_std = 1

high_valence_words = list(bawl[(bawl["BISS_FREQ"] >= 50) & (bawl["EMO_MEAN"] >= emo_mean) & (bawl["EMO_STD"] <= emo_std)]["WORD_LOWER"])

# Define relevant valence words by ranking: the top_n words with the highest
# EMO_MEAN among entries with BISS_FREQ >= 50 and EMO_STD <= emo_std

top_n = 25
emo_std = 1

high_valence_words = list(bawl[(bawl["BISS_FREQ"] >= 50) & (bawl["EMO_STD"] <= emo_std)].sort_values("EMO_MEAN")["WORD_LOWER"])[-top_n:]

# Get vectors for valence words

valence_words_index = [modelBiss.key_to_index[word] for word in high_valence_words]
valence_word_vectors = [modelBiss.get_vector(i) for i in valence_words_index]

# Normalize vectors to unit length (L2 norm)

valence_word_cosine = np.array(normalize(valence_word_vectors, norm='l2'))

# Cluster the normalized vectors with Affinity Propagation (no PCA is applied)

valence_cos_cluster = AF(affinity='euclidean')
valence_cos_cluster.fit(valence_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

valence_sem_groups = {}
for label, word in zip(valence_cos_cluster.labels_, high_valence_words):
    valence_sem_groups.setdefault(label, []).append(word)

print(valence_sem_groups)

# Print relevant information of clusters and compute a suggestion value per cluster

suggestion_values = []

letters = cycle('abcdefghijklmnopqrstuvwxyz')
for k, let in zip(range(len(valence_cos_cluster.cluster_centers_)), letters):
    class_members = valence_cos_cluster.labels_ == k
    members = np.array(high_valence_words)[class_members]
    print(let + ": ITEMS " + str(list(members)))

    # Similarities between all distinct members (each pair appears in both orders)
    dist_list = [modelBiss.similarity(member1, member2)
                 for member1 in members
                 for member2 in members
                 if member1 != member2]
    avg_similarity = sum(dist_list) / len(dist_list) if dist_list else None
    if avg_similarity is not None:
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    # One BAWL row per cluster word; .iloc[0] avoids the deprecated
    # float(<single-element Series>) conversion
    cluster_rows = [bawl[bawl["WORD_LOWER"] == word].iloc[0] for word in valence_sem_groups[k]]
    cluster_valence_mean, cluster_valence_std, cluster_arousal_mean, cluster_arousal_std = (
        sum(float(row[column]) for row in cluster_rows) / len(cluster_rows)
        for column in ("EMO_MEAN", "EMO_STD", "AROUSAL_MEAN", "AROUSAL_STD")
    )

    print(let + ": MEAN VALENCE " + str(cluster_valence_mean))
    print(let + ": MEAN VALENCE STD " + str(cluster_valence_std))
    print(let + ": MEAN AROUSAL " + str(cluster_arousal_mean))
    print(let + ": MEAN AROUSAL STD " + str(cluster_arousal_std))

    if avg_similarity is None:
        # A single-word cluster has no pairwise similarity and gets the score 0.
        # Any other error is no longer swallowed silently.
        suggestion_value = 0
    else:
        # similarity + |mean valence| - mean valence std, scaled up by 5% per cluster member
        suggestion_value = (avg_similarity + abs(cluster_valence_mean) - cluster_valence_std) * (1.05 ** len(members))
    suggestion_values.append((suggestion_value, members))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

# Ranking for suggestion values (highest first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, (_score, ranked_words) in enumerate(suggestion_values, start=1):
    print(str(rank) + " " + str(list(ranked_words)))

# Persist the best-ranked cluster

high_val_Biss = list(suggestion_values[0][1])
with open(path_pickled + "\\high_val_Biss.pkl", "wb") as f:
    pickle.dump(high_val_Biss, f)

{0: ['fantasie', 'musik', 'wahrheit', 'freiheit', 'liebe'], 1: ['perfekt', 'super', 'prima', 'gesund'], 4: ['positiv', 'lebendig', 'glück', 'lachen', 'freude', 'lieben'], 2: ['freizeit', 'urlaub', 'zuhause', 'sommer', 'ferien', 'heilung'], 3: ['erdbeere', 'freund', 'sex', 'küssen']}
a: ITEMS ['fantasie', 'musik', 'wahrheit', 'freiheit', 'liebe']
a: AVG SIMILARITY 0.13676601890474557
a: MEAN VALENCE 2.6
a: MEAN VALENCE STD 0.5196579172897808
a: MEAN AROUSAL 3.074093567251462
a: MEAN AROUSAL STD 1.2402510632084776
a: SUGGESTION VALUE 2.829654192160557
b: ITEMS ['perfekt', 'super', 'prima', 'gesund']
b: AVG SIMILARITY 0.22357503324747086
b: MEAN VALENCE 2.575
b: MEAN VALENCE STD 0.6275
b: MEAN AROUSAL 2.681798245614035
b: MEAN AROUSAL STD 1.1263849832416006
b: SUGGESTION VALUE 2.6389552721312595
c: ITEMS ['freizeit', 'urlaub', 'zuhause', 'sommer', 'ferien', 'heilung']
c: AVG SIMILARITY 0.2142697848379612
c: MEAN VALENCE 2.5083333333333333
c: MEAN VALENCE STD 0.5758275182782525
c: MEAN ARO



### 2. Low Valence

In [19]:
# Define relevant valence words by threshold value
# NOTE: this threshold-based selection is overwritten by the ranking-based
# selection directly below, so only the ranking takes effect.

emo_mean = -2
emo_std = 1

low_valence_words = list(bawl[(bawl["BISS_FREQ"] >= 50) & (bawl["EMO_MEAN"] <= emo_mean) & (bawl["EMO_STD"] <= emo_std)]["WORD_LOWER"])

# Define relevant valence words by ranking: the bot_n words with the lowest
# EMO_MEAN among entries with BISS_FREQ >= 50 and EMO_STD <= emo_std

bot_n = 25
emo_std = 1

low_valence_words = list(bawl[(bawl["BISS_FREQ"] >= 50) & (bawl["EMO_STD"] <= emo_std)].sort_values("EMO_MEAN")["WORD_LOWER"])[:bot_n]

# Get vectors for valence words

valence_words_index = [modelBiss.key_to_index[word] for word in low_valence_words]
valence_word_vectors = [modelBiss.get_vector(i) for i in valence_words_index]

# Normalize vectors to unit length (L2 norm)

valence_word_cosine = np.array(normalize(valence_word_vectors, norm='l2'))

# Cluster with Affinity Propagation

valence_cos_cluster = AF(affinity='euclidean')
valence_cos_cluster.fit(valence_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

valence_sem_groups = {}
for label, word in zip(valence_cos_cluster.labels_, low_valence_words):
    valence_sem_groups.setdefault(label, []).append(word)

print(valence_sem_groups)

# Print relevant information of clusters and compute a suggestion value per cluster

suggestion_values = []

letters = cycle('abcdefghijklmnopqrstuvwxyz')
for k, let in zip(range(len(valence_cos_cluster.cluster_centers_)), letters):
    class_members = valence_cos_cluster.labels_ == k
    members = np.array(low_valence_words)[class_members]
    print(let + ": ITEMS " + str(list(members)))

    # Similarities between all distinct members (each pair appears in both orders)
    dist_list = [modelBiss.similarity(member1, member2)
                 for member1 in members
                 for member2 in members
                 if member1 != member2]
    avg_similarity = sum(dist_list) / len(dist_list) if dist_list else None
    if avg_similarity is not None:
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    # One BAWL row per cluster word; .iloc[0] avoids the deprecated
    # float(<single-element Series>) conversion
    cluster_rows = [bawl[bawl["WORD_LOWER"] == word].iloc[0] for word in valence_sem_groups[k]]
    cluster_valence_mean, cluster_valence_std, cluster_arousal_mean, cluster_arousal_std = (
        sum(float(row[column]) for row in cluster_rows) / len(cluster_rows)
        for column in ("EMO_MEAN", "EMO_STD", "AROUSAL_MEAN", "AROUSAL_STD")
    )

    print(let + ": MEAN VALENCE " + str(cluster_valence_mean))
    print(let + ": MEAN VALENCE STD " + str(cluster_valence_std))
    print(let + ": MEAN AROUSAL " + str(cluster_arousal_mean))
    print(let + ": MEAN AROUSAL STD " + str(cluster_arousal_std))

    if avg_similarity is None:
        # A single-word cluster has no pairwise similarity and gets the score 0.
        # Any other error is no longer swallowed silently.
        suggestion_value = 0
    else:
        # similarity + |mean valence| - mean valence std, scaled up by 5% per cluster member.
        # The cluster size is taken from this cell's own word list (low_valence_words),
        # not from the high-valence list of another cell.
        suggestion_value = (avg_similarity + abs(cluster_valence_mean) - cluster_valence_std) * (1.05 ** len(members))
    suggestion_values.append((suggestion_value, members))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

# Ranking for suggestion values (highest first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, (_score, ranked_words) in enumerate(suggestion_values, start=1):
    print(str(rank) + " " + str(list(ranked_words)))

# Persist the best-ranked cluster

low_val_Biss = list(suggestion_values[0][1])
with open(path_pickled + "\\low_val_Biss.pkl", "wb") as f:
    pickle.dump(low_val_Biss, f)

{1: ['krieg', 'tod', 'mord', 'folter', 'strafe', 'unfall', 'grab'], 0: ['alptraum', 'angst', 'schlecht'], 2: ['foltern', 'töten', 'gewalt', 'qual', 'zerstören', 'gift', 'tot', 'hassen', 'leiche', 'leblos'], 4: ['tote', 'bombe', 'waffe', 'gefängnis'], 3: ['negativ']}
a: ITEMS ['alptraum', 'angst', 'schlecht']
a: AVG SIMILARITY 0.25038673480351764
a: MEAN VALENCE -2.566666666666667
a: MEAN VALENCE STD 0.6672204769379256
a: MEAN AROUSAL 4.229380641145347
a: MEAN AROUSAL STD 0.6973534324074256
a: SUGGESTION VALUE 2.4887003392616567
b: ITEMS ['krieg', 'tod', 'mord', 'folter', 'strafe', 'unfall', 'grab']
b: AVG SIMILARITY 0.21646292365732647
b: MEAN VALENCE -2.6214285714285714
b: MEAN VALENCE STD 0.6011920502506946
b: MEAN AROUSAL 4.21905955364602
b: MEAN AROUSAL STD 0.837286669273281
b: SUGGESTION VALUE 3.1472607341826158
c: ITEMS ['foltern', 'töten', 'gewalt', 'qual', 'zerstören', 'gift', 'tot', 'hassen', 'leiche', 'leblos']
c: AVG SIMILARITY 0.20024685232589642
c: MEAN VALENCE -2.56852941



### 3. High Arousal

In [20]:
# Define relevant arousal words by threshold value
# NOTE: this threshold-based list is overwritten by the ranking-based
# selection below; it is kept to document the alternative approach.

arousal_mean = 2
arousal_std = 1

high_arousal_words = list(bawl[(bawl["BISS_FREQ"] >= 50) & (bawl["AROUSAL_MEAN"] >= arousal_mean) & (bawl["AROUSAL_STD"] <= arousal_std)]["WORD_LOWER"])

# Define relevant arousal words by ranking: of all words with
# BISS_FREQ >= 50 and AROUSAL_STD <= arousal_std, keep the top_n words
# with the highest AROUSAL_MEAN

top_n = 25
arousal_std = 1

high_arousal_words = list(bawl[(bawl["BISS_FREQ"] >= 50) & (bawl["AROUSAL_STD"] <= arousal_std)].sort_values("AROUSAL_MEAN")["WORD_LOWER"])[-top_n:]

# Get vectors for arousal words

arousal_words_index = [modelBiss.key_to_index[word] for word in high_arousal_words]
arousal_word_vectors = [modelBiss.get_vector(i) for i in arousal_words_index]

# L2-normalize every vector to unit length; on unit-length vectors the
# Euclidean affinity used below ranks pairs like cosine similarity

arousal_word_cosine = np.array(normalize(arousal_word_vectors, norm='l2'))

# Cluster with Affinity Propagation

arousal_cos_cluster = AF(affinity='euclidean')
arousal_cos_cluster.fit(arousal_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

arousal_sem_groups = {}
for word, label in zip(high_arousal_words, arousal_cos_cluster.labels_):
    arousal_sem_groups.setdefault(label, []).append(word)

print(arousal_sem_groups)

# Print relevant information of clusters

suggestion_values = []
high_arousal_array = np.array(high_arousal_words)

# One letter per cluster; the full alphabet is cycled if there are more
# than 26 clusters
letters = cycle('abcdefghijklmnopqrstuvwxyz')
for k, let in zip(range(len(arousal_cos_cluster.cluster_centers_)), letters):
    class_members = arousal_cos_cluster.labels_ == k
    cluster_words = high_arousal_array[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))

    # Similarity of every ordered pair of distinct cluster members
    dist_list = [modelBiss.similarity(member1, member2)
                 for member1 in cluster_words
                 for member2 in cluster_words
                 if member1 != member2]
    avg_similarity = sum(dist_list) / len(dist_list) if dist_list else None
    if avg_similarity is not None:
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in arousal_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() raises unless exactly one BAWL row matches the word
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    # Score = (similarity + mean arousal - mean arousal std), scaled by
    # 1.05 per cluster member. A single-word cluster has no pairwise
    # similarity and gets a score of 0.
    if avg_similarity is None:
        suggestion_value = 0
    else:
        suggestion_value = (avg_similarity + mean_arousal - mean_arousal_std) * (1.05 ** len(cluster_words))
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

# Ranking for suggestion values (highest score first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, (_, ranked_words) in enumerate(suggestion_values, start=1):
    print(str(rank) + " " + str(list(ranked_words)))

# Pickle the words of the best-scoring cluster

high_arousal_Biss = list(suggestion_values[0][1])
with open(path_pickled + "\\high_arousal_Biss.pkl", "wb") as f:
    pickle.dump(high_arousal_Biss, f)

{4: ['tot', 'tote', 'waffe', 'unfall', 'gewalt', 'mord', 'notfall', 'krieg'], 3: ['drohung', 'sex', 'foltern', 'hassen', 'brutal', 'folter'], 0: ['geburt', 'trennung'], 2: ['panisch', 'ohrfeige', 'blutig', 'schrei'], 1: ['gefahr', 'angst', 'furcht', 'alptraum', 'panik']}
a: ITEMS ['geburt', 'trennung']
a: AVG SIMILARITY 0.3701315224170685
a: MEAN VALENCE 0.09999999999999998
a: MEAN VALENCE STD 0.9302964040252568
a: MEAN AROUSAL 4.235294117647059
a: MEAN AROUSAL STD 0.8272933171531047
a: SUGGESTION VALUE 4.165390886009402
b: ITEMS ['gefahr', 'angst', 'furcht', 'alptraum', 'panik']
b: AVG SIMILARITY 0.3092178553342819
b: MEAN VALENCE -2.24
b: MEAN VALENCE STD 0.8035011257120784
b: MEAN AROUSAL 4.458263305322129
b: MEAN AROUSAL STD 0.636471903062702
b: SUGGESTION VALUE 5.272330949983665
c: ITEMS ['panisch', 'ohrfeige', 'blutig', 'schrei']
c: AVG SIMILARITY 0.18286674345533052
c: MEAN VALENCE -1.875
c: MEAN VALENCE STD 0.8426136143609835
c: MEAN AROUSAL 4.326443713450292
c: MEAN AROUSAL ST



### 4. Low Arousal

In [21]:
# Define relevant arousal words by threshold value
# NOTE: this threshold-based list is overwritten by the ranking-based
# selection below; it is kept to document the alternative approach.

arousal_mean = 2
arousal_std = 1

low_arousal_words = list(bawl[(bawl["BISS_FREQ"] >= 50) & (bawl["AROUSAL_MEAN"] <= arousal_mean) & (bawl["AROUSAL_STD"] <= arousal_std)]["WORD_LOWER"])

# Define relevant arousal words by ranking: of all words with
# BISS_FREQ >= 50 and AROUSAL_STD <= arousal_std, keep the bot_n words
# with the lowest AROUSAL_MEAN

bot_n = 25
arousal_std = 1

low_arousal_words = list(bawl[(bawl["BISS_FREQ"] >= 50) & (bawl["AROUSAL_STD"] <= arousal_std)].sort_values("AROUSAL_MEAN")["WORD_LOWER"])[:bot_n]

# Get vectors for arousal words

arousal_words_index = [modelBiss.key_to_index[word] for word in low_arousal_words]
arousal_word_vectors = [modelBiss.get_vector(i) for i in arousal_words_index]

# L2-normalize every vector to unit length; on unit-length vectors the
# Euclidean affinity used below ranks pairs like cosine similarity

arousal_word_cosine = np.array(normalize(arousal_word_vectors, norm='l2'))

# Cluster with Affinity Propagation

arousal_cos_cluster = AF(affinity='euclidean')
arousal_cos_cluster.fit(arousal_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

arousal_sem_groups = {}
for word, label in zip(low_arousal_words, arousal_cos_cluster.labels_):
    arousal_sem_groups.setdefault(label, []).append(word)

print(arousal_sem_groups)

# Print relevant information of clusters

suggestion_values = []
low_arousal_array = np.array(low_arousal_words)

# One letter per cluster; the full alphabet is cycled if there are more
# than 26 clusters
letters = cycle('abcdefghijklmnopqrstuvwxyz')
for k, let in zip(range(len(arousal_cos_cluster.cluster_centers_)), letters):
    class_members = arousal_cos_cluster.labels_ == k
    cluster_words = low_arousal_array[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))

    # Similarity of every ordered pair of distinct cluster members
    dist_list = [modelBiss.similarity(member1, member2)
                 for member1 in cluster_words
                 for member2 in cluster_words
                 if member1 != member2]
    avg_similarity = sum(dist_list) / len(dist_list) if dist_list else None
    if avg_similarity is not None:
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in arousal_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() raises unless exactly one BAWL row matches the word
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    # Score = (similarity + (4 - mean arousal) - mean arousal std), scaled
    # by 1.05 per cluster member, so lower arousal scores higher.
    # NOTE(review): the constant 4 is presumably tied to the arousal
    # scale - confirm. A single-word cluster has no pairwise similarity
    # and gets a score of 0.
    if avg_similarity is None:
        suggestion_value = 0
    else:
        suggestion_value = (avg_similarity + (4 - mean_arousal) - mean_arousal_std) * (1.05 ** len(cluster_words))
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

# Ranking for suggestion values (highest score first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, (_, ranked_words) in enumerate(suggestion_values, start=1):
    print(str(rank) + " " + str(list(ranked_words)))

# Pickle the words of the best-scoring cluster

low_arousal_Biss = list(suggestion_values[0][1])
with open(path_pickled + "\\low_arousal_Biss.pkl", "wb") as f:
    pickle.dump(low_arousal_Biss, f)

{3: ['schlaf', 'weich', 'wiese', 'wenig', 'boden', 'gras', 'decke', 'garten'], 1: ['still', 'zaghaft', 'stumm'], 0: ['pause', 'friede', 'segen'], 2: ['beutel', 'schale', 'ding', 'honig', 'nudel', 'teller'], 4: ['baum', 'kamin', 'liege', 'klang', 'tanne']}
a: ITEMS ['pause', 'friede', 'segen']
a: AVG SIMILARITY 0.21038169662157694
a: MEAN VALENCE 1.7166666666666668
a: MEAN VALENCE STD 1.1520701533333333
a: MEAN AROUSAL 1.4436507936507936
a: MEAN AROUSAL STD 0.7749930839381302
a: SUGGESTION VALUE 2.305685492757675
b: ITEMS ['still', 'zaghaft', 'stumm']
b: AVG SIMILARITY 0.21622995535532633
b: MEAN VALENCE -0.3666666666666667
b: MEAN VALENCE STD 0.7133333333333334
b: MEAN AROUSAL 1.4210526315789471
b: MEAN AROUSAL STD 0.7891388740917341
b: SUGGESTION VALUE 2.3222402603161876
c: ITEMS ['beutel', 'schale', 'ding', 'honig', 'nudel', 'teller']
c: AVG SIMILARITY 0.2789863131940365
c: MEAN VALENCE 0.8600980392156862
c: MEAN VALENCE STD 0.840368452979658
c: MEAN AROUSAL 1.5302899610136453
c: MEA



## 3. WARRIORCATS

### 1. High Valence

In [22]:
# Define relevant valence words by threshold value
# NOTE: this threshold-based list is overwritten by the ranking-based
# selection below; it is kept to document the alternative approach.

emo_mean = 2
emo_std = 1

high_valence_words = list(bawl[(bawl["WARRIORCATS_FREQ"] >= 50) & (bawl["EMO_MEAN"] >= emo_mean) & (bawl["EMO_STD"] <= emo_std)]["WORD_LOWER"])

# Define relevant valence words by ranking: of all words with
# WARRIORCATS_FREQ >= 50 and EMO_STD <= emo_std, keep the top_n words
# with the highest EMO_MEAN

top_n = 25
emo_std = 1

high_valence_words = list(bawl[(bawl["WARRIORCATS_FREQ"] >= 50) & (bawl["EMO_STD"] <= emo_std)].sort_values("EMO_MEAN")["WORD_LOWER"])[-top_n:]

# Get vectors for valence words

valence_words_index = [modelWarriorCats.key_to_index[word] for word in high_valence_words]
valence_word_vectors = [modelWarriorCats.get_vector(i) for i in valence_words_index]

# L2-normalize every vector to unit length; on unit-length vectors the
# Euclidean affinity used below ranks pairs like cosine similarity

valence_word_cosine = np.array(normalize(valence_word_vectors, norm='l2'))

# Cluster with Affinity Propagation (directly on the normalized
# vectors; no PCA is applied)

valence_cos_cluster = AF(affinity='euclidean')
valence_cos_cluster.fit(valence_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

valence_sem_groups = {}
for word, label in zip(high_valence_words, valence_cos_cluster.labels_):
    valence_sem_groups.setdefault(label, []).append(word)

print(valence_sem_groups)

# Print relevant information of clusters

suggestion_values = []
high_valence_array = np.array(high_valence_words)

# One letter per cluster; the full alphabet is cycled if there are more
# than 26 clusters
letters = cycle('abcdefghijklmnopqrstuvwxyz')
for k, let in zip(range(len(valence_cos_cluster.cluster_centers_)), letters):
    class_members = valence_cos_cluster.labels_ == k
    cluster_words = high_valence_array[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))

    # Similarity of every ordered pair of distinct cluster members
    dist_list = [modelWarriorCats.similarity(member1, member2)
                 for member1 in cluster_words
                 for member2 in cluster_words
                 if member1 != member2]
    avg_similarity = sum(dist_list) / len(dist_list) if dist_list else None
    if avg_similarity is not None:
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in valence_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() raises unless exactly one BAWL row matches the word
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    # Score = (similarity + |mean valence| - mean valence std), scaled by
    # 1.05 per cluster member. A single-word cluster has no pairwise
    # similarity and gets a score of 0.
    if avg_similarity is None:
        suggestion_value = 0
    else:
        suggestion_value = (avg_similarity + abs(mean_valence) - mean_valence_std) * (1.05 ** len(cluster_words))
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

# Ranking for suggestion values (highest score first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, (_, ranked_words) in enumerate(suggestion_values, start=1):
    print(str(rank) + " " + str(list(ranked_words)))

# Pickle the words of the best-scoring cluster

high_val_WarriorCats = list(suggestion_values[0][1])
with open(path_pickled + "\\high_val_WarriorCats.pkl", "wb") as f:
    pickle.dump(high_val_WarriorCats, f)

{2: ['warm', 'himmel', 'blüte'], 0: ['treu', 'vertrauen', 'wahrheit', 'gesund', 'lieben'], 1: ['sieg', 'reisen', 'reise', 'genießen', 'leben', 'zuhause'], 3: ['freuen', 'gefühl', 'spaß', 'perfekt', 'super', 'glück', 'freude', 'liebe'], 4: ['lächeln', 'freund', 'lachen']}
a: ITEMS ['treu', 'vertrauen', 'wahrheit', 'gesund', 'lieben']
a: AVG SIMILARITY 0.18742917850613594
a: MEAN VALENCE 2.4623529411764706
a: MEAN VALENCE STD 0.7115240315537839
a: MEAN AROUSAL 2.831578947368421
a: MEAN AROUSAL STD 1.1714270441523005
a: SUGGESTION VALUE 2.473763061245317
b: ITEMS ['sieg', 'reisen', 'reise', 'genießen', 'leben', 'zuhause']
b: AVG SIMILARITY 0.21451869681477548
b: MEAN VALENCE 2.3151960784313728
b: MEAN VALENCE STD 0.7585605537396574
b: MEAN AROUSAL 2.7914488017429195
b: MEAN AROUSAL STD 1.2860334867278962
b: SUGGESTION VALUE 2.373516051115415
c: ITEMS ['warm', 'himmel', 'blüte']
c: AVG SIMILARITY 0.18990723292032877
c: MEAN VALENCE 2.2566666666666664
c: MEAN VALENCE STD 0.7234884899999999




### 2. Low Valence

In [23]:
# Define relevant valence words by threshold value
# NOTE: this threshold-based list is overwritten by the ranking-based
# selection below; it is kept to document the alternative approach.

emo_mean = -2
emo_std = 1

low_valence_words = list(bawl[(bawl["WARRIORCATS_FREQ"] >= 50) & (bawl["EMO_MEAN"] <= emo_mean) & (bawl["EMO_STD"] <= emo_std)]["WORD_LOWER"])

# Define relevant valence words by ranking: of all words with
# WARRIORCATS_FREQ >= 50 and EMO_STD <= emo_std, keep the bot_n words
# with the lowest EMO_MEAN

bot_n = 25
emo_std = 1

low_valence_words = list(bawl[(bawl["WARRIORCATS_FREQ"] >= 50) & (bawl["EMO_STD"] <= emo_std)].sort_values("EMO_MEAN")["WORD_LOWER"])[:bot_n]

# Get vectors for valence words

valence_words_index = [modelWarriorCats.key_to_index[word] for word in low_valence_words]
valence_word_vectors = [modelWarriorCats.get_vector(i) for i in valence_words_index]

# L2-normalize every vector to unit length; on unit-length vectors the
# Euclidean affinity used below ranks pairs like cosine similarity

valence_word_cosine = np.array(normalize(valence_word_vectors, norm='l2'))

# Cluster with Affinity Propagation

valence_cos_cluster = AF(affinity='euclidean')
valence_cos_cluster.fit(valence_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

valence_sem_groups = {}
for word, label in zip(low_valence_words, valence_cos_cluster.labels_):
    valence_sem_groups.setdefault(label, []).append(word)

print(valence_sem_groups)

# Print relevant information of clusters

suggestion_values = []
low_valence_array = np.array(low_valence_words)

# One letter per cluster; the full alphabet is cycled if there are more
# than 26 clusters
letters = cycle('abcdefghijklmnopqrstuvwxyz')
for k, let in zip(range(len(valence_cos_cluster.cluster_centers_)), letters):
    class_members = valence_cos_cluster.labels_ == k
    cluster_words = low_valence_array[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))

    # Similarity of every ordered pair of distinct cluster members
    dist_list = [modelWarriorCats.similarity(member1, member2)
                 for member1 in cluster_words
                 for member2 in cluster_words
                 if member1 != member2]
    avg_similarity = sum(dist_list) / len(dist_list) if dist_list else None
    if avg_similarity is not None:
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in valence_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() raises unless exactly one BAWL row matches the word
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    # Score = (similarity + |mean valence| - mean valence std), scaled by
    # 1.05 per cluster member. The cluster size is taken from this cell's
    # own low-valence word list, so the score does not depend on
    # variables left over from the high-valence cell. A single-word
    # cluster has no pairwise similarity and gets a score of 0.
    if avg_similarity is None:
        suggestion_value = 0
    else:
        suggestion_value = (avg_similarity + abs(mean_valence) - mean_valence_std) * (1.05 ** len(cluster_words))
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

# Ranking for suggestion values (highest score first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, (_, ranked_words) in enumerate(suggestion_values, start=1):
    print(str(rank) + " " + str(list(ranked_words)))

# Pickle the words of the best-scoring cluster

low_val_WarriorCats = list(suggestion_values[0][1])
with open(path_pickled + "\\low_val_WarriorCats.pkl", "wb") as f:
    pickle.dump(low_val_WarriorCats, f)

{0: ['krieg', 'tote', 'zerstören', 'streit', 'stehlen'], 1: ['mord', 'tod', 'töten', 'strafe', 'verrat', 'unfall', 'schlecht', 'verlust', 'grausam', 'lüge'], 3: ['qual', 'angst', 'furcht', 'trauer'], 2: ['tot', 'leiche', 'leichnam', 'grab', 'leblos', 'einsam']}
a: ITEMS ['krieg', 'tote', 'zerstören', 'streit', 'stehlen']
a: AVG SIMILARITY 0.22577126920223237
a: MEAN VALENCE -2.4858823529411764
a: MEAN VALENCE STD 0.5927022695740826
a: MEAN AROUSAL 4.067040149393091
a: MEAN AROUSAL STD 0.863080592508131
a: SUGGESTION VALUE 2.704378543118669
b: ITEMS ['mord', 'tod', 'töten', 'strafe', 'verrat', 'unfall', 'schlecht', 'verlust', 'grausam', 'lüge']
b: AVG SIMILARITY 0.24374271349774468
b: MEAN VALENCE -2.42735294117647
b: MEAN VALENCE STD 0.6963854679950113
b: MEAN AROUSAL 4.013174603174603
b: MEAN AROUSAL STD 0.9453614309336433
b: SUGGESTION VALUE 3.2165948125244337
c: ITEMS ['tot', 'leiche', 'leichnam', 'grab', 'leblos', 'einsam']
c: AVG SIMILARITY 0.2657271149257819
c: MEAN VALENCE -2.35



### 3. High Arousal

In [24]:
# Define relevant arousal words by threshold value
# NOTE: this threshold-based list is overwritten by the ranking-based
# selection below; it is kept to document the alternative approach.

arousal_mean = 2
arousal_std = 1

high_arousal_words = list(bawl[(bawl["WARRIORCATS_FREQ"] >= 50) & (bawl["AROUSAL_MEAN"] >= arousal_mean) & (bawl["AROUSAL_STD"] <= arousal_std)]["WORD_LOWER"])

# Define relevant arousal words by ranking: of all words with
# WARRIORCATS_FREQ >= 50 and AROUSAL_STD <= arousal_std, keep the top_n
# words with the highest AROUSAL_MEAN

top_n = 25
arousal_std = 1

high_arousal_words = list(bawl[(bawl["WARRIORCATS_FREQ"] >= 50) & (bawl["AROUSAL_STD"] <= arousal_std)].sort_values("AROUSAL_MEAN")["WORD_LOWER"])[-top_n:]

# Get vectors for arousal words

arousal_words_index = [modelWarriorCats.key_to_index[word] for word in high_arousal_words]
arousal_word_vectors = [modelWarriorCats.get_vector(i) for i in arousal_words_index]

# L2-normalize every vector to unit length; on unit-length vectors the
# Euclidean affinity used below ranks pairs like cosine similarity

arousal_word_cosine = np.array(normalize(arousal_word_vectors, norm='l2'))

# Cluster with Affinity Propagation

arousal_cos_cluster = AF(affinity='euclidean')
arousal_cos_cluster.fit(arousal_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

arousal_sem_groups = {}
for word, label in zip(high_arousal_words, arousal_cos_cluster.labels_):
    arousal_sem_groups.setdefault(label, []).append(word)

print(arousal_sem_groups)

# Print relevant information of clusters

suggestion_values = []
high_arousal_array = np.array(high_arousal_words)

# One letter per cluster; the full alphabet is cycled if there are more
# than 26 clusters
letters = cycle('abcdefghijklmnopqrstuvwxyz')
for k, let in zip(range(len(arousal_cos_cluster.cluster_centers_)), letters):
    class_members = arousal_cos_cluster.labels_ == k
    cluster_words = high_arousal_array[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))

    # Similarity of every ordered pair of distinct cluster members
    dist_list = [modelWarriorCats.similarity(member1, member2)
                 for member1 in cluster_words
                 for member2 in cluster_words
                 if member1 != member2]
    avg_similarity = sum(dist_list) / len(dist_list) if dist_list else None
    if avg_similarity is not None:
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in arousal_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() raises unless exactly one BAWL row matches the word
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    # Score = (similarity + mean arousal - mean arousal std), scaled by
    # 1.05 per cluster member. A single-word cluster has no pairwise
    # similarity and gets a score of 0.
    if avg_similarity is None:
        suggestion_value = 0
    else:
        suggestion_value = (avg_similarity + mean_arousal - mean_arousal_std) * (1.05 ** len(cluster_words))
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

# Ranking for suggestion values (highest score first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, (_, ranked_words) in enumerate(suggestion_values, start=1):
    print(str(rank) + " " + str(list(ranked_words)))

# Pickle the words of the best-scoring cluster

high_arousal_WarriorCats = list(suggestion_values[0][1])
with open(path_pickled + "\\high_arousal_WarriorCats.pkl", "wb") as f:
    pickle.dump(high_arousal_WarriorCats, f)



{5: ['wucht', 'zorn', 'schreck', 'schmerz', 'panisch', 'angst', 'furcht', 'panik'], 4: ['lüge', 'leid', 'unfall', 'mord', 'krieg'], 0: ['sturm', 'scharf', 'schrei'], 1: ['leiche', 'tot', 'geburt'], 2: ['feind', 'gefahr', 'brutal'], 3: ['macht', 'tote', 'blutig']}
a: ITEMS ['sturm', 'scharf', 'schrei']
a: AVG SIMILARITY 0.2016271948814392
a: MEAN VALENCE -0.5666666666666668
a: MEAN VALENCE STD 1.1660722389296734
a: MEAN AROUSAL 4.208771929824562
a: MEAN AROUSAL STD 0.8045649177778761
a: SUGGESTION VALUE 4.174203823795171
b: ITEMS ['leiche', 'tot', 'geburt']
b: AVG SIMILARITY 0.26984018584092456
b: MEAN VALENCE -1.0166666666666668
b: MEAN VALENCE STD 0.84035472
b: MEAN AROUSAL 4.161998132586368
b: MEAN AROUSAL STD 0.8866517010928522
b: SUGGESTION VALUE 4.1039966578917815
c: ITEMS ['feind', 'gefahr', 'brutal']
c: AVG SIMILARITY 0.30015169084072113
c: MEAN VALENCE -1.9823529411764707
c: MEAN VALENCE STD 0.7779619537104682
c: MEAN AROUSAL 4.352422723475356
c: MEAN AROUSAL STD 0.873801475922

### 4. Low Arousal

In [25]:
# Define relevant arousal words by threshold value
# NOTE: this threshold-based list is overwritten by the ranking-based
# selection below; it is kept to document the alternative approach.

arousal_mean = 2
arousal_std = 1

low_arousal_words = list(bawl[(bawl["WARRIORCATS_FREQ"] >= 50) & (bawl["AROUSAL_MEAN"] <= arousal_mean) & (bawl["AROUSAL_STD"] <= arousal_std)]["WORD_LOWER"])

# Define relevant arousal words by ranking: of all words with
# WARRIORCATS_FREQ >= 50 and AROUSAL_STD <= arousal_std, keep the bot_n
# words with the lowest AROUSAL_MEAN

bot_n = 25
arousal_std = 1

low_arousal_words = list(bawl[(bawl["WARRIORCATS_FREQ"] >= 50) & (bawl["AROUSAL_STD"] <= arousal_std)].sort_values("AROUSAL_MEAN")["WORD_LOWER"])[:bot_n]

# Get vectors for arousal words

arousal_words_index = [modelWarriorCats.key_to_index[word] for word in low_arousal_words]
arousal_word_vectors = [modelWarriorCats.get_vector(i) for i in arousal_words_index]

# L2-normalize every vector to unit length; on unit-length vectors the
# Euclidean affinity used below ranks pairs like cosine similarity

arousal_word_cosine = np.array(normalize(arousal_word_vectors, norm='l2'))

# Cluster with Affinity Propagation

arousal_cos_cluster = AF(affinity='euclidean')
arousal_cos_cluster.fit(arousal_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

arousal_sem_groups = {}
for word, label in zip(low_arousal_words, arousal_cos_cluster.labels_):
    arousal_sem_groups.setdefault(label, []).append(word)

print(arousal_sem_groups)

# Print relevant information of clusters

suggestion_values = []
low_arousal_array = np.array(low_arousal_words)

# One letter per cluster; the full alphabet is cycled if there are more
# than 26 clusters
letters = cycle('abcdefghijklmnopqrstuvwxyz')
for k, let in zip(range(len(arousal_cos_cluster.cluster_centers_)), letters):
    class_members = arousal_cos_cluster.labels_ == k
    cluster_words = low_arousal_array[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))

    # Similarity of every ordered pair of distinct cluster members
    dist_list = [modelWarriorCats.similarity(member1, member2)
                 for member1 in cluster_words
                 for member2 in cluster_words
                 if member1 != member2]
    avg_similarity = sum(dist_list) / len(dist_list) if dist_list else None
    if avg_similarity is not None:
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in arousal_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() raises unless exactly one BAWL row matches the word
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    # Score = (similarity + (4 - mean arousal) - mean arousal std), scaled
    # by 1.05 per cluster member, so lower arousal scores higher.
    # NOTE(review): the constant 4 is presumably tied to the arousal
    # scale - confirm. A single-word cluster has no pairwise similarity
    # and gets a score of 0.
    if avg_similarity is None:
        suggestion_value = 0
    else:
        suggestion_value = (avg_similarity + (4 - mean_arousal) - mean_arousal_std) * (1.05 ** len(cluster_words))
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

# Ranking for suggestion values (highest score first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, (_, ranked_words) in enumerate(suggestion_values, start=1):
    print(str(rank) + " " + str(list(ranked_words)))

# Pickle the words of the best-scoring cluster

low_arousal_WarriorCats = list(suggestion_values[0][1])
with open(path_pickled + "\\low_arousal_WarriorCats.pkl", "wb") as f:
    pickle.dump(low_arousal_WarriorCats, f)



{3: ['schlaf', 'weich', 'boden', 'gras', 'sand'], 4: ['still', 'stumm', 'schweigen'], 0: ['pause', 'friede', 'gesund'], 2: ['wiese', 'wenig', 'decke', 'himmel', 'milch', 'wange', 'feder', 'regnen', 'glas'], 1: ['birke', 'baum', 'ding', 'stamm', 'eiche']}
a: ITEMS ['pause', 'friede', 'gesund']
a: AVG SIMILARITY 0.20022520422935486
a: MEAN VALENCE 2.0833333333333335
a: MEAN VALENCE STD 0.8876682699999999
a: MEAN AROUSAL 1.470843776106934
a: MEAN AROUSAL STD 0.7329413179146852
a: SUGGESTION VALUE 2.3111289825792305
b: ITEMS ['birke', 'baum', 'ding', 'stamm', 'eiche']
b: AVG SIMILARITY 0.26235354617238044
b: MEAN VALENCE 0.9061176470588237
b: MEAN VALENCE STD 1.0346646551043865
b: MEAN AROUSAL 1.5757660818713448
b: MEAN AROUSAL STD 0.7144040018168821
b: SUGGESTION VALUE 2.5170613910359356
c: ITEMS ['wiese', 'wenig', 'decke', 'himmel', 'milch', 'wange', 'feder', 'regnen', 'glas']
c: AVG SIMILARITY 0.16997984631194007
c: MEAN VALENCE 0.9777777777777779
c: MEAN VALENCE STD 0.981738178859405
c

## 4. DFFF

### 1. High Valence

In [26]:
# Define relevant valence words by threshold value
# NOTE: this threshold-based list is overwritten by the ranking-based
# selection below; it is kept to document the alternative approach.

emo_mean = 2
emo_std = 1

high_valence_words = list(bawl[(bawl["DFFF_FREQ"] >= 50) & (bawl["EMO_MEAN"] >= emo_mean) & (bawl["EMO_STD"] <= emo_std)]["WORD_LOWER"])

# Define relevant valence words by ranking: of all words with
# DFFF_FREQ >= 50 and EMO_STD <= emo_std, keep the top_n words with the
# highest EMO_MEAN

top_n = 25
emo_std = 1

high_valence_words = list(bawl[(bawl["DFFF_FREQ"] >= 50) & (bawl["EMO_STD"] <= emo_std)].sort_values("EMO_MEAN")["WORD_LOWER"])[-top_n:]

# Get vectors for valence words

valence_words_index = [modelDFFF.key_to_index[word] for word in high_valence_words]
valence_word_vectors = [modelDFFF.get_vector(i) for i in valence_words_index]

# L2-normalize every vector to unit length; on unit-length vectors the
# Euclidean affinity used below ranks pairs like cosine similarity

valence_word_cosine = np.array(normalize(valence_word_vectors, norm='l2'))

# Cluster with Affinity Propagation (directly on the normalized
# vectors; no PCA is applied)

valence_cos_cluster = AF(affinity='euclidean')
valence_cos_cluster.fit(valence_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

valence_sem_groups = {}
for word, label in zip(high_valence_words, valence_cos_cluster.labels_):
    valence_sem_groups.setdefault(label, []).append(word)

print(valence_sem_groups)

# Print relevant information of clusters

suggestion_values = []
high_valence_array = np.array(high_valence_words)

# One letter per cluster; the full alphabet is cycled if there are more
# than 26 clusters
letters = cycle('abcdefghijklmnopqrstuvwxyz')
for k, let in zip(range(len(valence_cos_cluster.cluster_centers_)), letters):
    class_members = valence_cos_cluster.labels_ == k
    cluster_words = high_valence_array[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))

    # Similarity of every ordered pair of distinct cluster members
    dist_list = [modelDFFF.similarity(member1, member2)
                 for member1 in cluster_words
                 for member2 in cluster_words
                 if member1 != member2]
    avg_similarity = sum(dist_list) / len(dist_list) if dist_list else None
    if avg_similarity is not None:
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in valence_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() raises unless exactly one BAWL row matches the word
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    # Score = (similarity + |mean valence| - mean valence std), scaled by
    # 1.05 per cluster member. A single-word cluster has no pairwise
    # similarity and gets a score of 0.
    if avg_similarity is None:
        suggestion_value = 0
    else:
        suggestion_value = (avg_similarity + abs(mean_valence) - mean_valence_std) * (1.05 ** len(cluster_words))
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

# Ranking for suggestion values (highest score first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, (_, ranked_words) in enumerate(suggestion_values, start=1):
    print(str(rank) + " " + str(list(ranked_words)))

# Pickle the words of the best-scoring cluster

high_val_DFFF = list(suggestion_values[0][1])
with open(path_pickled + "\\high_val_DFFF.pkl", "wb") as f:
    pickle.dump(high_val_DFFF, f)



{3: ['genießen', 'positiv', 'zuhause', 'wahrheit', 'küssen', 'gesund', 'lieben', 'liebe'], 0: ['spaß', 'musik', 'fantasie', 'sex', 'glück', 'freude'], 2: ['natur', 'leben', 'urlaub', 'sommer', 'lebendig', 'ferien', 'freiheit'], 1: ['perfekt', 'super', 'freund', 'lachen']}
a: ITEMS ['spaß', 'musik', 'fantasie', 'sex', 'glück', 'freude']
a: AVG SIMILARITY 0.15246365902324518
a: MEAN VALENCE 2.5068627450980387
a: MEAN VALENCE STD 0.5737877172904731
a: MEAN AROUSAL 3.1660446049764936
a: MEAN AROUSAL STD 1.2517520023314639
a: SUGGESTION VALUE 2.7948213025767576
b: ITEMS ['perfekt', 'super', 'freund', 'lachen']
b: AVG SIMILARITY 0.15041651452581087
b: MEAN VALENCE 2.526470588235294
b: MEAN VALENCE STD 0.6619063968950484
b: MEAN AROUSAL 2.850657894736842
b: MEAN AROUSAL STD 1.2136001103827632
b: SUGGESTION VALUE 2.4492216416096033
c: ITEMS ['natur', 'leben', 'urlaub', 'sommer', 'lebendig', 'ferien', 'freiheit']
c: AVG SIMILARITY 0.23858456703878583
c: MEAN VALENCE 2.511764705882353
c: MEAN VA

### 2. Low Valence

In [27]:
# Low-valence seed words for the DFFF corpus: select BAWL words, cluster their
# DFFF embedding vectors with Affinity Propagation, print per-cluster
# statistics, rank the clusters by a heuristic "suggestion value" and pickle
# the best-ranked cluster as low_val_DFFF.

# Define relevant valence words by threshold value
# NOTE: this threshold-based list is overwritten by the ranking-based
# selection below; it is kept as the documented alternative.

emo_mean = -2
emo_std = 1

low_valence_words = list(bawl[(bawl["DFFF_FREQ"] >= 50) & (bawl["EMO_MEAN"] <= emo_mean) & (bawl["EMO_STD"] <= emo_std)]["WORD_LOWER"])

# Define relevant valence words by ranking (the bot_n lowest EMO_MEAN values)

bot_n = 25
emo_std = 1

low_valence_words = list(bawl[(bawl["DFFF_FREQ"] >= 50) & (bawl["EMO_STD"] <= emo_std)].sort_values("EMO_MEAN")["WORD_LOWER"])[:bot_n]

# Get vectors for valence words

valence_words_index = [modelDFFF.key_to_index[word] for word in low_valence_words]
valence_word_vectors = [modelDFFF.get_vector(i) for i in valence_words_index]

# Normalize vectors to unit length (L2 norm)

valence_word_cosine = np.array(normalize(valence_word_vectors,norm='l2'))

# Cluster with Affinity Propagation

valence_cos_cluster = AF(affinity='euclidean')
valence_cos_cluster.fit(valence_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

valence_sem_groups = {}
for label, word in zip(valence_cos_cluster.labels_, low_valence_words):
    valence_sem_groups.setdefault(label, []).append(word)

print(valence_sem_groups)

# Print relevant information of clusters

suggestion_values = []

# One letter per cluster; cycle() restarts the alphabet if it runs out
letters = cycle('abcdefghijklmnopqrstuvwxyz')
for k, let in zip(range(len(valence_cos_cluster.cluster_centers_)), letters):
    class_members = valence_cos_cluster.labels_ == k
    cluster_words = np.array(low_valence_words)[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))

    # Similarity of every ordered pair of distinct members of this cluster
    dist_list = [modelDFFF.similarity(member1, member2)
                 for member1 in cluster_words
                 for member2 in cluster_words
                 if member1 != member2]
    if dist_list:
        avg_similarity = sum(dist_list) / len(dist_list)
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    # Collect the BAWL valence/arousal norms of the cluster members
    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in valence_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() requires exactly one matching row and avoids the
        # deprecated float(Series) conversion
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    # Suggestion value: (avg similarity + |mean valence| - mean valence std),
    # scaled by 1.05 per cluster member. The member count is taken from this
    # cell's own low-valence cluster, not from the high-valence word list of
    # another cell. A cluster without any word pair (single member) scores 0.
    if dist_list:
        suggestion_value = (avg_similarity + abs(mean_valence) - mean_valence_std) * (1.05 ** len(cluster_words))
    else:
        suggestion_value = 0
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

# Ranking for suggestion values (highest first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, ranked in enumerate(suggestion_values, start=1):
    print(str(rank) + " " + str(list(ranked[1])))

# Persist the best-ranked cluster
low_val_DFFF = list(suggestion_values[0][1])
with open(path_pickled + "\\low_val_DFFF.pkl", "wb") as f:
    pickle.dump(low_val_DFFF, f)



{4: ['alptraum', 'mord', 'strafe', 'gefängnis'], 0: ['tod', 'tot', 'leiche', 'negativ', 'unfall', 'grab', 'schlecht', 'streit', 'lüge'], 1: ['töten', 'zerstören', 'waffe'], 3: ['gewalt', 'angst', 'tote', 'gift', 'verlust'], 2: ['erzfeind', 'stehlen', 'fluch'], 5: ['einsam']}
a: ITEMS ['tod', 'tot', 'leiche', 'negativ', 'unfall', 'grab', 'schlecht', 'streit', 'lüge']
a: AVG SIMILARITY 0.19258663948211405
a: MEAN VALENCE -2.3797385620915033
a: MEAN VALENCE STD 0.734109942174734
a: MEAN AROUSAL 3.9797178130511464
a: MEAN AROUSAL STD 0.9027526471734532
a: SUGGESTION VALUE 2.851675198947755
b: ITEMS ['töten', 'zerstören', 'waffe']
b: AVG SIMILARITY 0.18203230450550714
b: MEAN VALENCE -2.52843137254902
b: MEAN VALENCE STD 0.7754677122323823
b: MEAN AROUSAL 4.2
b: MEAN AROUSAL STD 1.0595421250470258
b: SUGGESTION VALUE 2.2399997037772352
c: ITEMS ['erzfeind', 'stehlen', 'fluch']
c: AVG SIMILARITY 0.18986394504706064
c: MEAN VALENCE -2.266666666666667
c: MEAN VALENCE STD 0.5616370213557839
c: 

### 3. High Arousal

In [28]:
# High-arousal seed words for the DFFF corpus: select BAWL words, cluster
# their DFFF embedding vectors with Affinity Propagation, print per-cluster
# statistics, rank the clusters by a heuristic "suggestion value" and pickle
# the best-ranked cluster as high_arousal_DFFF.

# Define relevant arousal words by threshold value
# NOTE: this threshold-based list is overwritten by the ranking-based
# selection below; it is kept as the documented alternative.

arousal_mean = 2
arousal_std = 1

high_arousal_words = list(bawl[(bawl["DFFF_FREQ"] >= 50) & (bawl["AROUSAL_MEAN"] >= arousal_mean) & (bawl["AROUSAL_STD"] <= arousal_std)]["WORD_LOWER"])

# Define relevant arousal words by ranking (the top_n highest AROUSAL_MEAN values)

top_n = 25
arousal_std = 1

high_arousal_words = list(bawl[(bawl["DFFF_FREQ"] >= 50) & (bawl["AROUSAL_STD"] <= arousal_std)].sort_values("AROUSAL_MEAN")["WORD_LOWER"])[-top_n:]

# Get vectors for arousal words

arousal_words_index = [modelDFFF.key_to_index[word] for word in high_arousal_words]
arousal_word_vectors = [modelDFFF.get_vector(i) for i in arousal_words_index]

# Normalize vectors to unit length (L2 norm)

arousal_word_cosine = np.array(normalize(arousal_word_vectors,norm='l2'))

# Cluster with Affinity Propagation

arousal_cos_cluster = AF(affinity='euclidean')
arousal_cos_cluster.fit(arousal_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

arousal_sem_groups = {}
for label, word in zip(arousal_cos_cluster.labels_, high_arousal_words):
    arousal_sem_groups.setdefault(label, []).append(word)

print(arousal_sem_groups)

# Print relevant information of clusters

suggestion_values = []

# One letter per cluster; cycle() restarts the alphabet if it runs out
letters = cycle('abcdefghijklmnopqrstuvwxyz')
for k, let in zip(range(len(arousal_cos_cluster.cluster_centers_)), letters):
    class_members = arousal_cos_cluster.labels_ == k
    cluster_words = np.array(high_arousal_words)[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))

    # Similarity of every ordered pair of distinct members of this cluster
    dist_list = [modelDFFF.similarity(member1, member2)
                 for member1 in cluster_words
                 for member2 in cluster_words
                 if member1 != member2]
    if dist_list:
        avg_similarity = sum(dist_list) / len(dist_list)
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    # Collect the BAWL valence/arousal norms of the cluster members
    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in arousal_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() requires exactly one matching row and avoids the
        # deprecated float(Series) conversion
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    # Suggestion value: (avg similarity + mean arousal - mean arousal std),
    # scaled by 1.05 per cluster member. A cluster without any word pair
    # (single member) scores 0; other errors are no longer swallowed.
    if dist_list:
        suggestion_value = (avg_similarity + mean_arousal - mean_arousal_std) * (1.05 ** len(cluster_words))
    else:
        suggestion_value = 0
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

# Ranking for suggestion values (highest first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, ranked in enumerate(suggestion_values, start=1):
    print(str(rank) + " " + str(list(ranked[1])))

# Persist the best-ranked cluster
high_arousal_DFFF = list(suggestion_values[0][1])
with open(path_pickled + "\\high_arousal_DFFF.pkl", "wb") as f:
    pickle.dump(high_arousal_DFFF, f)

{5: ['wahnsinn', 'panisch', 'schrei', 'alptraum'], 4: ['zorn', 'macht', 'schreck', 'schmerz', 'leid', 'gefahr', 'angst', 'panik'], 2: ['leiche', 'tot', 'tote', 'waffe', 'mord', 'notfall'], 3: ['feind', 'sex', 'gewalt'], 0: ['geburt', 'unfall', 'trennung'], 1: ['drohung']}
a: ITEMS ['geburt', 'unfall', 'trennung']
a: AVG SIMILARITY 0.29985249042510986
a: MEAN VALENCE -0.7166666666666668
a: MEAN VALENCE STD 0.8438043993501712
a: MEAN AROUSAL 4.236227824463119
a: MEAN AROUSAL STD 0.7849755625877451
a: SUGGESTION VALUE 4.342372638881848
b: ITEMS ['drohung']
b: MEAN VALENCE -2.1
b: MEAN VALENCE STD 0.5676462121975466
b: MEAN AROUSAL 4.176470588235294
b: MEAN AROUSAL STD 0.9510056596602792
b: SUGGESTION VALUE 0
c: ITEMS ['leiche', 'tot', 'tote', 'waffe', 'mord', 'notfall']
c: AVG SIMILARITY 0.21532328575849533
c: MEAN VALENCE -2.4
c: MEAN VALENCE STD 0.7249168763643997
c: MEAN AROUSAL 4.282990974167444
c: MEAN AROUSAL STD 0.8445978068412491
c: SUGGESTION VALUE 4.8963294908586334
d: ITEMS ['f



e: AVG SIMILARITY 0.20316703444612877
e: MEAN VALENCE -1.6544117647058825
e: MEAN VALENCE STD 1.0615437050600067
e: MEAN AROUSAL 4.263562398643668
e: MEAN AROUSAL STD 0.7630297015261224
e: SUGGESTION VALUE 5.472051330558842
f: ITEMS ['wahnsinn', 'panisch', 'schrei', 'alptraum']
f: AVG SIMILARITY 0.1668980544588218
f: MEAN VALENCE -2.05
f: MEAN VALENCE STD 0.842378981289877
f: MEAN AROUSAL 4.320949432404541
f: MEAN AROUSAL STD 0.7003996267194719
f: SUGGESTION VALUE 4.603666545554026
1 ['zorn', 'macht', 'schreck', 'schmerz', 'leid', 'gefahr', 'angst', 'panik']
2 ['leiche', 'tot', 'tote', 'waffe', 'mord', 'notfall']
3 ['wahnsinn', 'panisch', 'schrei', 'alptraum']
4 ['geburt', 'unfall', 'trennung']
5 ['feind', 'sex', 'gewalt']
6 ['drohung']


### 4. Low Arousal

In [29]:
# Low-arousal seed words for the DFFF corpus: select BAWL words, cluster
# their DFFF embedding vectors with Affinity Propagation, print per-cluster
# statistics, rank the clusters by a heuristic "suggestion value" and pickle
# the best-ranked cluster as low_arousal_DFFF.

# Define relevant arousal words by threshold value
# NOTE: this threshold-based list is overwritten by the ranking-based
# selection below; it is kept as the documented alternative.

arousal_mean = 2
arousal_std = 1

low_arousal_words = list(bawl[(bawl["DFFF_FREQ"] >= 50) & (bawl["AROUSAL_MEAN"] <= arousal_mean) & (bawl["AROUSAL_STD"] <= arousal_std)]["WORD_LOWER"])

# Define relevant arousal words by ranking (the bot_n lowest AROUSAL_MEAN values)

bot_n = 25
arousal_std = 1

low_arousal_words = list(bawl[(bawl["DFFF_FREQ"] >= 50) & (bawl["AROUSAL_STD"] <= arousal_std)].sort_values("AROUSAL_MEAN")["WORD_LOWER"])[:bot_n]

# Get vectors for arousal words

arousal_words_index = [modelDFFF.key_to_index[word] for word in low_arousal_words]
arousal_word_vectors = [modelDFFF.get_vector(i) for i in arousal_words_index]

# Normalize vectors to unit length (L2 norm)

arousal_word_cosine = np.array(normalize(arousal_word_vectors,norm='l2'))

# Cluster with Affinity Propagation

arousal_cos_cluster = AF(affinity='euclidean')
arousal_cos_cluster.fit(arousal_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

arousal_sem_groups = {}
for label, word in zip(arousal_cos_cluster.labels_, low_arousal_words):
    arousal_sem_groups.setdefault(label, []).append(word)

print(arousal_sem_groups)

# Print relevant information of clusters

suggestion_values = []

# One letter per cluster; cycle() restarts the alphabet if it runs out
letters = cycle('abcdefghijklmnopqrstuvwxyz')
for k, let in zip(range(len(arousal_cos_cluster.cluster_centers_)), letters):
    class_members = arousal_cos_cluster.labels_ == k
    cluster_words = np.array(low_arousal_words)[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))

    # Similarity of every ordered pair of distinct members of this cluster
    dist_list = [modelDFFF.similarity(member1, member2)
                 for member1 in cluster_words
                 for member2 in cluster_words
                 if member1 != member2]
    if dist_list:
        avg_similarity = sum(dist_list) / len(dist_list)
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    # Collect the BAWL valence/arousal norms of the cluster members
    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in arousal_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() requires exactly one matching row and avoids the
        # deprecated float(Series) conversion
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    # Suggestion value: (avg similarity + (4 - mean arousal) - mean arousal
    # std), scaled by 1.05 per cluster member, so lower arousal scores higher.
    # A cluster without any word pair (single member) scores 0; other errors
    # are no longer swallowed.
    if dist_list:
        suggestion_value = (avg_similarity + (4 - mean_arousal) - mean_arousal_std) * (1.05 ** len(cluster_words))
    else:
        suggestion_value = 0
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

# Ranking for suggestion values (highest first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, ranked in enumerate(suggestion_values, start=1):
    print(str(rank) + " " + str(list(ranked[1])))

# Persist the best-ranked cluster
low_arousal_DFFF = list(suggestion_values[0][1])
with open(path_pickled + "\\low_arousal_DFFF.pkl", "wb") as f:
    pickle.dump(low_arousal_DFFF, f)

{5: ['schlaf', 'weich', 'decke', 'sofa', 'kissen', 'zimmer'], 2: ['still', 'pause', 'zaghaft', 'stumm'], 0: ['wiese', 'baum', 'klang', 'garten', 'himmel'], 6: ['liege', 'boden', 'gras', 'sand'], 1: ['wenig'], 4: ['ding', 'teller', 'erbse', 'eimer'], 3: ['gesund']}
a: ITEMS ['wiese', 'baum', 'klang', 'garten', 'himmel']
a: AVG SIMILARITY 0.2422032065689564
a: MEAN VALENCE 1.6799999999999997
a: MEAN VALENCE STD 0.9171854339985097
a: MEAN AROUSAL 1.5503646370829034
a: MEAN AROUSAL STD 0.7655993045508671
a: SUGGESTION VALUE 2.4584236588003314
b: ITEMS ['wenig']
b: MEAN VALENCE -0.9
b: MEAN VALENCE STD 0.99
b: MEAN AROUSAL 1.5263157894736843
b: MEAN AROUSAL STD 0.7723284457212328
b: SUGGESTION VALUE 0
c: ITEMS ['still', 'pause', 'zaghaft', 'stumm']
c: AVG SIMILARITY 0.15788310642043749




c: MEAN VALENCE 0.012499999999999983
c: MEAN VALENCE STD 0.8191770199999999
c: MEAN AROUSAL 1.4110275689223057
c: MEAN AROUSAL STD 0.7162574934508305
c: SUGGESTION VALUE 2.4682046137772704
d: ITEMS ['gesund']
d: MEAN VALENCE 2.7
d: MEAN VALENCE STD 0.48
d: MEAN AROUSAL 1.631578947368421
d: MEAN AROUSAL STD 0.7608859102526819
d: SUGGESTION VALUE 0
e: ITEMS ['ding', 'teller', 'erbse', 'eimer']
e: AVG SIMILARITY 0.19677923433482647
e: MEAN VALENCE 0.31514705882352945
e: MEAN VALENCE STD 0.8967921319694871
e: MEAN AROUSAL 1.6029154995331467
e: MEAN AROUSAL STD 0.7943356709064243
e: SUGGESTION VALUE 2.1873376087150826
f: ITEMS ['schlaf', 'weich', 'decke', 'sofa', 'kissen', 'zimmer']
f: AVG SIMILARITY 0.2090221405029297
f: MEAN VALENCE 1.4266666666666667
f: MEAN VALENCE STD 0.9923830048485082
f: MEAN AROUSAL 1.4980868019722509
f: MEAN AROUSAL STD 0.6815172568312103
f: SUGGESTION VALUE 2.7196143242910082
g: ITEMS ['liege', 'boden', 'gras', 'sand']
g: AVG SIMILARITY 0.33855227877696353
g: MEAN

## 5. MITTELERDE

### 1. High Valence

In [30]:
# High-valence seed words for the Mittelerde corpus: select BAWL words,
# cluster their Mittelerde embedding vectors with Affinity Propagation, print
# per-cluster statistics, rank the clusters by a heuristic "suggestion value"
# and pickle the best-ranked cluster as high_val_Mittelerde.

# Define relevant valence words by threshold value
# NOTE: this threshold-based list is overwritten by the ranking-based
# selection below; it is kept as the documented alternative.

emo_mean = 2
emo_std = 1

high_valence_words = list(bawl[(bawl["MITTELERDE_FREQ"] >= 50) & (bawl["EMO_MEAN"] >= emo_mean) & (bawl["EMO_STD"] <= emo_std)]["WORD_LOWER"])

# Define relevant valence words by ranking (the top_n highest EMO_MEAN values)

top_n = 25
emo_std = 1

high_valence_words = list(bawl[(bawl["MITTELERDE_FREQ"] >= 50) & (bawl["EMO_STD"] <= emo_std)].sort_values("EMO_MEAN")["WORD_LOWER"])[-top_n:]

# Get vectors for valence words

valence_words_index = [modelMittelerde.key_to_index[word] for word in high_valence_words]
valence_word_vectors = [modelMittelerde.get_vector(i) for i in valence_words_index]

# Normalize vectors to unit length (L2 norm)

valence_word_cosine = np.array(normalize(valence_word_vectors,norm='l2'))

# Cluster with Affinity Propagation (directly on the normalized vectors;
# no PCA is applied in this cell)

valence_cos_cluster = AF(affinity='euclidean')
valence_cos_cluster.fit(valence_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

valence_sem_groups = {}
for label, word in zip(valence_cos_cluster.labels_, high_valence_words):
    valence_sem_groups.setdefault(label, []).append(word)

print(valence_sem_groups)

# Print relevant information of clusters

suggestion_values = []

# One letter per cluster; cycle() restarts the alphabet if it runs out
letters = cycle('abcdefghijklmnopqrstuvwxyz')
for k, let in zip(range(len(valence_cos_cluster.cluster_centers_)), letters):
    class_members = valence_cos_cluster.labels_ == k
    cluster_words = np.array(high_valence_words)[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))

    # Similarity of every ordered pair of distinct members of this cluster
    dist_list = [modelMittelerde.similarity(member1, member2)
                 for member1 in cluster_words
                 for member2 in cluster_words
                 if member1 != member2]
    if dist_list:
        avg_similarity = sum(dist_list) / len(dist_list)
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    # Collect the BAWL valence/arousal norms of the cluster members
    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in valence_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() requires exactly one matching row and avoids the
        # deprecated float(Series) conversion
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    # Suggestion value: (avg similarity + |mean valence| - mean valence std),
    # scaled by 1.05 per cluster member. A cluster without any word pair
    # (single member) scores 0; other errors are no longer swallowed.
    if dist_list:
        suggestion_value = (avg_similarity + abs(mean_valence) - mean_valence_std) * (1.05 ** len(cluster_words))
    else:
        suggestion_value = 0
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

# Ranking for suggestion values (highest first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, ranked in enumerate(suggestion_values, start=1):
    print(str(rank) + " " + str(list(ranked[1])))

# Persist the best-ranked cluster
high_val_Mittelerde = list(suggestion_values[0][1])
with open(path_pickled + "\\high_val_Mittelerde.pkl", "wb") as f:
    pickle.dump(high_val_Mittelerde, f)



{3: ['natur', 'wahrheit', 'frieden', 'heilung', 'freiheit'], 1: ['leben', 'urlaub', 'lebendig', 'zuhause', 'freund'], 5: ['musik', 'glück', 'lachen', 'gesund', 'freude', 'lieben'], 0: ['sonnig', 'positiv', 'sommer'], 2: ['perfekt', 'super', 'liebe'], 4: ['fantasie', 'sex', 'küssen']}
a: ITEMS ['sonnig', 'positiv', 'sommer']
a: AVG SIMILARITY 0.23365899423758188
a: MEAN VALENCE 2.433333333333333
a: MEAN VALENCE STD 0.7427490666666667
a: MEAN AROUSAL 2.4074074074074074
a: MEAN AROUSAL STD 1.2161264916817012
a: SUGGESTION VALUE 2.2275521049042806
b: ITEMS ['leben', 'urlaub', 'lebendig', 'zuhause', 'freund']
b: AVG SIMILARITY 0.21619584038853645
b: MEAN VALENCE 2.478235294117647
b: MEAN VALENCE STD 0.7066808329789417
b: MEAN AROUSAL 2.4439413484692123
b: MEAN AROUSAL STD 1.187826096136614
b: SUGGESTION VALUE 2.536929060693035
c: ITEMS ['perfekt', 'super', 'liebe']
c: AVG SIMILARITY 0.19730295365055403
c: MEAN VALENCE 2.6
c: MEAN VALENCE STD 0.6192645033333333
c: MEAN AROUSAL 3.382748538011

### 2. Low Valence

In [31]:
# Low-valence seed words for the Mittelerde corpus: select BAWL words,
# cluster their Mittelerde embedding vectors with Affinity Propagation, print
# per-cluster statistics, rank the clusters by a heuristic "suggestion value"
# and pickle the best-ranked cluster as low_val_Mittelerde.

# Define relevant valence words by threshold value
# NOTE: this threshold-based list is overwritten by the ranking-based
# selection below; it is kept as the documented alternative.

emo_mean = -2
emo_std = 1

low_valence_words = list(bawl[(bawl["MITTELERDE_FREQ"] >= 50) & (bawl["EMO_MEAN"] <= emo_mean) & (bawl["EMO_STD"] <= emo_std)]["WORD_LOWER"])

# Define relevant valence words by ranking (the bot_n lowest EMO_MEAN values)

bot_n = 25
emo_std = 1

low_valence_words = list(bawl[(bawl["MITTELERDE_FREQ"] >= 50) & (bawl["EMO_STD"] <= emo_std)].sort_values("EMO_MEAN")["WORD_LOWER"])[:bot_n]

# Get vectors for valence words

valence_words_index = [modelMittelerde.key_to_index[word] for word in low_valence_words]
valence_word_vectors = [modelMittelerde.get_vector(i) for i in valence_words_index]

# Normalize vectors to unit length (L2 norm)

valence_word_cosine = np.array(normalize(valence_word_vectors,norm='l2'))

# Cluster with Affinity Propagation

valence_cos_cluster = AF(affinity='euclidean')
valence_cos_cluster.fit(valence_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

valence_sem_groups = {}
for label, word in zip(valence_cos_cluster.labels_, low_valence_words):
    valence_sem_groups.setdefault(label, []).append(word)

print(valence_sem_groups)

# Print relevant information of clusters

suggestion_values = []

# One letter per cluster; cycle() restarts the alphabet if it runs out
letters = cycle('abcdefghijklmnopqrstuvwxyz')
for k, let in zip(range(len(valence_cos_cluster.cluster_centers_)), letters):
    class_members = valence_cos_cluster.labels_ == k
    cluster_words = np.array(low_valence_words)[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))

    # Similarity of every ordered pair of distinct members of this cluster
    dist_list = [modelMittelerde.similarity(member1, member2)
                 for member1 in cluster_words
                 for member2 in cluster_words
                 if member1 != member2]
    if dist_list:
        avg_similarity = sum(dist_list) / len(dist_list)
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    # Collect the BAWL valence/arousal norms of the cluster members
    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in valence_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() requires exactly one matching row and avoids the
        # deprecated float(Series) conversion
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    # Suggestion value: (avg similarity + |mean valence| - mean valence std),
    # scaled by 1.05 per cluster member. The member count is taken from this
    # cell's own low-valence cluster, not from the high-valence word list of
    # another cell. A cluster without any word pair (single member) scores 0.
    if dist_list:
        suggestion_value = (avg_similarity + abs(mean_valence) - mean_valence_std) * (1.05 ** len(cluster_words))
    else:
        suggestion_value = 0
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

# Ranking for suggestion values (highest first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, ranked in enumerate(suggestion_values, start=1):
    print(str(rank) + " " + str(list(ranked[1])))

# Persist the best-ranked cluster
low_val_Mittelerde = list(suggestion_values[0][1])
with open(path_pickled + "\\low_val_Mittelerde.pkl", "wb") as f:
    pickle.dump(low_val_Mittelerde, f)

{3: ['krieg', 'seuche', 'gift'], 5: ['mord', 'tod', 'angst', 'verrat', 'sucht'], 0: ['folter', 'qual', 'gewalt', 'negativ', 'strafe'], 1: ['alptraum', 'unfall'], 2: ['foltern', 'töten', 'zerstören', 'hassen', 'waffe'], 4: ['tote', 'tot', 'leiche', 'leichnam', 'leblos']}
a: ITEMS ['folter', 'qual', 'gewalt', 'negativ', 'strafe']
a: AVG SIMILARITY 0.20706625580787658
a: MEAN VALENCE -2.6
a: MEAN VALENCE STD 0.6860240335421105
a: MEAN AROUSAL 4.09874686716792
a: MEAN AROUSAL STD 0.7193675800404499
a: SUGGESTION VALUE 2.7070470815618246
b: ITEMS ['alptraum', 'unfall']
b: AVG SIMILARITY 0.3417823314666748
b: MEAN VALENCE -2.575
b: MEAN VALENCE STD 0.6516379610168379
b: MEAN AROUSAL 4.383753501400561
b: MEAN AROUSAL STD 0.6623021631519119
b: SUGGESTION VALUE 2.4973216684209456
c: ITEMS ['foltern', 'töten', 'zerstören', 'hassen', 'waffe']
c: AVG SIMILARITY 0.21811757311224939
c: MEAN VALENCE -2.577058823529412
c: MEAN VALENCE STD 0.6912806273394294
c: MEAN AROUSAL 4.297777777777778
c: MEAN AR



### 3. High Arousal

In [32]:
# High-arousal seed words for the Mittelerde corpus: select BAWL words,
# cluster their Mittelerde embedding vectors with Affinity Propagation, print
# per-cluster statistics, rank the clusters by a heuristic "suggestion value"
# and pickle the best-ranked cluster as high_arousal_Mittelerde.

# Define relevant arousal words by threshold value
# NOTE: this threshold-based list is overwritten by the ranking-based
# selection below; it is kept as the documented alternative.

arousal_mean = 2
arousal_std = 1

high_arousal_words = list(bawl[(bawl["MITTELERDE_FREQ"] >= 50) & (bawl["AROUSAL_MEAN"] >= arousal_mean) & (bawl["AROUSAL_STD"] <= arousal_std)]["WORD_LOWER"])

# Define relevant arousal words by ranking (the top_n highest AROUSAL_MEAN values)

top_n = 25
arousal_std = 1

high_arousal_words = list(bawl[(bawl["MITTELERDE_FREQ"] >= 50) & (bawl["AROUSAL_STD"] <= arousal_std)].sort_values("AROUSAL_MEAN")["WORD_LOWER"])[-top_n:]

# Get vectors for arousal words

arousal_words_index = [modelMittelerde.key_to_index[word] for word in high_arousal_words]
arousal_word_vectors = [modelMittelerde.get_vector(i) for i in arousal_words_index]

# Normalize vectors to unit length (L2 norm)

arousal_word_cosine = np.array(normalize(arousal_word_vectors,norm='l2'))

# Cluster with Affinity Propagation

arousal_cos_cluster = AF(affinity='euclidean')
arousal_cos_cluster.fit(arousal_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

arousal_sem_groups = {}
for label, word in zip(arousal_cos_cluster.labels_, high_arousal_words):
    arousal_sem_groups.setdefault(label, []).append(word)

print(arousal_sem_groups)

# Print relevant information of clusters

suggestion_values = []

# One letter per cluster; cycle() restarts the alphabet if it runs out
letters = cycle('abcdefghijklmnopqrstuvwxyz')
for k, let in zip(range(len(arousal_cos_cluster.cluster_centers_)), letters):
    class_members = arousal_cos_cluster.labels_ == k
    cluster_words = np.array(high_arousal_words)[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))

    # Similarity of every ordered pair of distinct members of this cluster
    dist_list = [modelMittelerde.similarity(member1, member2)
                 for member1 in cluster_words
                 for member2 in cluster_words
                 if member1 != member2]
    if dist_list:
        avg_similarity = sum(dist_list) / len(dist_list)
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    # Collect the BAWL valence/arousal norms of the cluster members
    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in arousal_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() requires exactly one matching row and avoids the
        # deprecated float(Series) conversion
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    # Suggestion value: (avg similarity + mean arousal - mean arousal std),
    # scaled by 1.05 per cluster member. A cluster without any word pair
    # (single member) scores 0; other errors are no longer swallowed.
    if dist_list:
        suggestion_value = (avg_similarity + mean_arousal - mean_arousal_std) * (1.05 ** len(cluster_words))
    else:
        suggestion_value = 0
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

# Ranking for suggestion values (highest first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, ranked in enumerate(suggestion_values, start=1):
    print(str(rank) + " " + str(list(ranked[1])))

# Persist the best-ranked cluster
high_arousal_Mittelerde = list(suggestion_values[0][1])
with open(path_pickled + "\\high_arousal_Mittelerde.pkl", "wb") as f:
    pickle.dump(high_arousal_Mittelerde, f)



{5: ['unfall', 'schrei', 'alptraum'], 0: ['waffe', 'sklave', 'hassen'], 2: ['seuche', 'blutig', 'mord', 'brutal', 'bestie'], 6: ['trennung', 'sex', 'gewalt', 'foltern', 'folter'], 3: ['ohrfeige', 'alarm', 'notfall'], 4: ['gefahr', 'unheil', 'krieg'], 1: ['angst', 'furcht', 'panik']}
a: ITEMS ['waffe', 'sklave', 'hassen']
a: AVG SIMILARITY 0.20239900797605515
a: MEAN VALENCE -2.35
a: MEAN VALENCE STD 0.7953565899999999
a: MEAN AROUSAL 4.332776385407965
a: MEAN AROUSAL STD 0.7671721704198582
a: SUGGESTION VALUE 4.361934730983888
b: ITEMS ['angst', 'furcht', 'panik']
b: AVG SIMILARITY 0.5029648840427399
b: MEAN VALENCE -2.2
b: MEAN VALENCE STD 0.9231543621755721
b: MEAN AROUSAL 4.4603174603174605
b: MEAN AROUSAL STD 0.6296839533341932
b: SUGGESTION VALUE 5.0166818374114825
c: ITEMS ['seuche', 'blutig', 'mord', 'brutal', 'bestie']
c: AVG SIMILARITY 0.2508745491504669
c: MEAN VALENCE -2.2399999999999998
c: MEAN VALENCE STD 0.7182654249071911
c: MEAN AROUSAL 4.420034399724803
c: MEAN AROUSAL

### 4. Low Arousal

In [33]:
# Low-arousal seed words for the Mittelerde corpus: select BAWL words,
# cluster their Mittelerde embedding vectors with Affinity Propagation, print
# per-cluster statistics, rank the clusters by a heuristic "suggestion value"
# and pickle the best-ranked cluster as low_arousal_Mittelerde.

# Define relevant arousal words by threshold value
# NOTE: this threshold-based list is overwritten by the ranking-based
# selection below; it is kept as the documented alternative.

arousal_mean = 2
arousal_std = 1

low_arousal_words = list(bawl[(bawl["MITTELERDE_FREQ"] >= 50) & (bawl["AROUSAL_MEAN"] <= arousal_mean) & (bawl["AROUSAL_STD"] <= arousal_std)]["WORD_LOWER"])

# Define relevant arousal words by ranking (the bot_n lowest AROUSAL_MEAN values)

bot_n = 25
arousal_std = 1

low_arousal_words = list(bawl[(bawl["MITTELERDE_FREQ"] >= 50) & (bawl["AROUSAL_STD"] <= arousal_std)].sort_values("AROUSAL_MEAN")["WORD_LOWER"])[:bot_n]

# Get vectors for arousal words

arousal_words_index = [modelMittelerde.key_to_index[word] for word in low_arousal_words]
arousal_word_vectors = [modelMittelerde.get_vector(i) for i in arousal_words_index]

# Normalize vectors to unit length (L2 norm)

arousal_word_cosine = np.array(normalize(arousal_word_vectors,norm='l2'))

# Cluster with Affinity Propagation

arousal_cos_cluster = AF(affinity='euclidean')
arousal_cos_cluster.fit(arousal_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

arousal_sem_groups = {}
for label, word in zip(arousal_cos_cluster.labels_, low_arousal_words):
    arousal_sem_groups.setdefault(label, []).append(word)

print(arousal_sem_groups)

# Print relevant information of clusters

suggestion_values = []

# One letter per cluster; cycle() restarts the alphabet if it runs out
letters = cycle('abcdefghijklmnopqrstuvwxyz')
for k, let in zip(range(len(arousal_cos_cluster.cluster_centers_)), letters):
    class_members = arousal_cos_cluster.labels_ == k
    cluster_words = np.array(low_arousal_words)[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))

    # Similarity of every ordered pair of distinct members of this cluster
    dist_list = [modelMittelerde.similarity(member1, member2)
                 for member1 in cluster_words
                 for member2 in cluster_words
                 if member1 != member2]
    if dist_list:
        avg_similarity = sum(dist_list) / len(dist_list)
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    # Collect the BAWL valence/arousal norms of the cluster members
    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in arousal_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() requires exactly one matching row and avoids the
        # deprecated float(Series) conversion
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    # Suggestion value: (avg similarity + (4 - mean arousal) - mean arousal
    # std), scaled by 1.05 per cluster member, so lower arousal scores higher.
    # A cluster without any word pair (single member) scores 0; other errors
    # are no longer swallowed.
    if dist_list:
        suggestion_value = (avg_similarity + (4 - mean_arousal) - mean_arousal_std) * (1.05 ** len(cluster_words))
    else:
        suggestion_value = 0
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_values[-1][0]))

# Ranking for suggestion values (highest first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, ranked in enumerate(suggestion_values, start=1):
    print(str(rank) + " " + str(list(ranked[1])))

# Persist the best-ranked cluster
low_arousal_Mittelerde = list(suggestion_values[0][1])
with open(path_pickled + "\\low_arousal_Mittelerde.pkl", "wb") as f:
    pickle.dump(low_arousal_Mittelerde, f)



{1: ['schlaf', 'friede', 'segen', 'ding'], 4: ['still', 'pause', 'zaghaft', 'stumm'], 2: ['wiege', 'beutel', 'seife', 'schale', 'kamin', 'harfe', 'wenig', 'traube'], 3: ['buche', 'wiese', 'murmel', 'baum', 'boden', 'weide'], 0: ['weich', 'seide', 'liege']}
a: ITEMS ['weich', 'seide', 'liege']
a: AVG SIMILARITY 0.2560364753007889
a: MEAN VALENCE 1.383333333333333
a: MEAN VALENCE STD 0.8504703326835047
a: MEAN AROUSAL 1.4551083591331269
a: MEAN AROUSAL STD 0.7186285995138822
a: SUGGESTION VALUE 2.410521977966332
b: ITEMS ['schlaf', 'friede', 'segen', 'ding']
b: AVG SIMILARITY 0.15586992353200912
b: MEAN VALENCE 1.4676470588235295
b: MEAN VALENCE STD 0.9968674659724254
b: MEAN AROUSAL 1.4027046783625732
b: MEAN AROUSAL STD 0.7118649636057858
b: SUGGESTION VALUE 2.4812132503673774
c: ITEMS ['wiege', 'beutel', 'seife', 'schale', 'kamin', 'harfe', 'wenig', 'traube']
c: AVG SIMILARITY 0.19899939931929111
c: MEAN VALENCE 0.83125
c: MEAN VALENCE STD 0.90787606875
c: MEAN AROUSAL 1.4660087719298

## 6. JACKSON

### 1. High Valence

In [34]:
# Select the high-valence seed words for the Jackson corpus.
#
# Variant A (threshold): BAWL words with JACKSON_FREQ >= 50, a mean valence
# (EMO_MEAN) of at least `emo_mean` and a valence STD (EMO_STD) of at most
# `emo_std`.
# NOTE: this result is overwritten by variant B directly below; the lines are
# kept only to document the alternative selection strategy.

emo_mean = 2
emo_std = 1

high_valence_words = list(bawl[(bawl["JACKSON_FREQ"] >= 50) & (bawl["EMO_MEAN"] >= emo_mean) & (bawl["EMO_STD"] <= emo_std)]["WORD_LOWER"])

# Variant B (ranking, the one actually used): among the words with
# JACKSON_FREQ >= 50 and EMO_STD <= `emo_std`, take the `top_n` words with the
# highest EMO_MEAN.

top_n = 25
emo_std = 1

high_valence_words = list(
    bawl[(bawl["JACKSON_FREQ"] >= 50) & (bawl["EMO_STD"] <= emo_std)]
    .sort_values("EMO_MEAN")["WORD_LOWER"]
)[-top_n:]

# Get vectors for valence words

valence_words_index = [modelJackson.key_to_index[word] for word in high_valence_words]
valence_word_vectors = [modelJackson.get_vector(i) for i in valence_words_index]

# L2-normalize every vector to unit length

valence_word_cosine = np.array(normalize(valence_word_vectors, norm='l2'))

# Cluster with Affinity Propagation (fitted directly on the normalized
# vectors; no PCA step is applied in this cell)

valence_cos_cluster = AF(affinity='euclidean')
valence_cos_cluster.fit(valence_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

valence_sem_groups = {}
for label, word in zip(valence_cos_cluster.labels_, high_valence_words):
    valence_sem_groups.setdefault(label, []).append(word)

print(valence_sem_groups)

# Print relevant information of clusters and compute one suggestion value
# per cluster

suggestion_values = []
word_array = np.array(high_valence_words)

# NOTE: the label alphabet below contains no 'w'; it is left untouched so the
# printed labels stay identical to earlier runs.
letters = cycle('abcdefghijklmnopqrstuvxyz')
for k, let in zip(range(len(valence_cos_cluster.cluster_centers_)), letters):
    class_members = valence_cos_cluster.labels_ == k
    cluster_words = word_array[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))

    # Similarity of every ordered pair of distinct words in the cluster
    dist_list = [
        modelJackson.similarity(member1, member2)
        for member1 in cluster_words
        for member2 in cluster_words
        if member1 != member2
    ]
    if dist_list:
        avg_similarity = sum(dist_list) / len(dist_list)
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    # Collect the BAWL norms of every word in the cluster
    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in valence_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .iloc[0] instead of float(<Series>), which recent pandas deprecates
        valence_means.append(float(word_row["EMO_MEAN"].iloc[0]))
        valence_stds.append(float(word_row["EMO_STD"].iloc[0]))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].iloc[0]))
        arousal_stds.append(float(word_row["AROUSAL_STD"].iloc[0]))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    # Suggestion value: (avg. similarity + |mean valence| - mean valence STD),
    # multiplied by 1.05 ** cluster size. A single-word cluster has no word
    # pairs and therefore scores 0; this explicit check replaces a bare
    # `except:` that would also have hidden unrelated errors.
    if dist_list:
        suggestion_value = (avg_similarity + abs(mean_valence) - mean_valence_std) * (1.05 ** len(cluster_words))
    else:
        suggestion_value = 0
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_value))

# Ranking for suggestion values (best cluster first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for position, (score, ranked_words) in enumerate(suggestion_values, start=1):
    print(str(position) + " " + str(list(ranked_words)))

# Persist the top-ranked cluster

high_val_Jackson = list(suggestion_values[0][1])
with open(path_pickled + "\\high_val_Jackson.pkl", "wb") as f:
    pickle.dump(high_val_Jackson, f)



{0: ['fee', 'natur', 'wahrheit', 'lebendig', 'lieben', 'freiheit'], 2: ['schatz', 'perfekt', 'super'], 4: ['spaß', 'glück', 'gesund'], 5: ['genießen', 'freund', 'küssen', 'lachen'], 3: ['leben', 'musik', 'urlaub', 'sommer', 'zuhause', 'ferien', 'liebe'], 1: ['positiv', 'freude']}
a: ITEMS ['fee', 'natur', 'wahrheit', 'lebendig', 'lieben', 'freiheit']
a: AVG SIMILARITY 0.1592286874850591
a: MEAN VALENCE 2.5259803921568627
a: MEAN VALENCE STD 0.6775864694434531
a: MEAN AROUSAL 3.0092592592592595
a: MEAN AROUSAL STD 1.1350708043280056
a: SUGGESTION VALUE 2.6904063079471525
b: ITEMS ['positiv', 'freude']
b: AVG SIMILARITY 0.25711965560913086
b: MEAN VALENCE 2.55
b: MEAN VALENCE STD 0.705620285
b: MEAN AROUSAL 3.011437908496732
b: MEAN AROUSAL STD 1.2603425937469277
b: SUGGESTION VALUE 2.3169030560965664
c: ITEMS ['schatz', 'perfekt', 'super']
c: AVG SIMILARITY 0.23613144954045615
c: MEAN VALENCE 2.4
c: MEAN VALENCE STD 0.8328944326835045
c: MEAN AROUSAL 3.3388888888888886
c: MEAN AROUSAL S

### 2. Low Valence

In [35]:
# Select the low-valence seed words for the Jackson corpus.
#
# Variant A (threshold): BAWL words with JACKSON_FREQ >= 50, a mean valence
# (EMO_MEAN) of at most `emo_mean` and a valence STD (EMO_STD) of at most
# `emo_std`.
# NOTE: this result is overwritten by variant B directly below; the lines are
# kept only to document the alternative selection strategy.

emo_mean = -2
emo_std = 1

low_valence_words = list(bawl[(bawl["JACKSON_FREQ"] >= 50) & (bawl["EMO_MEAN"] <= emo_mean) & (bawl["EMO_STD"] <= emo_std)]["WORD_LOWER"])

# Variant B (ranking, the one actually used): among the words with
# JACKSON_FREQ >= 50 and EMO_STD <= `emo_std`, take the `bot_n` words with the
# lowest EMO_MEAN.

bot_n = 25
emo_std = 1

low_valence_words = list(
    bawl[(bawl["JACKSON_FREQ"] >= 50) & (bawl["EMO_STD"] <= emo_std)]
    .sort_values("EMO_MEAN")["WORD_LOWER"]
)[:bot_n]

# Get vectors for valence words

valence_words_index = [modelJackson.key_to_index[word] for word in low_valence_words]
valence_word_vectors = [modelJackson.get_vector(i) for i in valence_words_index]

# L2-normalize every vector to unit length

valence_word_cosine = np.array(normalize(valence_word_vectors, norm='l2'))

# Cluster with Affinity Propagation

valence_cos_cluster = AF(affinity='euclidean')
valence_cos_cluster.fit(valence_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

valence_sem_groups = {}
for label, word in zip(valence_cos_cluster.labels_, low_valence_words):
    valence_sem_groups.setdefault(label, []).append(word)

print(valence_sem_groups)

# Print relevant information of clusters and compute one suggestion value
# per cluster

suggestion_values = []
word_array = np.array(low_valence_words)

# NOTE: the label alphabet below contains no 'w'; it is left untouched so the
# printed labels stay identical to earlier runs.
letters = cycle('abcdefghijklmnopqrstuvxyz')
for k, let in zip(range(len(valence_cos_cluster.cluster_centers_)), letters):
    class_members = valence_cos_cluster.labels_ == k
    cluster_words = word_array[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))

    # Similarity of every ordered pair of distinct words in the cluster
    dist_list = [
        modelJackson.similarity(member1, member2)
        for member1 in cluster_words
        for member2 in cluster_words
        if member1 != member2
    ]
    if dist_list:
        avg_similarity = sum(dist_list) / len(dist_list)
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    # Collect the BAWL norms of every word in the cluster
    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in valence_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .iloc[0] instead of float(<Series>), which recent pandas deprecates
        valence_means.append(float(word_row["EMO_MEAN"].iloc[0]))
        valence_stds.append(float(word_row["EMO_STD"].iloc[0]))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].iloc[0]))
        arousal_stds.append(float(word_row["AROUSAL_STD"].iloc[0]))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    # Suggestion value: (avg. similarity + |mean valence| - mean valence STD),
    # multiplied by 1.05 ** cluster size.
    # The cluster size is taken from this cell's own low-valence word list.
    # Indexing the unrelated `high_valence_words` list here only worked while
    # that variable existed with the same length; otherwise the resulting
    # NameError/IndexError was swallowed by a bare `except:` and every score
    # silently became 0.
    # A single-word cluster has no word pairs and therefore scores 0.
    if dist_list:
        suggestion_value = (avg_similarity + abs(mean_valence) - mean_valence_std) * (1.05 ** len(cluster_words))
    else:
        suggestion_value = 0
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_value))

# Ranking for suggestion values (best cluster first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for position, (score, ranked_words) in enumerate(suggestion_values, start=1):
    print(str(position) + " " + str(list(ranked_words)))

# Persist the top-ranked cluster

low_val_Jackson = list(suggestion_values[0][1])
with open(path_pickled + "\\low_val_Jackson.pkl", "wb") as f:
    pickle.dump(low_val_Jackson, f)

{0: ['krieg', 'tote', 'strafe', 'streit'], 1: ['alptraum', 'tod', 'grausam', 'verlust', 'abschied'], 2: ['töten', 'angst', 'zerstören', 'tot', 'hassen', 'leiche', 'stehlen'], 3: ['qual', 'gift', 'waffe', 'lüge', 'armee'], 4: ['grab', 'schlecht', 'gefängnis', 'einsam']}
a: ITEMS ['krieg', 'tote', 'strafe', 'streit']
a: AVG SIMILARITY 0.22975971053044
a: MEAN VALENCE -2.5250000000000004
a: MEAN VALENCE STD 0.6171679692076537
a: MEAN AROUSAL 4.139355742296919
a: MEAN AROUSAL STD 0.689857151713467
a: SUGGESTION VALUE 2.598256121526231
b: ITEMS ['alptraum', 'tod', 'grausam', 'verlust', 'abschied']
b: AVG SIMILARITY 0.20284271240234375
b: MEAN VALENCE -2.42
b: MEAN VALENCE STD 0.7163172978373178
b: MEAN AROUSAL 3.8376283846872083
b: MEAN AROUSAL STD 1.0062252098323659
b: SUGGESTION VALUE 2.4332632350470123
c: ITEMS ['töten', 'angst', 'zerstören', 'tot', 'hassen', 'leiche', 'stehlen']
c: AVG SIMILARITY 0.1900075881608895
c: MEAN VALENCE -2.4978991596638656
c: MEAN VALENCE STD 0.72770618506817



### 3. High Arousal

In [36]:
# Select the high-arousal seed words for the Jackson corpus.
#
# Variant A (threshold): BAWL words with JACKSON_FREQ >= 50, a mean arousal
# (AROUSAL_MEAN) of at least `arousal_mean` and an arousal STD (AROUSAL_STD)
# of at most `arousal_std`.
# NOTE: this result is overwritten by variant B directly below; the lines are
# kept only to document the alternative selection strategy.

arousal_mean = 2
arousal_std = 1

high_arousal_words = list(bawl[(bawl["JACKSON_FREQ"] >= 50) & (bawl["AROUSAL_MEAN"] >= arousal_mean) & (bawl["AROUSAL_STD"] <= arousal_std)]["WORD_LOWER"])

# Variant B (ranking, the one actually used): among the words with
# JACKSON_FREQ >= 50 and AROUSAL_STD <= `arousal_std`, take the `top_n` words
# with the highest AROUSAL_MEAN.

top_n = 25
arousal_std = 1

high_arousal_words = list(
    bawl[(bawl["JACKSON_FREQ"] >= 50) & (bawl["AROUSAL_STD"] <= arousal_std)]
    .sort_values("AROUSAL_MEAN")["WORD_LOWER"]
)[-top_n:]

# Get vectors for arousal words

arousal_words_index = [modelJackson.key_to_index[word] for word in high_arousal_words]
arousal_word_vectors = [modelJackson.get_vector(i) for i in arousal_words_index]

# L2-normalize every vector to unit length

arousal_word_cosine = np.array(normalize(arousal_word_vectors, norm='l2'))

# Cluster with Affinity Propagation

arousal_cos_cluster = AF(affinity='euclidean')
arousal_cos_cluster.fit(arousal_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

arousal_sem_groups = {}
for label, word in zip(arousal_cos_cluster.labels_, high_arousal_words):
    arousal_sem_groups.setdefault(label, []).append(word)

print(arousal_sem_groups)

# Print relevant information of clusters and compute one suggestion value
# per cluster

suggestion_values = []
word_array = np.array(high_arousal_words)

# NOTE: the label alphabet below contains no 'w'; it is left untouched so the
# printed labels stay identical to earlier runs.
letters = cycle('abcdefghijklmnopqrstuvxyz')
for k, let in zip(range(len(arousal_cos_cluster.cluster_centers_)), letters):
    class_members = arousal_cos_cluster.labels_ == k
    cluster_words = word_array[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))

    # Similarity of every ordered pair of distinct words in the cluster
    dist_list = [
        modelJackson.similarity(member1, member2)
        for member1 in cluster_words
        for member2 in cluster_words
        if member1 != member2
    ]
    if dist_list:
        avg_similarity = sum(dist_list) / len(dist_list)
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    # Collect the BAWL norms of every word in the cluster
    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in arousal_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .iloc[0] instead of float(<Series>), which recent pandas deprecates
        valence_means.append(float(word_row["EMO_MEAN"].iloc[0]))
        valence_stds.append(float(word_row["EMO_STD"].iloc[0]))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].iloc[0]))
        arousal_stds.append(float(word_row["AROUSAL_STD"].iloc[0]))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    # Suggestion value: (avg. similarity + mean arousal - mean arousal STD),
    # multiplied by 1.05 ** cluster size. A single-word cluster has no word
    # pairs and therefore scores 0; this explicit check replaces a bare
    # `except:` that would also have hidden unrelated errors.
    if dist_list:
        suggestion_value = (avg_similarity + mean_arousal - mean_arousal_std) * (1.05 ** len(cluster_words))
    else:
        suggestion_value = 0
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_value))

# Ranking for suggestion values (best cluster first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for position, (score, ranked_words) in enumerate(suggestion_values, start=1):
    print(str(position) + " " + str(list(ranked_words)))

# Persist the top-ranked cluster

high_arousal_Jackson = list(suggestion_values[0][1])
with open(path_pickled + "\\high_arousal_Jackson.pkl", "wb") as f:
    pickle.dump(high_arousal_Jackson, f)

{0: ['dolch', 'scharf', 'waffe'], 4: ['lüge', 'wahnsinn', 'zorn', 'leiche', 'schreck', 'schmerz', 'panisch', 'gefahr', 'angst', 'panik'], 2: ['elend', 'schrei', 'alptraum'], 3: ['sturm', 'macht', 'geburt', 'tote', 'krieg'], 1: ['feind', 'leid', 'tot', 'hassen']}
a: ITEMS ['dolch', 'scharf', 'waffe']
a: AVG SIMILARITY 0.22201532125473022
a: MEAN VALENCE -1.2166666666666668
a: MEAN VALENCE STD 1.0200567651571266
a: MEAN AROUSAL 4.131996658312448
a: MEAN AROUSAL STD 0.8493489667196584
a: SUGGESTION VALUE 4.05708552024761
b: ITEMS ['feind', 'leid', 'tot', 'hassen']
b: AVG SIMILARITY 0.17434664749695608
b: MEAN VALENCE -2.2867647058823533
b: MEAN VALENCE STD 0.940749847377664
b: MEAN AROUSAL 4.231944444444445
b: MEAN AROUSAL STD 0.8703409258684832
b: SUGGESTION VALUE 4.297969526550171
c: ITEMS ['elend', 'schrei', 'alptraum']
c: AVG SIMILARITY 0.22127488007148108




c: MEAN VALENCE -2.1666666666666665
c: MEAN VALENCE STD 0.8487477742703143
c: MEAN AROUSAL 4.3501547987616105
c: MEAN AROUSAL STD 0.7615509744586365
c: SUGGESTION VALUE 4.410410835151478
d: ITEMS ['sturm', 'macht', 'geburt', 'tote', 'krieg']
d: AVG SIMILARITY 0.24368526935577392
d: MEAN VALENCE -0.72
d: MEAN VALENCE STD 0.858418484042998
d: MEAN AROUSAL 4.234873949579831
d: MEAN AROUSAL STD 0.8300607586082899
d: SUGGESTION VALUE 4.656511315725392
e: ITEMS ['lüge', 'wahnsinn', 'zorn', 'leiche', 'schreck', 'schmerz', 'panisch', 'gefahr', 'angst', 'panik']
e: AVG SIMILARITY 0.22440432392888598
e: MEAN VALENCE -1.9702941176470588
e: MEAN VALENCE STD 0.901858680819976
e: MEAN AROUSAL 4.232357855422871
e: MEAN AROUSAL STD 0.7507011664832689
e: SUGGESTION VALUE 6.036782870370844
1 ['lüge', 'wahnsinn', 'zorn', 'leiche', 'schreck', 'schmerz', 'panisch', 'gefahr', 'angst', 'panik']
2 ['sturm', 'macht', 'geburt', 'tote', 'krieg']
3 ['elend', 'schrei', 'alptraum']
4 ['feind', 'leid', 'tot', 'hasse

### 4. Low Arousal

In [37]:
# Select the low-arousal seed words for the Jackson corpus.
#
# Variant A (threshold): BAWL words with JACKSON_FREQ >= 50, a mean arousal
# (AROUSAL_MEAN) of at most `arousal_mean` and an arousal STD (AROUSAL_STD)
# of at most `arousal_std`.
# NOTE: this result is overwritten by variant B directly below; the lines are
# kept only to document the alternative selection strategy.

arousal_mean = 2
arousal_std = 1

low_arousal_words = list(bawl[(bawl["JACKSON_FREQ"] >= 50) & (bawl["AROUSAL_MEAN"] <= arousal_mean) & (bawl["AROUSAL_STD"] <= arousal_std)]["WORD_LOWER"])

# Variant B (ranking, the one actually used): among the words with
# JACKSON_FREQ >= 50 and AROUSAL_STD <= `arousal_std`, take the `bot_n` words
# with the lowest AROUSAL_MEAN.

bot_n = 25
arousal_std = 1

low_arousal_words = list(
    bawl[(bawl["JACKSON_FREQ"] >= 50) & (bawl["AROUSAL_STD"] <= arousal_std)]
    .sort_values("AROUSAL_MEAN")["WORD_LOWER"]
)[:bot_n]

# Get vectors for arousal words

arousal_words_index = [modelJackson.key_to_index[word] for word in low_arousal_words]
arousal_word_vectors = [modelJackson.get_vector(i) for i in arousal_words_index]

# L2-normalize every vector to unit length

arousal_word_cosine = np.array(normalize(arousal_word_vectors, norm='l2'))

# Cluster with Affinity Propagation

arousal_cos_cluster = AF(affinity='euclidean')
arousal_cos_cluster.fit(arousal_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

arousal_sem_groups = {}
for label, word in zip(arousal_cos_cluster.labels_, low_arousal_words):
    arousal_sem_groups.setdefault(label, []).append(word)

print(arousal_sem_groups)

# Print relevant information of clusters and compute one suggestion value
# per cluster

suggestion_values = []
word_array = np.array(low_arousal_words)

# NOTE: the label alphabet below contains no 'w'; it is left untouched so the
# printed labels stay identical to earlier runs.
letters = cycle('abcdefghijklmnopqrstuvxyz')
for k, let in zip(range(len(arousal_cos_cluster.cluster_centers_)), letters):
    class_members = arousal_cos_cluster.labels_ == k
    cluster_words = word_array[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))

    # Similarity of every ordered pair of distinct words in the cluster
    dist_list = [
        modelJackson.similarity(member1, member2)
        for member1 in cluster_words
        for member2 in cluster_words
        if member1 != member2
    ]
    if dist_list:
        avg_similarity = sum(dist_list) / len(dist_list)
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    # Collect the BAWL norms of every word in the cluster
    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in arousal_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .iloc[0] instead of float(<Series>), which recent pandas deprecates
        valence_means.append(float(word_row["EMO_MEAN"].iloc[0]))
        valence_stds.append(float(word_row["EMO_STD"].iloc[0]))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].iloc[0]))
        arousal_stds.append(float(word_row["AROUSAL_STD"].iloc[0]))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    # Suggestion value: (avg. similarity + (4 - mean arousal) - mean arousal
    # STD), multiplied by 1.05 ** cluster size; the `4 - mean` term makes a
    # lower mean arousal yield a higher score. A single-word cluster has no
    # word pairs and therefore scores 0; this explicit check replaces a bare
    # `except:` that would also have hidden unrelated errors.
    if dist_list:
        suggestion_value = (avg_similarity + (4 - mean_arousal) - mean_arousal_std) * (1.05 ** len(cluster_words))
    else:
        suggestion_value = 0
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_value))

# Ranking for suggestion values (best cluster first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for position, (score, ranked_words) in enumerate(suggestion_values, start=1):
    print(str(position) + " " + str(list(ranked_words)))

# Persist the top-ranked cluster

low_arousal_Jackson = list(suggestion_values[0][1])
with open(path_pickled + "\\low_arousal_Jackson.pkl", "wb") as f:
    pickle.dump(low_arousal_Jackson, f)

{5: ['schlaf', 'gesund'], 1: ['still', 'stumm'], 3: ['weich', 'wiese', 'baum', 'boden', 'gras', 'garten', 'kissen', 'himmel', 'sand'], 0: ['pause', 'friede', 'wenig'], 4: ['kamin', 'teller', 'decke', 'sofa', 'schlicht', 'zimmer'], 2: ['segen', 'ding', 'klang']}
a: ITEMS ['pause', 'friede', 'wenig']
a: AVG SIMILARITY 0.15853899468978247
a: MEAN VALENCE 0.8833333333333333
a: MEAN VALENCE STD 1.05766827
a: MEAN AROUSAL 1.4357560568086882
a: MEAN AROUSAL STD 0.7367554964042022
a: SUGGESTION VALUE 2.2990750169396876
b: ITEMS ['still', 'stumm']
b: AVG SIMILARITY 0.2627981901168823
b: MEAN VALENCE -0.15000000000000002
b: MEAN VALENCE STD 0.86
b: MEAN AROUSAL 1.3947368421052633
b: MEAN AROUSAL STD 0.7631127990415713
b: SUGGESTION VALUE 2.3207057752394777
c: ITEMS ['segen', 'ding', 'klang']
c: AVG SIMILARITY 0.15625752011934915
c: MEAN VALENCE 0.9568627450980394
c: MEAN VALENCE STD 0.923338936742171
c: MEAN AROUSAL 1.5615009746588695
c: MEAN AROUSAL STD 0.8311413107293601
c: SUGGESTION VALUE 2.



## 7. PANEM

### 1. High Valence

In [38]:
# Select the high-valence seed words for the Panem corpus.
#
# Variant A (threshold): BAWL words with PANEM_FREQ >= 50, a mean valence
# (EMO_MEAN) of at least `emo_mean` and a valence STD (EMO_STD) of at most
# `emo_std`.
# NOTE: this result is overwritten by variant B directly below; the lines are
# kept only to document the alternative selection strategy.

emo_mean = 2
emo_std = 1

high_valence_words = list(bawl[(bawl["PANEM_FREQ"] >= 50) & (bawl["EMO_MEAN"] >= emo_mean) & (bawl["EMO_STD"] <= emo_std)]["WORD_LOWER"])

# Variant B (ranking, the one actually used): among the words with
# PANEM_FREQ >= 50 and EMO_STD <= `emo_std`, take the `top_n` words with the
# highest EMO_MEAN.

top_n = 25
emo_std = 1

high_valence_words = list(
    bawl[(bawl["PANEM_FREQ"] >= 50) & (bawl["EMO_STD"] <= emo_std)]
    .sort_values("EMO_MEAN")["WORD_LOWER"]
)[-top_n:]

# Get vectors for valence words

valence_words_index = [modelPanem.key_to_index[word] for word in high_valence_words]
valence_word_vectors = [modelPanem.get_vector(i) for i in valence_words_index]

# L2-normalize every vector to unit length

valence_word_cosine = np.array(normalize(valence_word_vectors, norm='l2'))

# Cluster with Affinity Propagation (fitted directly on the normalized
# vectors; no PCA step is applied in this cell)

valence_cos_cluster = AF(affinity='euclidean')
valence_cos_cluster.fit(valence_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

valence_sem_groups = {}
for label, word in zip(valence_cos_cluster.labels_, high_valence_words):
    valence_sem_groups.setdefault(label, []).append(word)

print(valence_sem_groups)

# Print relevant information of clusters and compute one suggestion value
# per cluster

suggestion_values = []
word_array = np.array(high_valence_words)

# NOTE: the label alphabet below contains no 'w'; it is left untouched so the
# printed labels stay identical to earlier runs.
letters = cycle('abcdefghijklmnopqrstuvxyz')
for k, let in zip(range(len(valence_cos_cluster.cluster_centers_)), letters):
    class_members = valence_cos_cluster.labels_ == k
    cluster_words = word_array[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))

    # Similarity of every ordered pair of distinct words in the cluster
    dist_list = [
        modelPanem.similarity(member1, member2)
        for member1 in cluster_words
        for member2 in cluster_words
        if member1 != member2
    ]
    if dist_list:
        avg_similarity = sum(dist_list) / len(dist_list)
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    # Collect the BAWL norms of every word in the cluster
    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in valence_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .iloc[0] instead of float(<Series>), which recent pandas deprecates
        valence_means.append(float(word_row["EMO_MEAN"].iloc[0]))
        valence_stds.append(float(word_row["EMO_STD"].iloc[0]))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].iloc[0]))
        arousal_stds.append(float(word_row["AROUSAL_STD"].iloc[0]))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    # Suggestion value: (avg. similarity + |mean valence| - mean valence STD),
    # multiplied by 1.05 ** cluster size. A single-word cluster has no word
    # pairs and therefore scores 0; this explicit check replaces a bare
    # `except:` that would also have hidden unrelated errors.
    if dist_list:
        suggestion_value = (avg_similarity + abs(mean_valence) - mean_valence_std) * (1.05 ** len(cluster_words))
    else:
        suggestion_value = 0
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_value))

# Ranking for suggestion values (best cluster first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for position, (score, ranked_words) in enumerate(suggestion_values, start=1):
    print(str(position) + " " + str(list(ranked_words)))

# Persist the top-ranked cluster

high_val_Panem = list(suggestion_values[0][1])
with open(path_pickled + "\\high_val_Panem.pkl", "wb") as f:
    pickle.dump(high_val_Panem, f)

{5: ['gefühl', 'genießen', 'natur', 'sommer', 'zuhause', 'lieben', 'freiheit'], 0: ['himmel'], 1: ['lächeln', 'wahrheit', 'lachen'], 3: ['geschenk', 'schatz', 'spaß', 'positiv', 'perfekt', 'super', 'glück'], 2: ['leben', 'freund', 'freude'], 4: ['musik', 'lebendig', 'küssen', 'gesund']}
a: ITEMS ['himmel']
a: MEAN VALENCE 2.25
a: MEAN VALENCE STD 0.91046547
a: MEAN AROUSAL 1.65
a: MEAN AROUSAL STD 0.9880869341680842
a: SUGGESTION VALUE 0
b: ITEMS ['lächeln', 'wahrheit', 'lachen']
b: AVG SIMILARITY 0.21159330507119498
b: MEAN VALENCE 2.4803921568627456
b: MEAN VALENCE STD 0.7354398333414088
b: MEAN AROUSAL 2.73015873015873
b: MEAN AROUSAL STD 1.216288911502619
b: SUGGESTION VALUE 2.2649461332994303
c: ITEMS ['leben', 'freund', 'freude']
c: AVG SIMILARITY 0.21969045201937357
c: MEAN VALENCE 2.547058823529412
c: MEAN VALENCE STD 0.6790400545693869
c: MEAN AROUSAL 2.710354317165463
c: MEAN AROUSAL STD 1.3166120451268373
c: SUGGESTION VALUE 2.4167843869362766
d: ITEMS ['geschenk', 'schatz',



### 2. Low Valence

In [39]:
# Select the low-valence seed words for the Panem corpus.
#
# Variant A (threshold): BAWL words with PANEM_FREQ >= 50, a mean valence
# (EMO_MEAN) of at most `emo_mean` and a valence STD (EMO_STD) of at most
# `emo_std`.
# NOTE: this result is overwritten by variant B directly below; the lines are
# kept only to document the alternative selection strategy.

emo_mean = -2
emo_std = 1

low_valence_words = list(bawl[(bawl["PANEM_FREQ"] >= 50) & (bawl["EMO_MEAN"] <= emo_mean) & (bawl["EMO_STD"] <= emo_std)]["WORD_LOWER"])

# Variant B (ranking, the one actually used): among the words with
# PANEM_FREQ >= 50 and EMO_STD <= `emo_std`, take the `bot_n` words with the
# lowest EMO_MEAN.

bot_n = 25
emo_std = 1

low_valence_words = list(
    bawl[(bawl["PANEM_FREQ"] >= 50) & (bawl["EMO_STD"] <= emo_std)]
    .sort_values("EMO_MEAN")["WORD_LOWER"]
)[:bot_n]

# Get vectors for valence words

valence_words_index = [modelPanem.key_to_index[word] for word in low_valence_words]
valence_word_vectors = [modelPanem.get_vector(i) for i in valence_words_index]

# L2-normalize every vector to unit length

valence_word_cosine = np.array(normalize(valence_word_vectors, norm='l2'))

# Cluster with Affinity Propagation

valence_cos_cluster = AF(affinity='euclidean')
valence_cos_cluster.fit(valence_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

valence_sem_groups = {}
for label, word in zip(valence_cos_cluster.labels_, low_valence_words):
    valence_sem_groups.setdefault(label, []).append(word)

print(valence_sem_groups)

# Print relevant information of clusters and compute one suggestion value
# per cluster

suggestion_values = []
word_array = np.array(low_valence_words)

# NOTE: the label alphabet below contains no 'w'; it is left untouched so the
# printed labels stay identical to earlier runs.
letters = cycle('abcdefghijklmnopqrstuvxyz')
for k, let in zip(range(len(valence_cos_cluster.cluster_centers_)), letters):
    class_members = valence_cos_cluster.labels_ == k
    cluster_words = word_array[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))

    # Similarity of every ordered pair of distinct words in the cluster
    dist_list = [
        modelPanem.similarity(member1, member2)
        for member1 in cluster_words
        for member2 in cluster_words
        if member1 != member2
    ]
    if dist_list:
        avg_similarity = sum(dist_list) / len(dist_list)
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    # Collect the BAWL norms of every word in the cluster
    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in valence_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .iloc[0] instead of float(<Series>), which recent pandas deprecates
        valence_means.append(float(word_row["EMO_MEAN"].iloc[0]))
        valence_stds.append(float(word_row["EMO_STD"].iloc[0]))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].iloc[0]))
        arousal_stds.append(float(word_row["AROUSAL_STD"].iloc[0]))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    # Suggestion value: (avg. similarity + |mean valence| - mean valence STD),
    # multiplied by 1.05 ** cluster size.
    # The cluster size is taken from this cell's own low-valence word list.
    # Indexing the unrelated `high_valence_words` list here only worked while
    # that variable existed with the same length; otherwise the resulting
    # NameError/IndexError was swallowed by a bare `except:` and every score
    # silently became 0.
    # A single-word cluster has no word pairs and therefore scores 0.
    if dist_list:
        suggestion_value = (avg_similarity + abs(mean_valence) - mean_valence_std) * (1.05 ** len(cluster_words))
    else:
        suggestion_value = 0
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_value))

# Ranking for suggestion values (best cluster first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for position, (score, ranked_words) in enumerate(suggestion_values, start=1):
    print(str(position) + " " + str(list(ranked_words)))

# Persist the top-ranked cluster

low_val_Panem = list(suggestion_values[0][1])
with open(path_pickled + "\\low_val_Panem.pkl", "wb") as f:
    pickle.dump(low_val_Panem, f)

{0: ['krieg', 'zerstören', 'strafe', 'streit', 'tragisch'], 3: ['alptraum', 'angst', 'furcht', 'einsam'], 4: ['tod', 'verlust', 'grausam', 'lüge', 'traurig', 'abschied'], 1: ['töten', 'gift', 'waffe', 'stehlen'], 2: ['tote', 'tot', 'leiche', 'schlecht', 'sarg', 'panisch']}
a: ITEMS ['krieg', 'zerstören', 'strafe', 'streit', 'tragisch']
a: AVG SIMILARITY 0.27205989211797715
a: MEAN VALENCE -2.425882352941177
a: MEAN VALENCE STD 0.6810176258180624
a: MEAN AROUSAL 4.0285714285714285
a: MEAN AROUSAL STD 0.8090459904161067
a: SUGGESTION VALUE 2.574163704489739
b: ITEMS ['töten', 'gift', 'waffe', 'stehlen']
b: AVG SIMILARITY 0.18814480118453503
b: MEAN VALENCE -2.4389705882352946
b: MEAN VALENCE STD 0.639471941710974
b: MEAN AROUSAL 4.07063492063492
b: MEAN AROUSAL STD 0.9383773149191051
b: SUGGESTION VALUE 2.4159930334616626
c: ITEMS ['tote', 'tot', 'leiche', 'schlecht', 'sarg', 'panisch']
c: AVG SIMILARITY 0.17701964937150477
c: MEAN VALENCE -2.3749999999999996
c: MEAN VALENCE STD 0.756938



### 3. High Arousal

In [40]:
# Select the high-arousal seed words for the Panem corpus.
#
# Variant A (threshold): BAWL words with PANEM_FREQ >= 50, a mean arousal
# (AROUSAL_MEAN) of at least `arousal_mean` and an arousal STD (AROUSAL_STD)
# of at most `arousal_std`.
# NOTE: this result is overwritten by variant B directly below; the lines are
# kept only to document the alternative selection strategy.

arousal_mean = 2
arousal_std = 1

high_arousal_words = list(bawl[(bawl["PANEM_FREQ"] >= 50) & (bawl["AROUSAL_MEAN"] >= arousal_mean) & (bawl["AROUSAL_STD"] <= arousal_std)]["WORD_LOWER"])

# Variant B (ranking, the one actually used): among the words with
# PANEM_FREQ >= 50 and AROUSAL_STD <= `arousal_std`, take the `top_n` words
# with the highest AROUSAL_MEAN.

top_n = 25
arousal_std = 1

high_arousal_words = list(
    bawl[(bawl["PANEM_FREQ"] >= 50) & (bawl["AROUSAL_STD"] <= arousal_std)]
    .sort_values("AROUSAL_MEAN")["WORD_LOWER"]
)[-top_n:]

# Get vectors for arousal words

arousal_words_index = [modelPanem.key_to_index[word] for word in high_arousal_words]
arousal_word_vectors = [modelPanem.get_vector(i) for i in arousal_words_index]

# L2-normalize every vector to unit length

arousal_word_cosine = np.array(normalize(arousal_word_vectors, norm='l2'))

# Cluster with Affinity Propagation

arousal_cos_cluster = AF(affinity='euclidean')
arousal_cos_cluster.fit(arousal_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

arousal_sem_groups = {}
for label, word in zip(arousal_cos_cluster.labels_, high_arousal_words):
    arousal_sem_groups.setdefault(label, []).append(word)

print(arousal_sem_groups)

# Print relevant information of clusters and compute one suggestion value
# per cluster

suggestion_values = []
word_array = np.array(high_arousal_words)

# NOTE: the label alphabet below contains no 'w'; it is left untouched so the
# printed labels stay identical to earlier runs.
letters = cycle('abcdefghijklmnopqrstuvxyz')
for k, let in zip(range(len(arousal_cos_cluster.cluster_centers_)), letters):
    class_members = arousal_cos_cluster.labels_ == k
    cluster_words = word_array[class_members]

    print(let + ": ITEMS " + str(list(cluster_words)))

    # Similarity of every ordered pair of distinct words in the cluster
    dist_list = [
        modelPanem.similarity(member1, member2)
        for member1 in cluster_words
        for member2 in cluster_words
        if member1 != member2
    ]
    if dist_list:
        avg_similarity = sum(dist_list) / len(dist_list)
        print(let + ": AVG SIMILARITY " + str(avg_similarity))

    # Collect the BAWL norms of every word in the cluster
    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in arousal_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .iloc[0] instead of float(<Series>), which recent pandas deprecates
        valence_means.append(float(word_row["EMO_MEAN"].iloc[0]))
        valence_stds.append(float(word_row["EMO_STD"].iloc[0]))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].iloc[0]))
        arousal_stds.append(float(word_row["AROUSAL_STD"].iloc[0]))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE " + str(mean_valence))
    print(let + ": MEAN VALENCE STD " + str(mean_valence_std))
    print(let + ": MEAN AROUSAL " + str(mean_arousal))
    print(let + ": MEAN AROUSAL STD " + str(mean_arousal_std))

    # Suggestion value: (avg. similarity + mean arousal - mean arousal STD),
    # multiplied by 1.05 ** cluster size. A single-word cluster has no word
    # pairs and therefore scores 0; this explicit check replaces a bare
    # `except:` that would also have hidden unrelated errors.
    if dist_list:
        suggestion_value = (avg_similarity + mean_arousal - mean_arousal_std) * (1.05 ** len(cluster_words))
    else:
        suggestion_value = 0
    suggestion_values.append((suggestion_value, cluster_words))
    print(let + ": SUGGESTION VALUE " + str(suggestion_value))

# Ranking for suggestion values (best cluster first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for position, (score, ranked_words) in enumerate(suggestion_values, start=1):
    print(str(position) + " " + str(list(ranked_words)))

# Persist the top-ranked cluster

high_arousal_Panem = list(suggestion_values[0][1])
with open(path_pickled + "\\high_arousal_Panem.pkl", "wb") as f:
    pickle.dump(high_arousal_Panem, f)

{0: ['dolch', 'sturm', 'scharf', 'panisch', 'waffe', 'schrei'], 1: ['lüge', 'macht', 'leid', 'krieg'], 4: ['wahnsinn', 'zorn', 'schmerz', 'schreck', 'angst', 'furcht', 'alptraum', 'panik'], 3: ['leiche', 'tot', 'tote', 'blutig'], 2: ['feind', 'gefahr', 'brutal']}
a: ITEMS ['dolch', 'sturm', 'scharf', 'panisch', 'waffe', 'schrei']
a: AVG SIMILARITY 0.18600574582815171
a: MEAN VALENCE -1.2416666666666665
a: MEAN VALENCE STD 0.994731168710067
a: MEAN AROUSAL 4.189877471456419
a: MEAN AROUSAL STD 0.8268151831207557
a: SUGGESTION VALUE 4.756090600864469
b: ITEMS ['lüge', 'macht', 'leid', 'krieg']
b: AVG SIMILARITY 0.25238679101069766
b: MEAN VALENCE -1.7294117647058824
b: MEAN VALENCE STD 0.8925198492397692
b: MEAN AROUSAL 4.235912698412699
b: MEAN AROUSAL STD 0.745992807782174
b: SUGGESTION VALUE 4.5487971609516675
c: ITEMS ['feind', 'gefahr', 'brutal']
c: AVG SIMILARITY 0.24656043946743011
c: MEAN VALENCE -1.9823529411764707
c: MEAN VALENCE STD 0.7779619537104682
c: MEAN AROUSAL 4.3524227



e: MEAN VALENCE -2.0294117647058827
e: MEAN VALENCE STD 0.94953506810292
e: MEAN AROUSAL 4.30905695611578
e: MEAN AROUSAL STD 0.7102294743680319
e: SUGGESTION VALUE 5.7799644503145355
1 ['wahnsinn', 'zorn', 'schmerz', 'schreck', 'angst', 'furcht', 'alptraum', 'panik']
2 ['dolch', 'sturm', 'scharf', 'panisch', 'waffe', 'schrei']
3 ['lüge', 'macht', 'leid', 'krieg']
4 ['leiche', 'tot', 'tote', 'blutig']
5 ['feind', 'gefahr', 'brutal']


### 4. Low Arousal

In [41]:
# Select low-arousal BAWL words for the Panem corpus, cluster their embedding
# vectors with Affinity Propagation, score every cluster with a "suggestion
# value" and pickle the words of the best-scoring cluster.

# Define relevant arousal words by threshold value
# NOTE: this selection is overwritten by the ranking-based selection below.

arousal_mean = 2
arousal_std = 1

low_arousal_words = list(bawl[(bawl["PANEM_FREQ"] >= 50) & (bawl["AROUSAL_MEAN"] <= arousal_mean) & (bawl["AROUSAL_STD"] <= arousal_std)]["WORD_LOWER"])

# Define relevant arousal words by ranking: the bot_n words with the lowest
# mean arousal among words with frequency >= 50 and arousal std <= arousal_std

bot_n = 25
arousal_std = 1

low_arousal_words = list(bawl[(bawl["PANEM_FREQ"] >= 50) & (bawl["AROUSAL_STD"] <= arousal_std)].sort_values("AROUSAL_MEAN")["WORD_LOWER"])[:bot_n]

# Get vectors for arousal words

arousal_words_index = [modelPanem.key_to_index[word] for word in low_arousal_words]
arousal_word_vectors = [modelPanem.get_vector(i) for i in arousal_words_index]

# Normalize vectors to unit length (L2 norm)

arousal_word_cosine = np.array(normalize(arousal_word_vectors, norm='l2'))

# Cluster with Affinity Propagation

arousal_cos_cluster = AF(affinity='euclidean')
arousal_cos_cluster.fit(arousal_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

arousal_sem_groups = {}
for word, label in zip(low_arousal_words, arousal_cos_cluster.labels_):
    arousal_sem_groups.setdefault(label, []).append(word)

print(arousal_sem_groups)

# Print relevant information of clusters

suggestion_values = []

# One letter per cluster for the printed report ('w' was missing originally)
letters = cycle('abcdefghijklmnopqrstuvwxyz')
low_arousal_array = np.array(low_arousal_words)
for k, let in zip(range(len(arousal_cos_cluster.cluster_centers_)), letters):
    class_members = arousal_cos_cluster.labels_ == k
    members = low_arousal_array[class_members]

    print(let + ": ITEMS", list(members))

    # Pairwise model similarity between all distinct members (both orders)
    dist_list = [modelPanem.similarity(member1, member2)
                 for member1 in members
                 for member2 in members
                 if member1 != member2]
    # A cluster with a single word has no pairs and therefore no similarity
    avg_similarity = sum(dist_list) / len(dist_list) if dist_list else None
    if avg_similarity is not None:
        print(let + ": AVG SIMILARITY", avg_similarity)

    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in arousal_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() extracts the value of the single matching row and raises if
        # there is not exactly one; float() on a Series is deprecated in pandas
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE", mean_valence)
    print(let + ": MEAN VALENCE STD", mean_valence_std)
    print(let + ": MEAN AROUSAL", mean_arousal)
    print(let + ": MEAN AROUSAL STD", mean_arousal_std)

    # Suggestion value: similarity + (4 - mean arousal) - mean arousal std,
    # multiplied by 1.05 per cluster member. Single-word clusters score 0.
    # The explicit check replaces a bare `except:` that hid every other error.
    if avg_similarity is None:
        suggestion_value = 0
    else:
        suggestion_value = (avg_similarity + (4 - mean_arousal) - mean_arousal_std) * (1.05 ** len(members))
    suggestion_values.append((suggestion_value, members))
    print(let + ": SUGGESTION VALUE", suggestion_value)

# Ranking for suggestion values (highest first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, (_value, ranked_members) in enumerate(suggestion_values, start=1):
    print(rank, list(ranked_members))

# Store the words of the best-ranked cluster
low_arousal_Panem = list(suggestion_values[0][1])
with open(path_pickled + "\\low_arousal_Panem.pkl", "wb") as f:
    pickle.dump(low_arousal_Panem, f)



{3: ['schlaf', 'weich', 'friede', 'ding', 'decke', 'teller', 'sofa', 'kissen'], 0: ['still', 'pause', 'wenig'], 1: ['wiese', 'baum', 'boden', 'gras', 'sand'], 4: ['zaghaft', 'stumm', 'gesund', 'wange'], 2: ['garten', 'himmel', 'herbst', 'schlicht', 'zimmer']}
a: ITEMS ['still', 'pause', 'wenig']
a: AVG SIMILARITY 0.22753689686457315
a: MEAN VALENCE 0.2833333333333333
a: MEAN VALENCE STD 0.9889026933333334
a: MEAN AROUSAL 1.390142021720969
a: MEAN AROUSAL STD 0.6106310947327894
a: SUGGESTION VALUE 2.57775742129807
b: ITEMS ['wiese', 'baum', 'boden', 'gras', 'sand']
b: AVG SIMILARITY 0.3421886444091797
b: MEAN VALENCE 1.1092941176470588
b: MEAN VALENCE STD 1.107478975479675
b: MEAN AROUSAL 1.5447973856209152
b: MEAN AROUSAL STD 0.7026595629208274
b: SUGGESTION VALUE 2.6734674418199678
c: ITEMS ['garten', 'himmel', 'herbst', 'schlicht', 'zimmer']
c: AVG SIMILARITY 0.20147554911673068
c: MEAN VALENCE 1.24
c: MEAN VALENCE STD 0.8025731934745244
c: MEAN AROUSAL 1.648888888888889
c: MEAN AROU

## Originals

### 1. High Valence

In [42]:
# Select high-valence BAWL words for the Potter originals corpus, cluster
# their embedding vectors with Affinity Propagation, score every cluster with
# a "suggestion value" and pickle the words of the best-scoring cluster.

# Define relevant valence words by threshold value
# NOTE: this selection is overwritten by the ranking-based selection below.

emo_mean = 2
emo_std = 1

high_valence_words = list(bawl[(bawl["POTTERORIGINALS_FREQ"] >= 50) & (bawl["EMO_MEAN"] >= emo_mean) & (bawl["EMO_STD"] <= emo_std)]["WORD_LOWER"])

# Define relevant valence words by ranking: the top_n words with the highest
# mean valence among words with frequency >= 50 and valence std <= emo_std

top_n = 25
emo_std = 1

high_valence_words = list(bawl[(bawl["POTTERORIGINALS_FREQ"] >= 50) & (bawl["EMO_STD"] <= emo_std)].sort_values("EMO_MEAN")["WORD_LOWER"])[-top_n:]

# Get vectors for valence words

valence_words_index = [modelPotterOriginals.key_to_index[word] for word in high_valence_words]
valence_word_vectors = [modelPotterOriginals.get_vector(i) for i in valence_words_index]

# Normalize vectors to unit length (L2 norm)

valence_word_cosine = np.array(normalize(valence_word_vectors, norm='l2'))

# Cluster with Affinity Propagation (directly on the normalized vectors;
# no PCA is applied in this cell)

valence_cos_cluster = AF(affinity='euclidean')
valence_cos_cluster.fit(valence_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

valence_sem_groups = {}
for word, label in zip(high_valence_words, valence_cos_cluster.labels_):
    valence_sem_groups.setdefault(label, []).append(word)

print(valence_sem_groups)

# Print relevant information of clusters

suggestion_values = []

# One letter per cluster for the printed report ('w' was missing originally)
letters = cycle('abcdefghijklmnopqrstuvwxyz')
high_valence_array = np.array(high_valence_words)
for k, let in zip(range(len(valence_cos_cluster.cluster_centers_)), letters):
    class_members = valence_cos_cluster.labels_ == k
    members = high_valence_array[class_members]

    print(let + ": ITEMS", list(members))

    # Pairwise model similarity between all distinct members (both orders)
    dist_list = [modelPotterOriginals.similarity(member1, member2)
                 for member1 in members
                 for member2 in members
                 if member1 != member2]
    # A cluster with a single word has no pairs and therefore no similarity
    avg_similarity = sum(dist_list) / len(dist_list) if dist_list else None
    if avg_similarity is not None:
        print(let + ": AVG SIMILARITY", avg_similarity)

    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in valence_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() extracts the value of the single matching row and raises if
        # there is not exactly one; float() on a Series is deprecated in pandas
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE", mean_valence)
    print(let + ": MEAN VALENCE STD", mean_valence_std)
    print(let + ": MEAN AROUSAL", mean_arousal)
    print(let + ": MEAN AROUSAL STD", mean_arousal_std)

    # Suggestion value: similarity + |mean valence| - mean valence std,
    # multiplied by 1.05 per cluster member. Single-word clusters score 0.
    # The explicit check replaces a bare `except:` that hid every other error.
    if avg_similarity is None:
        suggestion_value = 0
    else:
        suggestion_value = (avg_similarity + abs(mean_valence) - mean_valence_std) * (1.05 ** len(members))
    suggestion_values.append((suggestion_value, members))
    print(let + ": SUGGESTION VALUE", suggestion_value)

# Ranking for suggestion values (highest first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, (_value, ranked_members) in enumerate(suggestion_values, start=1):
    print(rank, list(ranked_members))

# Store the words of the best-ranked cluster
high_val_PotterOriginals = list(suggestion_values[0][1])
with open(path_pickled + "\\high_val_PotterOriginals.pkl", "wb") as f:
    pickle.dump(high_val_PotterOriginals, f)



{0: ['lust', 'chance', 'spaß', 'glück'], 4: ['begeistern', 'lieb', 'ehrlich', 'mutig', 'freuen', 'leben', 'wahrheit', 'freund', 'lachen', 'lieben'], 2: ['luft', 'warm', 'vertrauen', 'gefühl', 'himmel'], 1: ['strahlen', 'lächeln'], 3: ['reise', 'geschenk', 'sommer', 'ferien']}
a: ITEMS ['lust', 'chance', 'spaß', 'glück']
a: AVG SIMILARITY 0.28434756646553677
a: MEAN VALENCE 2.2852941176470587
a: MEAN VALENCE STD 0.7373813269744321
a: MEAN AROUSAL 3.338854489164087
a: MEAN AROUSAL STD 1.2745497820177278
a: SUGGESTION VALUE 2.22712391572867
b: ITEMS ['strahlen', 'lächeln']
b: AVG SIMILARITY 0.29622453451156616
b: MEAN VALENCE 2.2470588235294118
b: MEAN VALENCE STD 0.8307058321763818
b: MEAN AROUSAL 2.4563492063492065
b: MEAN AROUSAL STD 1.160151066972619
b: SUGGESTION VALUE 1.8881167222657174
c: ITEMS ['luft', 'warm', 'vertrauen', 'gefühl', 'himmel']
c: AVG SIMILARITY 0.1610002614557743
c: MEAN VALENCE 2.2123529411764706
c: MEAN VALENCE STD 0.7756102190714588
c: MEAN AROUSAL 2.53526315789

### 2. Low Valence

In [43]:
# Select low-valence BAWL words for the Potter originals corpus, cluster
# their embedding vectors with Affinity Propagation, score every cluster with
# a "suggestion value" and pickle the words of the best-scoring cluster.

# Define relevant valence words by threshold value
# NOTE: this selection is overwritten by the ranking-based selection below.

emo_mean = -2
emo_std = 1

low_valence_words = list(bawl[(bawl["POTTERORIGINALS_FREQ"] >= 50) & (bawl["EMO_MEAN"] <= emo_mean) & (bawl["EMO_STD"] <= emo_std)]["WORD_LOWER"])

# Define relevant valence words by ranking: the bot_n words with the lowest
# mean valence among words with frequency >= 50 and valence std <= emo_std

bot_n = 25
emo_std = 1

low_valence_words = list(bawl[(bawl["POTTERORIGINALS_FREQ"] >= 50) & (bawl["EMO_STD"] <= emo_std)].sort_values("EMO_MEAN")["WORD_LOWER"])[:bot_n]

# Get vectors for valence words

valence_words_index = [modelPotterOriginals.key_to_index[word] for word in low_valence_words]
valence_word_vectors = [modelPotterOriginals.get_vector(i) for i in valence_words_index]

# Normalize vectors to unit length (L2 norm)

valence_word_cosine = np.array(normalize(valence_word_vectors, norm='l2'))

# Cluster with Affinity Propagation

valence_cos_cluster = AF(affinity='euclidean')
valence_cos_cluster.fit(valence_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

valence_sem_groups = {}
for word, label in zip(low_valence_words, valence_cos_cluster.labels_):
    valence_sem_groups.setdefault(label, []).append(word)

print(valence_sem_groups)

# Print relevant information of clusters

suggestion_values = []

# One letter per cluster for the printed report ('w' was missing originally)
letters = cycle('abcdefghijklmnopqrstuvwxyz')
low_valence_array = np.array(low_valence_words)
for k, let in zip(range(len(valence_cos_cluster.cluster_centers_)), letters):
    class_members = valence_cos_cluster.labels_ == k
    members = low_valence_array[class_members]

    print(let + ": ITEMS", list(members))

    # Pairwise model similarity between all distinct members (both orders)
    dist_list = [modelPotterOriginals.similarity(member1, member2)
                 for member1 in members
                 for member2 in members
                 if member1 != member2]
    # A cluster with a single word has no pairs and therefore no similarity
    avg_similarity = sum(dist_list) / len(dist_list) if dist_list else None
    if avg_similarity is not None:
        print(let + ": AVG SIMILARITY", avg_similarity)

    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in valence_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() extracts the value of the single matching row and raises if
        # there is not exactly one; float() on a Series is deprecated in pandas
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE", mean_valence)
    print(let + ": MEAN VALENCE STD", mean_valence_std)
    print(let + ": MEAN AROUSAL", mean_arousal)
    print(let + ": MEAN AROUSAL STD", mean_arousal_std)

    # Suggestion value: similarity + |mean valence| - mean valence std,
    # multiplied by 1.05 per cluster member. Single-word clusters score 0.
    # The size factor counts the members of THIS cell's word list. The
    # original indexed high_valence_words (left over from the previous cell),
    # and a bare `except:` turned any resulting error into a silent score of 0.
    if avg_similarity is None:
        suggestion_value = 0
    else:
        suggestion_value = (avg_similarity + abs(mean_valence) - mean_valence_std) * (1.05 ** len(members))
    suggestion_values.append((suggestion_value, members))
    print(let + ": SUGGESTION VALUE", suggestion_value)

# Ranking for suggestion values (highest first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, (_value, ranked_members) in enumerate(suggestion_values, start=1):
    print(rank, list(ranked_members))

# Store the words of the best-ranked cluster
low_val_PotterOriginals = list(suggestion_values[0][1])
with open(path_pickled + "\\low_val_PotterOriginals.pkl", "wb") as f:
    pickle.dump(low_val_PotterOriginals, f)

{3: ['mord', 'tod', 'zerstören', 'waffe', 'lüge', 'fluch', 'verlies', 'gefahr'], 0: ['töten', 'tot', 'zwingen', 'befehl', 'verletzen'], 1: ['angst', 'ärger', 'entsetzt'], 2: ['leiche', 'grab', 'stehlen', 'kerker'], 4: ['schlecht', 'traurig', 'schlimm', 'übel'], 5: ['schlagen']}
a: ITEMS ['töten', 'tot', 'zwingen', 'befehl', 'verletzen']
a: AVG SIMILARITY 0.22869802378118037
a: MEAN VALENCE -2.2470588235294118
a: MEAN VALENCE STD 0.8372256356877195
a: MEAN AROUSAL 4.043333333333334
a: MEAN AROUSAL STD 0.9863054651832167
a: SUGGESTION VALUE 2.0912271749750584
b: ITEMS ['angst', 'ärger', 'entsetzt']
b: AVG SIMILARITY 0.2643798887729645
b: MEAN VALENCE -2.1
b: MEAN VALENCE STD 0.6330686329267002
b: MEAN AROUSAL 3.9603174603174605
b: MEAN AROUSAL STD 0.8562684097727241
b: SUGGESTION VALUE 2.004209192549032
c: ITEMS ['leiche', 'grab', 'stehlen', 'kerker']
c: AVG SIMILARITY 0.22772920628388724
c: MEAN VALENCE -2.2125
c: MEAN VALENCE STD 0.7136672205871412
c: MEAN AROUSAL 3.897642390289449
c: 



e: MEAN VALENCE -2.030882352941177
e: MEAN VALENCE STD 0.7983870413209225
e: MEAN AROUSAL 3.472222222222222
e: MEAN AROUSAL STD 0.8546383679127187
e: SUGGESTION VALUE 1.826015068706931
f: ITEMS ['schlagen']
f: MEAN VALENCE -1.9117647058823528
f: MEAN VALENCE STD 0.9959812295548891
f: MEAN AROUSAL 3.9444444444444446
f: MEAN AROUSAL STD 0.8726040960806526
f: SUGGESTION VALUE 0
1 ['mord', 'tod', 'zerstören', 'waffe', 'lüge', 'fluch', 'verlies', 'gefahr']
2 ['leiche', 'grab', 'stehlen', 'kerker']
3 ['töten', 'tot', 'zwingen', 'befehl', 'verletzen']
4 ['angst', 'ärger', 'entsetzt']
5 ['schlecht', 'traurig', 'schlimm', 'übel']
6 ['schlagen']


### 3. High Arousal

In [44]:
# Select high-arousal BAWL words for the Potter originals corpus, cluster
# their embedding vectors with Affinity Propagation, score every cluster with
# a "suggestion value" and pickle the words of the best-scoring cluster.

# Define relevant arousal words by threshold value
# NOTE: this selection is overwritten by the ranking-based selection below.
# NOTE(review): a lower bound of 2 looks copied from the low-arousal cell;
# it does not affect the result here - confirm the intended threshold.

arousal_mean = 2
arousal_std = 1

high_arousal_words = list(bawl[(bawl["POTTERORIGINALS_FREQ"] >= 50) & (bawl["AROUSAL_MEAN"] >= arousal_mean) & (bawl["AROUSAL_STD"] <= arousal_std)]["WORD_LOWER"])

# Define relevant arousal words by ranking: the top_n words with the highest
# mean arousal among words with frequency >= 50 and arousal std <= arousal_std

top_n = 25
arousal_std = 1

high_arousal_words = list(bawl[(bawl["POTTERORIGINALS_FREQ"] >= 50) & (bawl["AROUSAL_STD"] <= arousal_std)].sort_values("AROUSAL_MEAN")["WORD_LOWER"])[-top_n:]

# Get vectors for arousal words

arousal_words_index = [modelPotterOriginals.key_to_index[word] for word in high_arousal_words]
arousal_word_vectors = [modelPotterOriginals.get_vector(i) for i in arousal_words_index]

# Normalize vectors to unit length (L2 norm)

arousal_word_cosine = np.array(normalize(arousal_word_vectors, norm='l2'))

# Cluster with Affinity Propagation

arousal_cos_cluster = AF(affinity='euclidean')
arousal_cos_cluster.fit(arousal_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

arousal_sem_groups = {}
for word, label in zip(high_arousal_words, arousal_cos_cluster.labels_):
    arousal_sem_groups.setdefault(label, []).append(word)

print(arousal_sem_groups)

# Print relevant information of clusters

suggestion_values = []

# One letter per cluster for the printed report ('w' was missing originally)
letters = cycle('abcdefghijklmnopqrstuvwxyz')
high_arousal_array = np.array(high_arousal_words)
for k, let in zip(range(len(arousal_cos_cluster.cluster_centers_)), letters):
    class_members = arousal_cos_cluster.labels_ == k
    members = high_arousal_array[class_members]

    print(let + ": ITEMS", list(members))

    # Pairwise model similarity between all distinct members (both orders)
    dist_list = [modelPotterOriginals.similarity(member1, member2)
                 for member1 in members
                 for member2 in members
                 if member1 != member2]
    # A cluster with a single word has no pairs and therefore no similarity
    avg_similarity = sum(dist_list) / len(dist_list) if dist_list else None
    if avg_similarity is not None:
        print(let + ": AVG SIMILARITY", avg_similarity)

    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in arousal_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() extracts the value of the single matching row and raises if
        # there is not exactly one; float() on a Series is deprecated in pandas
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE", mean_valence)
    print(let + ": MEAN VALENCE STD", mean_valence_std)
    print(let + ": MEAN AROUSAL", mean_arousal)
    print(let + ": MEAN AROUSAL STD", mean_arousal_std)

    # Suggestion value: similarity + mean arousal - mean arousal std,
    # multiplied by 1.05 per cluster member. Single-word clusters score 0.
    # The explicit check replaces a bare `except:` that hid every other error.
    if avg_similarity is None:
        suggestion_value = 0
    else:
        suggestion_value = (avg_similarity + mean_arousal - mean_arousal_std) * (1.05 ** len(members))
    suggestion_values.append((suggestion_value, members))
    print(let + ": SUGGESTION VALUE", suggestion_value)

# Ranking for suggestion values (highest first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, (_value, ranked_members) in enumerate(suggestion_values, start=1):
    print(rank, list(ranked_members))

# Store the words of the best-ranked cluster
high_arousal_PotterOriginals = list(suggestion_values[0][1])
with open(path_pickled + "\\high_arousal_PotterOriginals.pkl", "wb") as f:
    pickle.dump(high_arousal_PotterOriginals, f)

{0: ['zwingen', 'opfer', 'angriff', 'kampf', 'macht', 'waffe', 'gefahr', 'mord'], 3: ['verraten', 'lüge', 'scharf'], 1: ['drängen', 'schlagen'], 2: ['ärger', 'warnen', 'leid'], 4: ['kerker', 'leiche', 'tot'], 5: ['zorn', 'schreck', 'schmerz', 'angst', 'schrei', 'panik']}
a: ITEMS ['zwingen', 'opfer', 'angriff', 'kampf', 'macht', 'waffe', 'gefahr', 'mord']
a: AVG SIMILARITY 0.29268886388412546
a: MEAN VALENCE -1.8297794117647062
a: MEAN VALENCE STD 0.9769039745112281
a: MEAN AROUSAL 4.124900793650793
a: MEAN AROUSAL STD 0.8269579190128641
a: SUGGESTION VALUE 5.304998408721197
b: ITEMS ['drängen', 'schlagen']
b: AVG SIMILARITY 0.20976389944553375
b: MEAN VALENCE -1.6176470588235294
b: MEAN VALENCE STD 1.001321764210206
b: MEAN AROUSAL 3.938888888888889
b: MEAN AROUSAL STD 0.9264345658938871
b: SUGGESTION VALUE 3.552495590240691
c: ITEMS ['ärger', 'warnen', 'leid']
c: AVG SIMILARITY 0.24544737239678702
c: MEAN VALENCE -1.3666666666666665
c: MEAN VALENCE STD 1.0149001794597508
c: MEAN AROU



### 4. Low Arousal

In [45]:
# Select low-arousal BAWL words for the Potter originals corpus, cluster
# their embedding vectors with Affinity Propagation, score every cluster with
# a "suggestion value" and pickle the words of the best-scoring cluster.

# Define relevant arousal words by threshold value
# NOTE: this selection is overwritten by the ranking-based selection below.

arousal_mean = 2
arousal_std = 1

low_arousal_words = list(bawl[(bawl["POTTERORIGINALS_FREQ"] >= 50) & (bawl["AROUSAL_MEAN"] <= arousal_mean) & (bawl["AROUSAL_STD"] <= arousal_std)]["WORD_LOWER"])

# Define relevant arousal words by ranking: the bot_n words with the lowest
# mean arousal among words with frequency >= 50 and arousal std <= arousal_std

bot_n = 25
arousal_std = 1

low_arousal_words = list(bawl[(bawl["POTTERORIGINALS_FREQ"] >= 50) & (bawl["AROUSAL_STD"] <= arousal_std)].sort_values("AROUSAL_MEAN")["WORD_LOWER"])[:bot_n]

# Get vectors for arousal words

arousal_words_index = [modelPotterOriginals.key_to_index[word] for word in low_arousal_words]
arousal_word_vectors = [modelPotterOriginals.get_vector(i) for i in arousal_words_index]

# Normalize vectors to unit length (L2 norm)

arousal_word_cosine = np.array(normalize(arousal_word_vectors, norm='l2'))

# Cluster with Affinity Propagation

arousal_cos_cluster = AF(affinity='euclidean')
arousal_cos_cluster.fit(arousal_word_cosine)

# Transform cluster labels to a dictionary {label: [words]}

arousal_sem_groups = {}
for word, label in zip(low_arousal_words, arousal_cos_cluster.labels_):
    arousal_sem_groups.setdefault(label, []).append(word)

print(arousal_sem_groups)

# Print relevant information of clusters

suggestion_values = []

# One letter per cluster for the printed report ('w' was missing originally)
letters = cycle('abcdefghijklmnopqrstuvwxyz')
low_arousal_array = np.array(low_arousal_words)
for k, let in zip(range(len(arousal_cos_cluster.cluster_centers_)), letters):
    class_members = arousal_cos_cluster.labels_ == k
    members = low_arousal_array[class_members]

    print(let + ": ITEMS", list(members))

    # Pairwise model similarity between all distinct members (both orders)
    dist_list = [modelPotterOriginals.similarity(member1, member2)
                 for member1 in members
                 for member2 in members
                 if member1 != member2]
    # A cluster with a single word has no pairs and therefore no similarity
    avg_similarity = sum(dist_list) / len(dist_list) if dist_list else None
    if avg_similarity is not None:
        print(let + ": AVG SIMILARITY", avg_similarity)

    valence_means = []
    valence_stds = []
    arousal_means = []
    arousal_stds = []
    for word in arousal_sem_groups[k]:
        word_row = bawl[bawl["WORD_LOWER"] == word]
        # .item() extracts the value of the single matching row and raises if
        # there is not exactly one; float() on a Series is deprecated in pandas
        valence_means.append(float(word_row["EMO_MEAN"].item()))
        valence_stds.append(float(word_row["EMO_STD"].item()))
        arousal_means.append(float(word_row["AROUSAL_MEAN"].item()))
        arousal_stds.append(float(word_row["AROUSAL_STD"].item()))

    mean_valence = sum(valence_means) / len(valence_means)
    mean_valence_std = sum(valence_stds) / len(valence_stds)
    mean_arousal = sum(arousal_means) / len(arousal_means)
    mean_arousal_std = sum(arousal_stds) / len(arousal_stds)

    print(let + ": MEAN VALENCE", mean_valence)
    print(let + ": MEAN VALENCE STD", mean_valence_std)
    print(let + ": MEAN AROUSAL", mean_arousal)
    print(let + ": MEAN AROUSAL STD", mean_arousal_std)

    # Suggestion value: similarity + (4 - mean arousal) - mean arousal std,
    # multiplied by 1.05 per cluster member. Single-word clusters score 0.
    # The explicit check replaces a bare `except:` that hid every other error.
    if avg_similarity is None:
        suggestion_value = 0
    else:
        suggestion_value = (avg_similarity + (4 - mean_arousal) - mean_arousal_std) * (1.05 ** len(members))
    suggestion_values.append((suggestion_value, members))
    print(let + ": SUGGESTION VALUE", suggestion_value)

# Ranking for suggestion values (highest first)

suggestion_values = sorted(suggestion_values, key=lambda tup: tup[0], reverse=True)
for rank, (_value, ranked_members) in enumerate(suggestion_values, start=1):
    print(rank, list(ranked_members))

# Store the words of the best-ranked cluster
low_arousal_PotterOriginals = list(suggestion_values[0][1])
with open(path_pickled + "\\low_arousal_PotterOriginals.pkl", "wb") as f:
    pickle.dump(low_arousal_PotterOriginals, f)

{4: ['schlaf', 'kamin', 'teller', 'kissen', 'zimmer', 'tisch', 'sessel'], 3: ['pause', 'stumm', 'schweigen'], 0: ['schale', 'ding', 'feder', 'becher', 'glas', 'hand', 'uhr'], 2: ['baum', 'boden', 'gras', 'decke', 'garten', 'himmel', 'wange'], 1: ['wenig']}
a: ITEMS ['schale', 'ding', 'feder', 'becher', 'glas', 'hand', 'uhr']
a: AVG SIMILARITY 0.2106831180197852
a: MEAN VALENCE 0.6292436974789916
a: MEAN VALENCE STD 0.9465792071813844
a: MEAN AROUSAL 1.678206229860365
a: MEAN AROUSAL STD 0.8190312862461895
a: SUGGESTION VALUE 2.4109900306506002
b: ITEMS ['wenig']
b: MEAN VALENCE -0.9
b: MEAN VALENCE STD 0.99
b: MEAN AROUSAL 1.5263157894736843
b: MEAN AROUSAL STD 0.7723284457212328
b: SUGGESTION VALUE 0
c: ITEMS ['baum', 'boden', 'gras', 'decke', 'garten', 'himmel', 'wange']
c: AVG SIMILARITY 0.24257203865618931
c: MEAN VALENCE 1.2757142857142856
c: MEAN VALENCE STD 1.028023331091998
c: MEAN AROUSAL 1.5947619047619046
c: MEAN AROUSAL STD 0.7495403739693831
c: SUGGESTION VALUE 2.671056181

