In [44]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
import nltk
import pandas as pd
import statistics
import math
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from scipy import spatial
from numpy import nan

In [3]:
# Load spaCy's medium English pipeline.
# NOTE(review): a later cell rebinds `nlp` to "en_core_web_lg", so this load
# only matters for cells executed before that one.
spacy_model_name = "en_core_web_md"
nlp = spacy.load(spacy_model_name)
# Fetch the VADER lexicon for nltk.
# NOTE(review): sentiment_scores() uses the vaderSentiment package rather than
# nltk's VADER implementation - confirm this download is still required.
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/ChristinScheib/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [4]:
# Location of the Excel workbook with the candidate evaluation statements;
# it is read with pandas in the data-loading cell.
# NOTE(review): this is a machine-specific absolute path, so the notebook only
# runs unchanged on the original author's machine.
path = "/Users/ChristinScheib/Documents/Sommersemester_2021/Challenges_SCM/C-SCM-DATA-Candidates_Evaluation_Anonymized_SS21.xlsx"


In [78]:
# Read the workbook and keep only the two columns the analysis needs.
data = pd.read_excel(path, engine='openpyxl')
data_ = data[['Person ID', 'Evaluation Statement']]

# Every (Person ID, Evaluation Statement) pair, including incomplete rows.
statements = list(data_.itertuples(index=False, name=None))

# Pairs where both the id and the statement are present.
# dropna() detects missing values by value. The previous `nan not in pair`
# test only matched when the missing value happened to be the very same object
# as numpy.nan (NaN never compares equal to itself), so a NaN that had been
# re-boxed as a new float could slip through the filter.
statements_clean = list(data_.dropna().itertuples(index=False, name=None))

In [79]:
# Keyword lists describing the four core values. Every entry is embedded with
# spaCy and compared against adjectives / noun chunks of the evaluation
# statements, so a misspelled entry silently degrades the matching.

# Keywords for the core value "creative".
creative = [
    'creative', 'constructive', 'resourceful', 'imaginative', 'ingenious',
    'canny', 'inventive', 'full of ideas', 'clever', 'adventurous',
    'innovative', 'originative', 'visionary', 'fanciful', 'forward thinker',
    'pioneering', 'fertile', 'mastermind', 'genius', 'go-ahead', 'witty',
    'eccentrically', 'inspiring', 'stimulating', 'encouraging',
    'rich in ideas', 'inspirational',
]

# Keywords for the core value "open".
# 'emotional intelligence' was previously misspelled as
# 'emotional intelleligence'.
open_ = [
    'open', 'outgoing', 'curious', 'open-minded', 'broad-minded', 'honest',
    'empathetic', 'respectful', 'positivity', 'emotional intelligence',
    'interest', 'interested', 'adapting', 'informative', 'sharing',
    'feedback', 'honesty', 'trust', 'valuing', 'diversity', 'perspective',
]

# Keywords for the core value "responsible".
responsible = [
    'responsible', 'decisions', 'decision-maker', 'supportive', 'prepared',
    'proactive', 'reliable', 'trustworthy', 'discipline', 'respectable',
    'committed', 'integrity', 'pushing', 'assertive', 'obligated',
    'judicious', 'organized', 'managing', 'consistent',
]

# Keywords for the core value "entrepreneurial".
entrepreneurial = [
    'entrepreneurial', 'enterprising', 'entrepreneurially', 'profit-oriented',
    'for-profit', 'profit-seeking', 'need for achievement', "self-efficacy",
    'innovativeness', 'stress tolerant', 'need for autonomy', 'proactive',
    'disruptive', 'personality', 'venturesome', 'prepared to take risks',
    'visionary', 'goal-oriented', 'purposeful', 'active', 'engaged', 'maker',
    'doer', 'self-starter', 'calm', 'passionate', 'positive', 'convinced',
]

In [80]:
def cosine_similarity(x, y):
    """Return the cosine similarity of the vectors ``x`` and ``y``.

    The value is one minus the cosine distance computed by
    ``scipy.spatial.distance.cosine``.
    """
    distance = spatial.distance.cosine(x, y)
    return 1 - distance

In [81]:
# Shared analyzer instance, created on first use by sentiment_scores().
_vader_analyzer = None


def sentiment_scores(sentence):
    """Return the VADER compound sentiment score of ``sentence``.

    Parameters
    ----------
    sentence : str
        Text passed to ``SentimentIntensityAnalyzer.polarity_scores``.

    Returns
    -------
    The ``'compound'`` entry of the dictionary returned by
    ``polarity_scores``.
    """
    global _vader_analyzer
    # Build the analyzer once and reuse it: this function is called for every
    # matched sentence, and constructing a new analyzer per call repeats its
    # set-up work each time.
    if _vader_analyzer is None:
        _vader_analyzer = SentimentIntensityAnalyzer()

    sentiment_dict = _vader_analyzer.polarity_scores(sentence)
    return sentiment_dict['compound']

In [82]:
# Part-of-speech tags of single tokens that are compared against the keywords
# (noun chunks are added as candidates independently of this set).
_POS = {"ADJ"}

# Minimum cosine similarity between a candidate and a keyword to count as a match.
threshold = 0.725

language_model = "en_core_web_lg"
nlp = spacy.load(language_model)

# Keyword list per core value; the keys are the column names of the result.
core_value_keywords = {
    "creative": creative,
    "open": open_,
    "responsible": responsible,
    "entrepreneurial": entrepreneurial,
}

# Embed every keyword exactly once. The keyword vectors do not depend on the
# statement being analysed, so recomputing nlp(keyword) for every candidate of
# every statement only repeated identical work.
core_value_vectors = {
    value: [nlp(keyword).vector for keyword in keywords]
    for value, keywords in core_value_keywords.items()
}


def _candidate_spans(doc):
    """Yield ``(candidate, sentence)`` pairs for every sentence of ``doc``.

    Candidates are the tokens whose part-of-speech tag is in ``_POS`` followed
    by the noun chunks of the sentence.
    """
    for sent in doc.sents:
        for token in sent:
            if token.pos_ in _POS:
                yield token, sent
        for chunk in sent.noun_chunks:
            yield chunk, sent


def _core_value_scores(text):
    """Score one evaluation statement against all core values.

    A sentence counts for a core value when at least one of its candidates
    reaches ``threshold`` cosine similarity with at least one keyword of that
    value. The score of a core value is the mean ``sentiment_scores`` result
    over its matched sentences, or 0 when no sentence matched.

    Returns a dict mapping core value name -> score.
    """
    matched = {value: set() for value in core_value_vectors}
    for candidate, sent in _candidate_spans(nlp(text)):
        candidate_vector = candidate.vector
        for value, vectors in core_value_vectors.items():
            if sent in matched[value]:
                # Sentence already counted for this core value.
                continue
            if any(cosine_similarity(candidate_vector, vector) >= threshold
                   for vector in vectors):
                matched[value].add(sent)

    return {
        value: (statistics.mean([sentiment_scores(sent.text) for sent in sentences])
                if sentences else 0)
        for value, sentences in matched.items()
    }


# Person ID -> {core value -> score}.
# NOTE(review): a Person ID that appears in several rows keeps only the scores
# of its last statement, because the entry is overwritten - confirm this is
# intended.
dict_ = {}
index = 0
for id_, s in statements_clean:
    if len(s) != 0:
        dict_[id_] = _core_value_scores(s)

        # Lightweight progress output for the long-running loop.
        if index % 5 == 0:
            print(index)
    else:
        # Empty statement: nothing to analyse, every core value scores 0.
        dict_[id_] = {value: 0 for value in core_value_keywords}
    index = index + 1


0
5
10
15
20
25
30
35
40
45
50
55
60
65
70
75
80
85
90
95
100
105
110
115
120
125
130
135
140
145
150
155
160
165
170
175
180
185
190
195
200
205
210
215
220
225
230
235
240
245
250
255
260
265
270
275
280
285
290
295
300
305
310
315
320
325
330
335
340
345
350
355
360
365
370
375
380
385
390
395
400
405
410
415
420
425
430
435
440
445
450
455
460
465
470
475
480
485
490
495
500
505
510
515
520
525
530
535
540
545
550
555
560
565
570
575
580
585
590
595
600
605
610
615
620
625
630
635
640
645
650
655
660
665
670
675
680
685
690
695
700
705
710
715
720
725
730
735
740
745
750
755
760
765
770
775
780
785
790
795
800
805
810
815
820
825
830
835
840
845
850
855
860
865
870
875
880
885
890
895
900
905
910
915
920
925
930
935
940
945
950
955
960
965
970
975
980
985
990
995
1000
1005
1010
1015
1020
1025
1030
1035
1040
1045
1050
1055
1060
1065
1070
1075
1080
1085
1090
1095
1100
1105
1110
1115
1120
1125
1130
1135
1140
1145
1150
1155
1160
1165
1170
1175
1180
1185
1190
1195
1200
1205
1210
1215
12

In [83]:
# dict_ maps Person ID -> {core value -> score}. A frame built directly from
# it has one column per person, so it is transposed to get one row per person
# and one column per core value.
scores_by_person = pd.DataFrame(data=dict_)
df = scores_by_person.transpose()

print(df)

# Write the result table to the same folder as the input workbook.
df.to_excel('/Users/ChristinScheib/Documents/Sommersemester_2021/Challenges_SCM/CoreValues_Analyzed.xlsx')

          creative     open  responsible  entrepreneurial
580346.0  0.000000  0.62980      0.27160         0.419867
586882.0  0.831600  0.77830      0.00000         0.768367
574743.0  0.630067  0.61025      0.52895         0.724900
360306.0  0.000000  0.05160      0.00000         0.750600
790206.0  0.827333  0.00000      0.00000         0.000000
...            ...      ...          ...              ...
897173.0  0.000000  0.93180      0.93180         0.000000
573479.0  0.000000  0.00000      0.00000         0.784500
933345.0  0.612400  0.54230      0.53450         0.612400
535989.0  0.670500  0.00000      0.27320         0.000000
578781.0  0.401900  0.00000      0.68245         0.401900

[1643 rows x 4 columns]
