In [1]:
import numpy as np 
import pandas as pd 
import os
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt

#### Function for calculating BFI trait scores

In [2]:
def bfi_calc(df, filename=None):
    """Compute per-persona BFI (Big Five) trait scores from long-format answers.

    Each trait score is the mean of the answers to that trait's items, after
    reverse-keyed items have been flipped with ``6 - answer`` (this presumes a
    1-5 answer scale; the values are not validated here).

    Parameters
    ----------
    df : pandas.DataFrame
        One row per (persona, item) with at least the columns ``Name``,
        ``Itemnum`` and ``Answer``. A column ``adjusted_score`` holding the
        reverse-keyed answers is added to ``df`` in place (kept for backward
        compatibility with code that inspects it after the call).
        If a persona has several rows for the same item, the first row wins.
    filename : str or path-like, optional
        When given, the result table is additionally written to this CSV file
        (without the index). When omitted, nothing is written.

    Returns
    -------
    pandas.DataFrame
        One row per ``Name`` (sorted by name) with the columns ``Name``,
        ``Extraversion``, ``Agreeableness``, ``Conscientiousness``,
        ``Neuroticism`` and ``Openness``, rounded to 3 decimals. The table is
        also printed.

    Raises
    ------
    IndexError
        If a persona has no answer for one of the scored items.
    """
    # Items (by number) that make up each trait.
    scoring_key = {
        'Extraversion': [1, 6, 11, 16, 21, 26, 31, 36],
        'Agreeableness': [2, 7, 12, 17, 22, 27, 32, 37, 42],
        'Conscientiousness': [3, 8, 13, 18, 23, 28, 33, 38, 43],
        'Neuroticism': [4, 9, 14, 19, 24, 29, 34, 39],
        'Openness': [5, 10, 15, 20, 25, 30, 35, 40, 41, 44]
    }

    # Reverse-keyed items: their answer is flipped before averaging.
    reverse_scored = {6, 21, 31, 2, 12, 27, 37, 8, 18, 23, 43, 9, 24, 34, 35, 41}

    # Vectorised reverse-keying instead of a row-wise apply.
    is_reversed = df['Itemnum'].isin(reverse_scored)
    df['adjusted_score'] = df['Answer'].where(~is_reversed, 6 - df['Answer'])

    results = []
    for persona, group in df.groupby('Name'):
        # Index the persona's answers by item number once; keep the first
        # answer when an item occurs more than once.
        answers = (
            group.drop_duplicates(subset='Itemnum', keep='first')
            .set_index('Itemnum')['adjusted_score']
        )
        scores = {}
        for trait, items in scoring_key.items():
            missing = [item for item in items if item not in answers.index]
            if missing:
                raise IndexError(
                    f"No answer for {trait} item(s) {missing} of persona {persona!r}"
                )
            # np.mean on the raw values so a NaN answer yields NaN rather
            # than being skipped silently.
            scores[trait] = np.mean(answers.loc[items].to_numpy())
        results.append({'Name': persona, **scores})

    # Explicit columns keep the layout stable even when there are no personas.
    results_df = pd.DataFrame(results, columns=['Name', *scoring_key])
    results_df = results_df.round(3)

    print(results_df)
    if filename is not None:
        results_df.to_csv(filename, index=False)
    return results_df

#### Results for Falcon 11B prompt1+2 good 

In [13]:
# Load the raw "good" answers and keep only the first 18788 rows
# (the first block of the file), then inspect the end of that block.
df_good = pd.read_csv('results_good11b.csv').iloc[:18788]
print(df_good.tail())

df_good.shape

                  Name source                                           Item  \
18783  Gerhard Fischer    GPP              Likes to reflect, play with ideas   
18784  Gerhard Fischer    GPP                     Has few artistic interests   
18785  Gerhard Fischer    GPP                 Likes to cooperate with others   
18786  Gerhard Fischer    GPP                           Is easily distracted   
18787  Gerhard Fischer    GPP  Is sophisticated in art, music, or Literature   

       Itemnum  Answer  
18783       40       1  
18784       41       1  
18785       42       1  
18786       43       1  
18787       44       5  


(18788, 5)

In [14]:
# Sanity check: number of non-null entries per column for every persona.
df_good.groupby('Name').count()

Unnamed: 0_level_0,source,Item,Itemnum,Answer
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A. T. Ariyaratne,44,44,44,44
A.H.M. Noman Khan,44,44,44,44
Abdon Nababan,44,44,44,44
Abdul Razak Hussein,44,44,44,44
Abdul Samad Ismail,44,44,44,44
...,...,...,...,...
Zacarias Sarian,44,44,44,44
Zafrullah Chowdhury,44,44,44,44
Zakiah Hanum Abd Hamid,44,44,44,44
Élie Ducommun,44,44,44,44


In [15]:
# Bare expression: lets the notebook render the current df_good slice.
df_good

Unnamed: 0,Name,source,Item,Itemnum,Answer
0,Barack Obama,NPP,Is talkative,1,4
1,Barack Obama,NPP,Tends to find fault with others,2,3
2,Barack Obama,NPP,Does a thorough job,3,5
3,Barack Obama,NPP,"Is depressed, blue",4,1
4,Barack Obama,NPP,"Is original, comes up with new ideas",5,1
...,...,...,...,...,...
18783,Gerhard Fischer,GPP,"Likes to reflect, play with ideas",40,1
18784,Gerhard Fischer,GPP,Has few artistic interests,41,1
18785,Gerhard Fischer,GPP,Likes to cooperate with others,42,1
18786,Gerhard Fischer,GPP,Is easily distracted,43,1


In [16]:
# Column dtypes of the slice; bfi_calc does arithmetic on Answer and matches on Itemnum.
df_good.dtypes

Name       object
source     object
Item       object
Itemnum     int64
Answer      int64
dtype: object

In [17]:
# Score the slice, then persist the returned table explicitly
# (same call-then-save pattern as the other scoring cells in this notebook).
res_df = bfi_calc(df_good)
res_df.to_csv('goodp1_1_bfi.csv', index=False)

                       Name  Extraversion  Agreeableness  Conscientiousness  \
0          A. T. Ariyaratne         3.875          3.000              3.778   
1         A.H.M. Noman Khan         1.750          3.222              2.667   
2             Abdon Nababan         2.875          2.889              2.444   
3       Abdul Razak Hussein         3.000          2.889              2.444   
4        Abdul Samad Ismail         3.000          3.111              4.000   
..                      ...           ...            ...                ...   
422         Zacarias Sarian         3.125          3.111              3.222   
423     Zafrullah Chowdhury         2.750          3.444              3.333   
424  Zakiah Hanum Abd Hamid         2.375          4.444              2.000   
425           Élie Ducommun         3.250          3.556              3.222   
426     Óscar Arias Sánchez         4.125          3.444              3.667   

     Neuroticism  Openness  
0          2.500      

In [5]:
# Reload the "good" answers and keep the first 18788 rows.
# NOTE(review): a read of 'results_bfi_goodp2.csv' used to precede this line; it was
# dropped because its result was overwritten immediately without being used.
# Confirm that 'results_good11b.csv' is the intended source for this section.
df_good = pd.read_csv('results_good11b.csv')
df_good = df_good[0:18788]
print(df_good.tail())

df_good.shape

In [6]:

# Keep the rows from position 37576 onwards.
# NOTE(review): the cell above leaves df_good with at most 18788 rows, so on a
# top-to-bottom run this slice is empty. The recorded output (index starting at
# 37576, shape (18788, 5)) implies it was executed against a frame of 56364 rows;
# confirm which file/frame this cell is meant to slice.
df_good=df_good[37576:]
print(df_good.head())

df_good.shape

               Name source                                  Item  Itemnum  \
37576  Barack Obama    NPP                          Is talkative        1   
37577  Barack Obama    NPP       Tends to find fault with others        2   
37578  Barack Obama    NPP                   Does a thorough job        3   
37579  Barack Obama    NPP                    Is depressed, blue        4   
37580  Barack Obama    NPP  Is original, comes up with new ideas        5   

       Answer  
37576     1.0  
37577     1.0  
37578     1.0  
37579     1.0  
37580     4.0  


(18788, 5)

In [7]:
# Score the current "good" slice and append the rows to good_bfi.csv.
res_df = bfi_calc(df_good)
# Emit the header row only when the file is being created, so a fresh run does
# not produce a headerless CSV; when the file already exists the rows are
# appended without a header exactly as before.
good_bfi_path = 'good_bfi.csv'
res_df.to_csv(
    good_bfi_path,
    mode='a',
    header=not os.path.exists(good_bfi_path),
    index=False,
)

                       Name  Extraversion  Agreeableness  Conscientiousness  \
0          A. T. Ariyaratne         2.625          3.333              3.667   
1         A.H.M. Noman Khan         3.375          3.333              2.556   
2             Abdon Nababan         2.875          3.778              3.667   
3       Abdul Razak Hussein         2.875          3.222              4.000   
4        Abdul Samad Ismail         3.000          3.000              4.000   
..                      ...           ...            ...                ...   
422         Zacarias Sarian         2.875          2.667              2.444   
423     Zafrullah Chowdhury         2.875          3.000              3.556   
424  Zakiah Hanum Abd Hamid         3.375          3.111              2.667   
425           Élie Ducommun         2.625          4.111              4.444   
426     Óscar Arias Sánchez         2.375          3.667              3.222   

     Neuroticism  Openness  
0          3.750      

#### Results for Falcon 11B prompt1+2 bad 


In [25]:
# Load the raw "bad" answers and keep only the first 19668 rows
# (the first block of the file), then peek at the start.
df_bad = pd.read_csv('results_bad11b.csv').iloc[:19668]
print(df_bad.head())



              Name     source                                  Item  Itemnum  \
0  Osama bin Laden  Terrorist                          Is talkative        1   
1  Osama bin Laden  Terrorist       Tends to find fault with others        2   
2  Osama bin Laden  Terrorist                   Does a thorough job        3   
3  Osama bin Laden  Terrorist                    Is depressed, blue        4   
4  Osama bin Laden  Terrorist  Is original, comes up with new ideas        5   

   Answer  
0       4  
1       1  
2       4  
3       1  
4       1  


In [26]:
# (rows, columns) of the first "bad" block after slicing.
df_bad.shape

(19668, 5)

In [27]:

# Sanity check: number of non-null entries per column for every persona.
df_bad.groupby('Name').count()


Unnamed: 0_level_0,source,Item,Itemnum,Answer
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2 Pistols,44,44,44,44
Abdelhamid Abaaoud,44,44,44,44
Abdolmalek Rigi,44,44,44,44
Abdulaziz al-Omari,44,44,44,44
Abdullah Ahmed Abdullah,44,44,44,44
...,...,...,...,...
Yevno Azef,44,44,44,44
Yoo Young-chul,44,44,44,44
Yoshio Kodama,44,44,44,44
Zodiac Killer,44,44,44,44


In [28]:
# Column dtypes of the slice; bfi_calc does arithmetic on Answer and matches on Itemnum.
df_bad.dtypes


Name       object
source     object
Item       object
Itemnum     int64
Answer      int64
dtype: object

In [29]:
# Score the first "bad" block and start bad_bfi.csv (header row included).
res_df = bfi_calc(df_bad)
res_df.to_csv(
    'bad_bfi.csv',
    index=False,
)

                        Name  Extraversion  Agreeableness  Conscientiousness  \
0                  2 Pistols         3.875          2.778              2.667   
1         Abdelhamid Abaaoud         3.000          3.778              2.778   
2            Abdolmalek Rigi         3.000          3.667              3.667   
3         Abdulaziz al-Omari         2.625          3.222              2.667   
4    Abdullah Ahmed Abdullah         2.750          2.667              3.444   
..                       ...           ...            ...                ...   
442               Yevno Azef         3.625          3.000              2.778   
443           Yoo Young-chul         2.250          2.444              3.444   
444            Yoshio Kodama         2.875          3.444              4.111   
445            Zodiac Killer         2.500          3.222              3.556   
446  Ángel Maturino Reséndiz         1.750          2.778              1.444   

     Neuroticism  Openness  
0         

In [30]:
# Re-read the raw "bad" answers and keep everything from row 19668 onwards
# (the part of the file after the first block).
df_bad = pd.read_csv('results_bad11b.csv').iloc[19668:]
print(df_bad.head())

                  Name     source                                  Item  \
19668  Osama bin Laden  Terrorist                          Is talkative   
19669  Osama bin Laden  Terrorist       Tends to find fault with others   
19670  Osama bin Laden  Terrorist                   Does a thorough job   
19671  Osama bin Laden  Terrorist                    Is depressed, blue   
19672  Osama bin Laden  Terrorist  Is original, comes up with new ideas   

       Itemnum  Answer  
19668        1       1  
19669        2       4  
19670        3       4  
19671        4       1  
19672        5       5  


In [31]:
# (rows, columns) of the second "bad" block; the recorded output matches the first block's size.
df_bad.shape

(19668, 5)

In [32]:
# Score the second "bad" block and append it, without a header row,
# to the bad_bfi.csv written for the first block.
res_df = bfi_calc(df_bad)
res_df.to_csv(
    'bad_bfi.csv',
    index=False,
    header=False,
    mode='a',
)

                        Name  Extraversion  Agreeableness  Conscientiousness  \
0                  2 Pistols         2.750          2.222              2.444   
1         Abdelhamid Abaaoud         2.875          2.667              3.333   
2            Abdolmalek Rigi         2.750          3.667              3.667   
3         Abdulaziz al-Omari         3.750          3.444              2.556   
4    Abdullah Ahmed Abdullah         3.875          2.889              3.556   
..                       ...           ...            ...                ...   
442               Yevno Azef         2.250          2.778              3.444   
443           Yoo Young-chul         2.875          3.111              3.667   
444            Yoshio Kodama         3.500          2.556              3.111   
445            Zodiac Killer         3.125          4.333              3.333   
446  Ángel Maturino Reséndiz         2.250          3.000              2.444   

     Neuroticism  Openness  
0         

### Results for Falcon 11B neutral prompt 1

In [33]:
# Load the neutral-prompt answers (the whole file is used) and peek at the start.
df_neutral = pd.read_csv('results_neutral11b.csv')
print(df_neutral.head())

              Name source                                  Item  Itemnum  \
0  Michael Jackson  Actor                          Is talkative        1   
1  Michael Jackson  Actor       Tends to find fault with others        2   
2  Michael Jackson  Actor                   Does a thorough job        3   
3  Michael Jackson  Actor                    Is depressed, blue        4   
4  Michael Jackson  Actor  Is original, comes up with new ideas        5   

   Answer  
0       1  
1       1  
2       1  
3       1  
4       1  


In [34]:
# (rows, columns) of the full neutral frame.
df_neutral.shape

(24376, 5)

In [35]:
# Score every neutral persona and write the table to neutral_bfi.csv (header row included).
res_df = bfi_calc(df_neutral)
res_df.to_csv(
    'neutral_bfi.csv',
    index=False,
)

                       Name  Extraversion  Agreeableness  Conscientiousness  \
0                   50 Cent         2.875          2.333              3.222   
1               A. A. Milne         2.750          2.778              3.222   
2    Abdelkader El Djezairi         2.375          3.444              3.000   
3           Abraham Lincoln         2.125          3.556              2.667   
4                Ada Yonath         4.125          3.000              2.556   
..                      ...           ...            ...                ...   
549     Zbigniew Brzezinski         3.125          3.778              2.778   
550                 Zendaya         3.000          1.889              3.000   
551           Zhan Beleniuk         3.375          4.444              2.778   
552      Zlatan Ibrahimovic         2.375          3.778              1.889   
553              Édith Piaf         4.250          3.444              3.778   

     Neuroticism  Openness  
0          1.875      