In [1]:
import pandas as pd
import numpy as np
import statistics
from statistics import mode
import re
import sys
import matplotlib.pyplot as plt

### Fill the data frame

In [5]:
df = pd.read_csv("../../output/IPIP_300_AN.csv")

In [15]:
df120 = pd.read_csv("../../output/IPIP_120_AN.csv")

In [6]:
df.head(3)

Unnamed: 0,item#,Item,Sign,Key,Dimension,Facet,f_dimension,f_facet
0,i1,Worry about things.,+N1,N1,Neuroticism,Anxiety,dimension1,dimension1_facet3
1,i10,Like order.,+C2,C2,Conscientiousness,Orderliness,dimension3,dimension3_facet2
2,i100,Love order and regularity.,+C2,C2,Conscientiousness,Orderliness,dimension3,dimension3_facet2


In [7]:
def mod_dim(dim):
    lis = [df.iloc[i]['Dimension'] for i,e in df.iterrows() if df.iloc[i]['f_dimension'] == f'{dim}']
    try:
        return mode(lis)
    except:
        return f"Dimension not accurate enough"
    
def mod_fac(dim):
    lis = [df.iloc[i]['Facet'] for i,e in df.iterrows() if df.iloc[i]['f_facet'] == f'{dim}']
    try:
        return mode(lis)
    except:
        return f"Facet not accurate enough"

In [8]:
df['fa_dim'] = df['f_dimension'].apply(mod_dim)
df['fa_facet'] = df['f_facet'].apply(mod_fac)

In [9]:
df.drop(columns = ['f_dimension', 'f_facet'], inplace=True)

In [10]:
df.head(3)

Unnamed: 0,item#,Item,Sign,Key,Dimension,Facet,fa_dim,fa_facet
0,i1,Worry about things.,+N1,N1,Neuroticism,Anxiety,Neuroticism,Anxiety
1,i10,Like order.,+C2,C2,Conscientiousness,Orderliness,Conscientiousness,Orderliness
2,i100,Love order and regularity.,+C2,C2,Conscientiousness,Orderliness,Conscientiousness,Orderliness


### Dimension analysis

In [11]:
def dim_err(row):
    if row['Dimension'] == row['fa_dim']:
        val = 0
    elif row['Dimension'] != row['fa_dim']:
        val = 1
    return val

def fac_err(row):
    if row['Facet'] == row['fa_facet']:
        val = 0
    elif row['Facet'] != row['fa_facet']:
        val = 1
    return val

In [12]:
df['dim_error'] = df.apply(dim_err ,axis=1)
df['fac_error'] = df.apply(fac_err ,axis=1)

In [25]:
df.head(3)

Unnamed: 0,item#,Item,Sign,Key,Dimension,Facet,fa_dim,fa_facet,dim_error,fac_error
0,i1,Worry about things.,+N1,N1,Neuroticism,Anxiety,Neuroticism,Anxiety,0,0
1,i10,Like order.,+C2,C2,Conscientiousness,Orderliness,Conscientiousness,Orderliness,0,0
2,i100,Love order and regularity.,+C2,C2,Conscientiousness,Orderliness,Conscientiousness,Orderliness,0,0


In [14]:
d_error = pd.pivot_table(df, values='dim_error', index=['Dimension'], columns=['fa_dim'], aggfunc=np.sum, fill_value=0)
d_error

fa_dim,Agreeableness,Conscientiousness,Extraversion,Neuroticism,Openness
Dimension,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Agreeableness,0,1,0,2,0
Conscientiousness,4,0,0,1,3
Extraversion,2,8,0,0,0
Neuroticism,0,0,1,0,0
Openness,4,0,6,4,0


#### Deep dive into dimension item analysis

In [22]:
def get_dim_df(df, df_err, dim):
    if dim in set(list(df['Dimension'])):
        dff = df.loc[df['Dimension'] == str(dim)].drop(columns = ['dim_error','fac_error'])
        #print(df_err.loc[dim].idxmax())
        #break
        return dff.loc[dff['fa_dim'] == df_err.loc[dim].idxmax()]
    else:
        return f'Sorry the dimension {dim} is not specified'

In [23]:
d_error.loc['Extraversion'].idxmax()


'Conscientiousness'

In [24]:
res_df = get_dim_df(df, d_error,'Agreeableness')
res_df

Unnamed: 0,item#,Item,Sign,Key,Dimension,Facet,fa_dim,fa_facet
83,i174,Think highly of myself.,-A5,A5,Agreeableness,Modesty,Neuroticism,Depression
117,i204,Have a high opinion of myself.,-A5,A5,Agreeableness,Modesty,Neuroticism,Depression


### Facet analysis

In [32]:
def get_fac_df(df):
    dff = df.loc[(df['fa_facet'] == 'Facet not accurate enough')]
    return dff

In [33]:
fac_error = get_fac_df(df)
fac_error

Unnamed: 0,item#,Item,Sign,Key,Dimension,Facet,fa_dim,fa_facet,dim_error,fac_error
9,i107,Can manage many things at the same time.,+E4,E4,Extraversion,Activity Level,Conscientiousness,Facet not accurate enough,1,1
11,i109,Have a sharp tongue.,-A4,A4,Agreeableness,Cooperation,Agreeableness,Facet not accurate enough,0,1
13,i110,Plunge into tasks with all my heart.,+C4,C4,Conscientiousness,Achievement-Striving,Conscientiousness,Facet not accurate enough,0,1
17,i114,Seldom toot my own horn.,+A5,A5,Agreeableness,Modesty,Agreeableness,Facet not accurate enough,0,1
18,i115,Get to work at once.,+C5,C5,Conscientiousness,Self-Discipline,Conscientiousness,Facet not accurate enough,0,1
37,i132,Take control of things.,+E3,E3,Extraversion,Assertiveness,Conscientiousness,Facet not accurate enough,1,1
46,i140,Do more than what's expected of me.,+C4,C4,Conscientiousness,Achievement-Striving,Conscientiousness,Facet not accurate enough,0,1
51,i145,Carry out my plans.,+C5,C5,Conscientiousness,Self-Discipline,Conscientiousness,Facet not accurate enough,0,1
62,i155,Know how to get things done.,+C1,C1,Conscientiousness,Self-Efficacy,Conscientiousness,Facet not accurate enough,0,1
77,i169,Love a good fight.,-A4,A4,Agreeableness,Cooperation,Agreeableness,Facet not accurate enough,0,1


### Item analysis

In [29]:
def get_it_df(df):
    dff = df.loc[(df['dim_error'] == 1)]
    return dff

In [31]:
it_error = get_it_df(df)
it_error

Unnamed: 0,item#,Item,Sign,Key,Dimension,Facet,fa_dim,fa_facet,dim_error,fac_error
9,i107,Can manage many things at the same time.,+E4,E4,Extraversion,Activity Level,Conscientiousness,Facet not accurate enough,1,1
29,i125,Come up with good solutions.,+C1,C1,Conscientiousness,Self-Efficacy,Openness,Adventurousness,1,1
34,i13,Experience my emotions intensely.,+O3,O3,Openness,Emotionality,Neuroticism,Anxiety,1,1
37,i132,Take control of things.,+E3,E3,Extraversion,Assertiveness,Conscientiousness,Facet not accurate enough,1,1
40,i135,Listen to my conscience.,+C3,C3,Conscientiousness,Dutifulness,Agreeableness,Morality,1,1
71,i163,Seldom get emotional.,-O3,O3,Openness,Emotionality,Neuroticism,Anxiety,1,1
73,i165,Break rules.,-C3,C3,Conscientiousness,Dutifulness,Agreeableness,Morality,1,1
75,i167,Like to take it easy.,-E4,E4,Extraversion,Activity Level,Conscientiousness,Activity Level,1,0
76,i168,Dislike changes.,-O4,O4,Openness,Adventurousness,Extraversion,Adventurousness,1,0
78,i17,Am always busy.,+E4,E4,Extraversion,Activity Level,Conscientiousness,Facet not accurate enough,1,1


In [83]:
dims[1]

'Extraversion'

In [78]:
dims = list(set(list(df['Dimension'])))

In [80]:
res_df = {i:get_dim_df(df, d_error,f'{i}') for i in dims}

    item#                                      Item Sign Key     Dimension  \
9    i107  Can manage many things at the same time.  +E4  E4  Extraversion   
37   i132                   Take control of things.  +E3  E3  Extraversion   
75   i167                     Like to take it easy.  -E4  E4  Extraversion   
78    i17                           Am always busy.  +E4  E4  Extraversion   
108  i197                     Like to take my time.  -E4  E4  Extraversion   
142  i227               Like a leisurely lifestyle.  -E4  E4  Extraversion   
175  i257     Let things proceed at their own pace.  -E4  E4  Extraversion   
275   i77                Do a lot in my spare time.  +E4  E4  Extraversion   

              Facet             fa_dim                   fa_facet  
9    Activity Level  Conscientiousness  Facet not accurate enough  
37    Assertiveness  Conscientiousness  Facet not accurate enough  
75   Activity Level  Conscientiousness             Activity Level  
78   Activity Level  Cons

In [84]:
res_df['Neuroticism']

Unnamed: 0,item#,Item,Sign,Key,Dimension,Facet,fa_dim,fa_facet
213,i291,Never splurge.,-N5,N5,Neuroticism,Immoderation,Extraversion,Cheerfulness
