In [1]:
import pandas as pd
import numpy as np
import statistics
from statistics import mode
import re
import sys
import matplotlib.pyplot as plt

### Fill the data frame

In [2]:
# Load the item table from CSV. The cells below read its 'Dimension', 'Facet',
# 'f_dimension' and 'f_facet' columns.
df = pd.read_csv(filepath_or_buffer="data/clean_data/IPIP_300_AN.csv")

In [3]:
# Peek at the first three rows to confirm the columns loaded as expected.
df.head(n=3)

Unnamed: 0,item#,Item,Sign,Key,Dimension,Facet,f_dimension,f_facet
0,i1,Worry about things.,+N1,N1,Neuroticism,Anxiety,dimension1,dimension1_facet3
1,i10,Like order.,+C2,C2,Conscientiousness,Orderliness,dimension3,dimension3_facet2
2,i100,Love order and regularity.,+C2,C2,Conscientiousness,Orderliness,dimension3,dimension3_facet2


In [4]:
def mod_dim(dim):
    """Return the most common 'Dimension' label for one 'f_dimension' group.

    Reads the module-level ``df``: selects the rows whose 'f_dimension' value
    equals the string form of ``dim`` and returns the mode of their
    'Dimension' values.

    Parameters
    ----------
    dim : object
        Group identifier; it is formatted to a string before comparison.

    Returns
    -------
    object
        The modal 'Dimension' value, or the string
        "Dimension not accurate enough" when ``statistics.mode`` cannot
        produce one (no matching rows; on Python < 3.8 also a tie).
    """
    # Boolean-mask selection works for any index. Positional lookups keyed by
    # index labels break as soon as the index is not 0..n-1.
    labels = df.loc[df['f_dimension'] == f'{dim}', 'Dimension'].tolist()
    try:
        return mode(labels)
    except statistics.StatisticsError:
        # Only the "no usable mode" case is turned into the fallback message;
        # unrelated errors are allowed to surface.
        return "Dimension not accurate enough"
    
def mod_fac(dim):
    """Return the most common 'Facet' label for one 'f_facet' group.

    Reads the module-level ``df``: selects the rows whose 'f_facet' value
    equals the string form of ``dim`` and returns the mode of their 'Facet'
    values.

    Parameters
    ----------
    dim : object
        Group identifier; it is formatted to a string before comparison.

    Returns
    -------
    object
        The modal 'Facet' value, or the string "Facet not accurate enough"
        when ``statistics.mode`` cannot produce one (no matching rows; on
        Python < 3.8 also a tie).
    """
    # Boolean-mask selection works for any index. Positional lookups keyed by
    # index labels break as soon as the index is not 0..n-1.
    labels = df.loc[df['f_facet'] == f'{dim}', 'Facet'].tolist()
    try:
        return mode(labels)
    except statistics.StatisticsError:
        # Only the "no usable mode" case is turned into the fallback message;
        # unrelated errors are allowed to surface.
        return "Facet not accurate enough"

In [5]:
# Give every row the modal label of the group it belongs to: 'fa_dim' comes
# from its 'f_dimension' group, 'fa_facet' from its 'f_facet' group.
df['fa_dim'] = df['f_dimension'].map(mod_dim)
df['fa_facet'] = df['f_facet'].map(mod_fac)

In [6]:
# The raw group identifiers are no longer needed once 'fa_dim' / 'fa_facet'
# exist. Rebind instead of mutating in place so the step stays chainable.
df = df.drop(columns=['f_dimension', 'f_facet'])

In [7]:
# Check the result: 'fa_dim' and 'fa_facet' should now sit next to the keyed
# 'Dimension' and 'Facet' columns.
df.head(n=3)

Unnamed: 0,item#,Item,Sign,Key,Dimension,Facet,fa_dim,fa_facet
0,i1,Worry about things.,+N1,N1,Neuroticism,Anxiety,Neuroticism,Anxiety
1,i10,Like order.,+C2,C2,Conscientiousness,Orderliness,Conscientiousness,Orderliness
2,i100,Love order and regularity.,+C2,C2,Conscientiousness,Orderliness,Conscientiousness,Orderliness


### Dimension analysis

In [8]:
def dim_err(row):
    """Flag a mismatch between a row's 'Dimension' and 'fa_dim' values.

    Parameters
    ----------
    row : mapping
        Anything indexable by 'Dimension' and 'fa_dim' (e.g. a DataFrame row).

    Returns
    -------
    int
        0 when the two values compare equal, 1 otherwise.
    """
    # A single conditional always yields a value. A separate `!=` test could
    # fall through with nothing to return.
    return 0 if row['Dimension'] == row['fa_dim'] else 1

def fac_err(row):
    """Flag a mismatch between a row's 'Facet' and 'fa_facet' values.

    Parameters
    ----------
    row : mapping
        Anything indexable by 'Facet' and 'fa_facet' (e.g. a DataFrame row).

    Returns
    -------
    int
        0 when the two values compare equal, 1 otherwise.
    """
    # A single conditional always yields a value. A separate `!=` test could
    # fall through with nothing to return.
    return 0 if row['Facet'] == row['fa_facet'] else 1

In [9]:
# 1 where the keyed label and the 'fa_*' label disagree, 0 where they match.
# A column-wise comparison gives the same flags as a row-by-row apply without
# calling a Python function per row.
df['dim_error'] = (df['Dimension'] != df['fa_dim']).astype('int64')
df['fac_error'] = (df['Facet'] != df['fa_facet']).astype('int64')

In [11]:
# TODO: def dimension_errors()

In [12]:
# Cross-tabulate the mismatch flags: rows are the keyed 'Dimension', columns
# are the assigned 'fa_dim', and each cell is the number of flagged items.
d_error = pd.pivot_table(
    df,
    values='dim_error',
    index=['Dimension'],
    columns=['fa_dim'],
    aggfunc='sum',  # string alias; passing np.sum triggers a FutureWarning in recent pandas
    fill_value=0,
)
d_error

fa_dim,Agreeableness,Conscientiousness,Extraversion,Neuroticism,Openness
Dimension,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Agreeableness,0,1,0,2,0
Conscientiousness,4,0,0,1,3
Extraversion,2,8,0,0,0
Neuroticism,0,0,1,0,0
Openness,4,0,6,4,0


#### Deep dive into dimension item analysis

In [74]:
def get_dim_df(df, df_err, dim):
    """Return the items of ``dim`` that were assigned to its most frequent wrong label.

    Parameters
    ----------
    df : pandas.DataFrame
        Item table with 'Dimension', 'fa_dim', 'dim_error' and 'fac_error'
        columns.
    df_err : pandas.DataFrame
        Error counts indexed by dimension, one column per assigned label
        (for example the pivot of 'dim_error').
    dim : str
        Dimension to inspect.

    Returns
    -------
    pandas.DataFrame or str
        The rows of ``df`` belonging to ``dim`` (without the two error
        columns) whose 'fa_dim' is the label with the highest error count.
        The frame is empty when ``dim`` has no recorded errors. If ``dim``
        does not occur in ``df['Dimension']``, a message string is returned
        instead.

    Notes
    -----
    Prints the selected label as a side effect.
    """
    if dim not in set(df['Dimension']):
        return f'Sorry the dimension {dim} is not specified'

    dff = df.loc[df['Dimension'] == str(dim)].drop(columns=['dim_error', 'fac_error'])

    # Leave out the dimension's own column: with an all-zero row idxmax() would
    # otherwise fall back to the first column, which may be `dim` itself and
    # would return the correctly classified items.
    confusions = df_err.loc[dim].drop(labels=[dim], errors='ignore')
    if confusions.empty or not (confusions > 0).any():
        return dff.iloc[0:0]

    worst_label = confusions.idxmax()
    print(worst_label)
    return dff.loc[dff['fa_dim'] == worst_label]

In [75]:
# Which assigned label collects the most errors for Extraversion items?
d_error.loc['Extraversion', :].idxmax()

'Conscientiousness'

In [76]:
# List the Agreeableness items behind that dimension's largest error count.
get_dim_df(df=df, df_err=d_error, dim='Agreeableness')

Neuroticism


Unnamed: 0,item#,Item,Sign,Key,Dimension,Facet,fa_dim,fa_facet
83,i174,Think highly of myself.,-A5,A5,Agreeableness,Modesty,Neuroticism,Depression
117,i204,Have a high opinion of myself.,-A5,A5,Agreeableness,Modesty,Neuroticism,Depression


### Factor analysis