# Case 1 - Discover the Pattern of Success

## A. Import Libraries & Data

In [1]:
from openpyxl import load_workbook
import duckdb
import pandas as pd
import sys
import os

# Both paths are relative, so they resolve against the notebook's working directory.
DB_FILE = 'employee_data.duckdb'
EXCEL_FILE = 'Study Case DA.xlsx'

# A workbook opened with read_only=True keeps the underlying file handle open
# until close() is called, so release it as soon as the sheet names are read.
wb = load_workbook(EXCEL_FILE, read_only=True)
try:
    sheet_names = wb.sheetnames
finally:
    wb.close()
print(f"Sheets in '{EXCEL_FILE}': {sheet_names}")

Sheets in 'Study Case DA.xlsx': ['Talent Variable (TV) & Talent G', 'dim_companies', 'dim_areas', 'dim_positions', 'dim_departments', 'dim_divisions', 'dim_directorates', 'dim_grades', 'dim_education', 'dim_majors', 'dim_competency_pillars', 'employees', 'profiles_psych', 'papi_scores', 'strengths', 'performance_yearly', 'competencies_yearly']


## B. Data Preparation

### 1. Define the SQL schema script and helper variables

In [2]:
# DDL script that create_schema() passes to conn.execute() in a single call.
# Declaration order matters for the REFERENCES clauses: the dimension tables
# come first, then employees, then the tables keyed by employee_id.
# The script also creates two secondary indexes and two table comments.
# The statements are plain CREATE TABLE (no IF NOT EXISTS), so the script is
# meant to run against a database that does not contain these tables yet.
SQL_SCHEMA_SCRIPT = '''
-- Dimension Tables
CREATE TABLE dim_companies (
  company_id BIGINT PRIMARY KEY,
  name TEXT UNIQUE NOT NULL
);

CREATE TABLE dim_areas (
  area_id BIGINT PRIMARY KEY,
  name TEXT UNIQUE NOT NULL
);

CREATE TABLE dim_positions (
  position_id BIGINT PRIMARY KEY,
  name TEXT UNIQUE NOT NULL
);

CREATE TABLE dim_departments (
  department_id BIGINT PRIMARY KEY,
  name TEXT UNIQUE NOT NULL
);

CREATE TABLE dim_divisions (
  division_id BIGINT PRIMARY KEY,
  name TEXT UNIQUE NOT NULL
);

CREATE TABLE dim_directorates (
  directorate_id BIGINT PRIMARY KEY,
  name TEXT UNIQUE NOT NULL
);

CREATE TABLE dim_grades (
  grade_id BIGINT PRIMARY KEY,
  name TEXT UNIQUE NOT NULL
);

CREATE TABLE dim_education (
  education_id BIGINT PRIMARY KEY,
  name TEXT UNIQUE NOT NULL
);

CREATE TABLE dim_majors (
  major_id BIGINT PRIMARY KEY,
  name TEXT UNIQUE NOT NULL
);

CREATE TABLE dim_competency_pillars (
  pillar_code VARCHAR(3) PRIMARY KEY,
  pillar_label TEXT NOT NULL
);

-- Fact & Profile Tables
CREATE TABLE employees (
  employee_id TEXT PRIMARY KEY,
  fullname TEXT,
  nip TEXT,
  company_id BIGINT REFERENCES dim_companies(company_id),
  area_id BIGINT REFERENCES dim_areas(area_id),
  position_id BIGINT REFERENCES dim_positions(position_id),
  department_id BIGINT REFERENCES dim_departments(department_id),
  division_id BIGINT REFERENCES dim_divisions(division_id),
  directorate_id BIGINT REFERENCES dim_directorates(directorate_id),
  grade_id BIGINT REFERENCES dim_grades(grade_id),
  education_id BIGINT REFERENCES dim_education(education_id),
  major_id BIGINT REFERENCES dim_majors(major_id),
  years_of_service_months BIGINT
);

CREATE TABLE profiles_psych (
  employee_id TEXT PRIMARY KEY REFERENCES employees(employee_id),
  pauli NUMERIC,
  faxtor NUMERIC,
  disc TEXT,
  disc_word TEXT,
  mbti TEXT,
  iq NUMERIC,
  gtq INT,
  tiki INT
);

CREATE TABLE papi_scores (
  employee_id TEXT REFERENCES employees(employee_id),
  scale_code TEXT,
  score INT,
  PRIMARY KEY (employee_id, scale_code)
);

CREATE TABLE strengths (
  employee_id TEXT REFERENCES employees(employee_id),
  rank INT,
  theme TEXT,
  PRIMARY KEY (employee_id, rank)
);

CREATE TABLE performance_yearly (
  employee_id TEXT REFERENCES employees(employee_id),
  year INT,
  rating INT,
  PRIMARY KEY (employee_id, year)
);

CREATE TABLE competencies_yearly (
  employee_id TEXT REFERENCES employees(employee_id),
  pillar_code VARCHAR(3) REFERENCES dim_competency_pillars(pillar_code),
  year INT,
  score INT,
  PRIMARY KEY (employee_id, pillar_code, year)
);

-- Indexes (Non-Primary Key)
CREATE INDEX performance_yearly_index_3 ON performance_yearly (year);
CREATE INDEX competencies_yearly_index_5 ON competencies_yearly (pillar_code, year);

-- Comments
COMMENT ON TABLE dim_competency_pillars IS 'Codes: GDR, CEX, IDS, QDD, STO, SEA, VCU, LIE, FTC, CSI';
COMMENT ON TABLE strengths IS 'CliftonStrengths rank 1..14';
'''

In [3]:
# Variables for data processing: load order, primary-key columns, and string-typed columns.

# Sheet/table names in load order: dimension tables first, then employees,
# then the tables whose foreign keys reference employees.
TABLE_NAMES = [
    'dim_companies',
    'dim_areas',
    'dim_positions',
    'dim_departments',
    'dim_divisions',
    'dim_directorates',
    'dim_grades',
    'dim_education',
    'dim_majors',
    'dim_competency_pillars',
    'employees',
    'profiles_psych',
    'papi_scores',
    'strengths',
    'performance_yearly',
    'competencies_yearly'
]

# Primary-key columns per table, mirroring the PRIMARY KEY clauses of the schema.
# The loader drops rows with a NULL in any of these columns before inserting.
# Every table is listed (including the dimension tables) so that a single
# NULL-key row cannot make the whole table insert fail.
PK_COLUMNS = {
    'dim_companies': ['company_id'],
    'dim_areas': ['area_id'],
    'dim_positions': ['position_id'],
    'dim_departments': ['department_id'],
    'dim_divisions': ['division_id'],
    'dim_directorates': ['directorate_id'],
    'dim_grades': ['grade_id'],
    'dim_education': ['education_id'],
    'dim_majors': ['major_id'],
    'dim_competency_pillars': ['pillar_code'],
    'employees': ['employee_id'],
    'profiles_psych': ['employee_id'],
    'papi_scores': ['employee_id', 'scale_code'],
    'strengths': ['employee_id', 'rank'],
    'performance_yearly': ['employee_id', 'year'],
    'competencies_yearly': ['employee_id', 'pillar_code', 'year']
}

# Columns forced to str when the sheets are read; all of them are declared
# TEXT / VARCHAR in the schema.
DTYPE_OVERRIDES = {
    'employees': {'employee_id': str, 'nip': str},
    'profiles_psych': {'employee_id': str},
    'papi_scores': {'employee_id': str, 'scale_code': str},
    'strengths': {'employee_id': str},
    'performance_yearly': {'employee_id': str},
    'competencies_yearly': {'employee_id': str, 'pillar_code': str},
    'dim_competency_pillars': {'pillar_code': str}
}

### 2. Define functions for data extraction

In [4]:
def create_schema(conn):
    """Create all tables, indexes and comments by running SQL_SCHEMA_SCRIPT.

    Args:
        conn: Open DuckDB connection on which the schema script is executed.

    Raises:
        SystemExit: With exit code 1 when DuckDB rejects the script; the error
            and a hint naming the database file are printed first.
    """
    try:
        print("Connecting to database and creating schema...")
        conn.execute(SQL_SCHEMA_SCRIPT)
        print("Schema created successfully.")
    except duckdb.Error as e:
        print(f"Error creating schema: {e}")
        # Must be an f-string: otherwise the literal text '{DB_FILE}' is printed
        # instead of the actual database path.
        print(f"The database might already exist. If so, delete the file '{DB_FILE}' and try again.")
        sys.exit(1)

def load_data_from_excel(conn):
    """Load every sheet listed in TABLE_NAMES from EXCEL_FILE into its DuckDB table.

    The workbook is opened once and reused for all sheets. For each table the
    sheet is read with the dtype overrides from DTYPE_OVERRIDES, rows with a
    NULL in any column listed in PK_COLUMNS are dropped, and the remaining rows
    are inserted with ``INSERT ... BY NAME``. A failure on one sheet is printed
    and that table is skipped; the remaining tables are still attempted.

    Args:
        conn: Open, writable DuckDB connection that already contains the
            target tables.

    Raises:
        SystemExit: With exit code 1 when EXCEL_FILE does not exist.
    """
    print(f"\nStarting data load from '{EXCEL_FILE}'...")

    # Open the workbook a single time instead of re-parsing the file per sheet.
    try:
        workbook = pd.ExcelFile(EXCEL_FILE, engine='openpyxl')
    except FileNotFoundError:
        print(f"Error: The file '{EXCEL_FILE}' was not found.")
        sys.exit(1)
    except Exception as e:
        # Nothing can be loaded from an unreadable workbook: report it once and
        # return, rather than repeating the same error for every table.
        print(f"    ...Error opening workbook '{EXCEL_FILE}': {e}")
        print("    ...Skipping data load. Please check your Excel file.")
        return

    with workbook:
        for table_name in TABLE_NAMES:
            print(f"  - Loading data for table: '{table_name}'")
            try:
                # Get specific dtypes for this table (None lets pandas infer them)
                dtypes = DTYPE_OVERRIDES.get(table_name)

                # Read the corresponding sheet from the already-open workbook
                df = pd.read_excel(workbook, sheet_name=table_name, dtype=dtypes)

                if df.empty:
                    print(f"    ...Sheet '{table_name}' is empty. Skipping.")
                    continue

                # Handle NULLs in Primary Key columns
                pk_cols = PK_COLUMNS.get(table_name)
                if pk_cols is not None:
                    initial_rows = len(df)
                    # Drop rows where any of the PK columns are null
                    df = df.dropna(subset=pk_cols)
                    dropped_rows = initial_rows - len(df)
                    if dropped_rows > 0:
                        print(f"    ...Dropped {dropped_rows} rows with NULL values in primary key columns: {pk_cols}")

                if df.empty:
                    print("    ...No valid data left after cleaning. Skipping.")
                    continue

                # Register the DataFrame as a temporary view in DuckDB
                conn.register('temp_table', df)
                try:
                    # INSERT BY NAME maps columns by name instead of by position.
                    conn.execute(f"INSERT INTO {table_name} BY NAME SELECT * FROM temp_table")
                finally:
                    # Always drop the view, even when the insert fails, so a
                    # failed table does not leave its DataFrame registered.
                    conn.unregister('temp_table')

                print(f"    ...Success: Loaded {len(df)} rows into '{table_name}'.")

            except Exception as e:
                # This often happens if the sheet doesn't exist
                print(f"    ...Error loading sheet '{table_name}': {e}")
                print("    ...Skipping this table. Please check your Excel file.")

def verify_data(conn):
    """Print row counts and small samples as a sanity check of the load.

    Counts the rows of ``employees`` and ``dim_companies``, then prints the
    first five rows of ``employees`` (id, name, nip) and of
    ``performance_yearly``. A DuckDB error is reported on stdout rather than
    raised.

    Args:
        conn: Open DuckDB connection to query.
    """
    print("\nVerifying data load...")

    def first_value(query):
        # First column of the first result row (used for COUNT(*) queries).
        return conn.execute(query).fetchone()[0]

    def as_frame(query):
        # Query result materialised via the relation's .df() method.
        return conn.execute(query).df()

    try:
        # Row counts
        n_employees = first_value("SELECT COUNT(*) FROM employees")
        print(f"Total employees loaded: {n_employees}")

        n_companies = first_value("SELECT COUNT(*) FROM dim_companies")
        print(f"Total companies loaded: {n_companies}")

        # Samples
        print("\nSample 5 employees:")
        employee_sample = as_frame("SELECT employee_id, fullname, nip FROM employees LIMIT 5")
        print(employee_sample)

        print("\nSample 5 performance records:")
        performance_sample = as_frame("SELECT * FROM performance_yearly LIMIT 5")
        print(performance_sample)

    except duckdb.Error as e:
        print(f"Error during verification: {e}")
        print("Verification failed. Data may be incomplete.")

### 3. Use the functions to extract data from the sources

In [5]:
# Rebuild from scratch: the schema script uses plain CREATE TABLE, so any
# database file left over from a previous run is removed first.
if not os.path.exists(DB_FILE):
    print(f"{DB_FILE} does not exist.")
else:
    os.remove(DB_FILE)
    print(f"{DB_FILE} deleted successfully.")

# One writable connection for the whole pipeline: schema -> load -> verify.
# The context manager closes it when the block ends.
with duckdb.connect(
    database=DB_FILE,
    read_only=False,
) as conn:
    create_schema(conn)
    load_data_from_excel(conn)
    verify_data(conn)

print(f"\nAll done! Your database is saved as '{DB_FILE}'.")

employee_data.duckdb deleted successfully.
Connecting to database and creating schema...
Schema created successfully.

Starting data load from 'Study Case DA.xlsx'...
  - Loading data for table: 'dim_companies'
    ...Success: Loaded 4 rows into 'dim_companies'.
  - Loading data for table: 'dim_areas'
    ...Success: Loaded 4 rows into 'dim_areas'.
  - Loading data for table: 'dim_positions'
    ...Success: Loaded 6 rows into 'dim_positions'.
  - Loading data for table: 'dim_departments'
    ...Success: Loaded 6 rows into 'dim_departments'.
  - Loading data for table: 'dim_divisions'
    ...Success: Loaded 5 rows into 'dim_divisions'.
  - Loading data for table: 'dim_directorates'
    ...Success: Loaded 3 rows into 'dim_directorates'.
  - Loading data for table: 'dim_grades'
    ...Success: Loaded 3 rows into 'dim_grades'.
  - Loading data for table: 'dim_education'
    ...Success: Loaded 4 rows into 'dim_education'.
  - Loading data for table: 'dim_majors'
    ...Success: Loaded 6 row

### 4. Denormalize the data for easy full-scale analytics

Aggregate performance_yearly, competencies_yearly, strengths, and papi_scores to one row per employee first; because these tables are in long format, joining them directly would duplicate employee rows.

In [6]:
# Query that flattens the normalized schema into one row per employee.
# Each long-format table (performance_yearly, competencies_yearly, strengths,
# papi_scores) is first aggregated to a single row per employee_id in its own
# CTE and only then LEFT JOINed to employees, so the joins cannot multiply rows.
# Full histories are kept as lists of structs next to the scalar summaries.
# NOTE(review): latest_performance_year is the max year over all rows, whereas
# DuckDB's arg_max is documented to ignore rows whose argument is NULL, so
# latest_performance_rating may come from an earlier year when the most recent
# rating is NULL. Confirm this pairing is intended.
SCRIPT_DENORMALIZED = '''
WITH
-- 1. Aggregate yearly performance
perf_agg AS (
  SELECT
    employee_id,
    avg(rating) AS avg_performance_rating,
    arg_max(rating, year) AS latest_performance_rating,
    arg_max(year, year) AS latest_performance_year,
    list(
      {
        'year' : year,
        'rating' : rating
      }
    ORDER BY
      year
    ) AS performance_history
  FROM
    performance_yearly
  GROUP BY
    employee_id
),

-- 2. Aggregate yearly competencies
latest_competencies AS (
  SELECT
    employee_id,
    pillar_code,
    arg_max(score, year) AS latest_score
  FROM
    competencies_yearly
  GROUP BY
    employee_id,
    pillar_code
),
comp_agg AS (
  SELECT
    employee_id,
    avg(latest_score) FILTER (WHERE pillar_code = 'GDR') AS comp_gdr_latest,
    avg(latest_score) FILTER (WHERE pillar_code = 'CEX') AS comp_cex_latest,
    avg(latest_score) FILTER (WHERE pillar_code = 'IDS') AS comp_ids_latest,
    avg(latest_score) FILTER (WHERE pillar_code = 'QDD') AS comp_qdd_latest,
    avg(latest_score) FILTER (WHERE pillar_code = 'STO') AS comp_sto_latest,
    avg(latest_score) FILTER (WHERE pillar_code = 'SEA') AS comp_sea_latest,
    avg(latest_score) FILTER (WHERE pillar_code = 'VCU') AS comp_vcu_latest,
    avg(latest_score) FILTER (WHERE pillar_code = 'LIE') AS comp_lie_latest,
    avg(latest_score) FILTER (WHERE pillar_code = 'FTC') AS comp_ftc_latest,
    avg(latest_score) FILTER (WHERE pillar_code = 'CSI') AS comp_csi_latest
  FROM
    latest_competencies
  GROUP BY
    employee_id
),
comp_history_agg AS (
  SELECT
    employee_id,
    list(
      {
        'year' : year,
        'pillar' : pillar_code,
        'score' : score
      }
    ORDER BY
      year,
      pillar_code
    ) AS competencies_history
  FROM
    competencies_yearly
  GROUP BY
    employee_id
),
-- 3. Aggregate strengths
strengths_agg AS (
  SELECT
    employee_id,
    min(theme) FILTER (WHERE rank = 1) AS strength_1,
    min(theme) FILTER (WHERE rank = 2) AS strength_2,
    min(theme) FILTER (WHERE rank = 3) AS strength_3,
    min(theme) FILTER (WHERE rank = 4) AS strength_4,
    min(theme) FILTER (WHERE rank = 5) AS strength_5,
    list(
      {
        'rank' : rank,
        'theme' : theme
      }
    ORDER BY
      rank
    ) AS strength_history
  FROM
    strengths
  GROUP BY
    employee_id
),
-- 4. Aggregate PAPI scores
papi_agg AS (
  SELECT
    employee_id,
    list(
      {
        'scale' : scale_code,
        'score' : score
      }
    ORDER BY
      scale_code
    ) AS papi_scores
  FROM
    papi_scores
  GROUP BY
    employee_id
)
-- 5. Finally, join everything together
SELECT
  -- Employee details
  e.employee_id,
  e.fullname,
  e.nip,
  e.years_of_service_months,

  -- Dimension Names
  d_co.name AS company,
  d_ar.name AS area,
  d_pos.name AS position,
  d_dep.name AS department,
  d_div.name AS division,
  d_dir.name AS directorate,
  d_gr.name AS grade,
  d_edu.name AS education,
  d_maj.name AS major,

  -- Psych Profile (1-to-1)
  p.pauli,
  p.faxtor,
  p.disc,
  p.disc_word,
  p.mbti,
  p.iq,
  p.gtq,
  p.tiki,

  -- Aggregated Performance
  pa.avg_performance_rating,
  pa.latest_performance_rating,
  pa.latest_performance_year,
  pa.performance_history,

  -- Aggregated Strengths
  sa.strength_1,
  sa.strength_2,
  sa.strength_3,
  sa.strength_4,
  sa.strength_5,
  sa.strength_history,

  -- Aggregated PAPI Scores
  papia.papi_scores,

  -- Aggregated Competencies
  ca.comp_gdr_latest,
  ca.comp_cex_latest,
  ca.comp_ids_latest,
  ca.comp_qdd_latest,
  ca.comp_sto_latest,
  ca.comp_sea_latest,
  ca.comp_vcu_latest,
  ca.comp_lie_latest,
  ca.comp_ftc_latest,
  ca.comp_csi_latest,
  ch.competencies_history

FROM
  employees e
  
  -- Join all dimension tables
  LEFT JOIN dim_companies d_co ON e.company_id = d_co.company_id
  LEFT JOIN dim_areas d_ar ON e.area_id = d_ar.area_id
  LEFT JOIN dim_positions d_pos ON e.position_id = d_pos.position_id
  LEFT JOIN dim_departments d_dep ON e.department_id = d_dep.department_id
  LEFT JOIN dim_divisions d_div ON e.division_id = d_div.division_id
  LEFT JOIN dim_directorates d_dir ON e.directorate_id = d_dir.directorate_id
  LEFT JOIN dim_grades d_gr ON e.grade_id = d_gr.grade_id
  LEFT JOIN dim_education d_edu ON e.education_id = d_edu.education_id
  LEFT JOIN dim_majors d_maj ON e.major_id = d_maj.major_id

  -- Join 1-to-1 profile table
  LEFT JOIN profiles_psych p ON e.employee_id = p.employee_id

  -- Join 1-to-many aggregated CTEs
  LEFT JOIN perf_agg pa ON e.employee_id = pa.employee_id
  LEFT JOIN comp_agg ca ON e.employee_id = ca.employee_id
  LEFT JOIN comp_history_agg ch ON e.employee_id = ch.employee_id -- ADDED JOIN
  LEFT JOIN strengths_agg sa ON e.employee_id = sa.employee_id
  LEFT JOIN papi_agg papia ON e.employee_id = papia.employee_id;
'''

In [7]:
# Run the denormalizing query and pull the result into pandas; the connection
# is closed as soon as the frame has been materialised.
with duckdb.connect(
    database=DB_FILE,
    read_only=False,
) as conn:
    conn.execute("USE main;")
    df_main = conn.execute(SCRIPT_DENORMALIZED).fetch_df()

In [8]:
# Quick look at the first rows of the denormalized frame.
df_main.head()

Unnamed: 0,employee_id,fullname,nip,years_of_service_months,company,area,position,department,division,directorate,...,comp_cex_latest,comp_ids_latest,comp_qdd_latest,comp_sto_latest,comp_sea_latest,comp_vcu_latest,comp_lie_latest,comp_ftc_latest,comp_csi_latest,competencies_history
0,EMP100000,Rendra Pratama,806137,64,PT Aurora Beauty Indonesia,Store,Brand Executive,R&D,Product Dev,Technology,...,3.0,4.0,3.0,3.0,2.0,4.0,3.0,4.0,3.0,"[{'year': 2021, 'pillar': 'CEX', 'score': 1}, ..."
1,EMP100001,Wulan Setiawan,476388,16,PT Mandala Distribution Center,Store,HRBP,Operations,Talent Management,Technology,...,2.0,3.0,2.0,2.0,4.0,3.0,2.0,3.0,3.0,"[{'year': 2021, 'pillar': 'CEX', 'score': None..."
2,EMP100002,Julia Jatmiko Situmorang,941921,58,PT Aurora Beauty Indonesia,Plant,Sales Supervisor,Finance,Digital Marketing,Technology,...,5.0,3.0,5.0,4.0,3.0,4.0,3.0,4.0,3.0,"[{'year': 2021, 'pillar': 'CEX', 'score': 3}, ..."
3,EMP100003,Oka Halim,751615,15,PT Aurora Beauty Indonesia,Plant,HRBP,HR,Digital Marketing,Commercial,...,3.0,3.0,3.0,1.0,3.0,4.0,2.0,3.0,3.0,"[{'year': 2021, 'pillar': 'CEX', 'score': None..."
4,EMP100004,Dwi Pratama,443809,34,PT Lumo Cosmetics Asia,Store,Supply Planner,Operations,Product Dev,Technology,...,4.0,3.0,3.0,5.0,3.0,4.0,3.0,3.0,3.0,"[{'year': 2021, 'pillar': 'CEX', 'score': 3}, ..."


In [None]:
# Column dtypes and non-null counts, to see which columns have missing values.
df_main.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2010 entries, 0 to 2009
Data columns (total 43 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   employee_id                2010 non-null   object 
 1   fullname                   2010 non-null   object 
 2   nip                        2010 non-null   object 
 3   years_of_service_months    2010 non-null   int64  
 4   company                    2010 non-null   object 
 5   area                       2010 non-null   object 
 6   position                   2010 non-null   object 
 7   department                 2010 non-null   object 
 8   division                   2010 non-null   object 
 9   directorate                2010 non-null   object 
 10  grade                      2010 non-null   object 
 11  education                  2010 non-null   object 
 12  major                      2010 non-null   object 
 13  pauli                      2010 non-null   float

**Final Data Overview**

-- Employee details
- employee_id,
- fullname,
- nip,
- years_of_service_months,

-- Dimension Names
- company,
- area,
- position,
- department,
- division,
- directorate,
- grade,
- education,
- major,

-- Psych Profile (1-to-1)
- pauli,
- faxtor,
- disc,
- disc_word,
- mbti,
- iq,
- gtq,
- tiki,

-- Aggregated Performance
- avg_performance_rating,
- latest_performance_rating,
- latest_performance_year,
- performance_history,

-- Aggregated Strengths
- strength_1,
- strength_2,
- strength_3,
- strength_4,
- strength_5,
- strength_history,

-- Aggregated PAPI Scores
- papi_scores,

-- Aggregated Competencies
- comp_gdr_latest,
- comp_cex_latest,
- comp_ids_latest,
- comp_qdd_latest,
- comp_sto_latest,
- comp_sea_latest,
- comp_vcu_latest,
- comp_lie_latest,
- comp_ftc_latest,
- comp_csi_latest,
- competencies_history

In [16]:
# Show the frame one column group at a time; display() renders each argument
# in order. The performance group is shown without .head().
display(
    df_main[["employee_id","fullname","nip","years_of_service_months"]].head(),
    df_main[["company","area","position","department","division","directorate","grade","education","major"]].head(),
    df_main[["pauli","faxtor","disc","disc_word","mbti","iq","gtq","tiki"]].head(),
    df_main[["avg_performance_rating","latest_performance_rating","latest_performance_year","performance_history"]],
)


Unnamed: 0,employee_id,fullname,nip,years_of_service_months
0,EMP100000,Rendra Pratama,806137,64
1,EMP100001,Wulan Setiawan,476388,16
2,EMP100002,Julia Jatmiko Situmorang,941921,58
3,EMP100003,Oka Halim,751615,15
4,EMP100004,Dwi Pratama,443809,34


Unnamed: 0,company,area,position,department,division,directorate,grade,education,major
0,PT Aurora Beauty Indonesia,Store,Brand Executive,R&D,Product Dev,Technology,IV,S2,Business
1,PT Mandala Distribution Center,Store,HRBP,Operations,Talent Management,Technology,III,D3,Design
2,PT Aurora Beauty Indonesia,Plant,Sales Supervisor,Finance,Digital Marketing,Technology,III,S2,Business
3,PT Aurora Beauty Indonesia,Plant,HRBP,HR,Digital Marketing,Commercial,IV,S1,Psychology
4,PT Lumo Cosmetics Asia,Store,Supply Planner,Operations,Product Dev,Technology,III,D3,Psychology


Unnamed: 0,pauli,faxtor,disc,disc_word,mbti,iq,gtq,tiki
0,86.0,75.0,SI,Steadiness-Influencer,,94.0,33,2
1,48.0,52.0,DS,Dominant-Steadiness,INTP,94.0,17,3
2,66.0,38.0,DC,Dominant-Conscientious,,109.0,20,3
3,39.0,63.0,SI,Steadiness-Influencer,ENTJ,85.0,39,3
4,75.0,100.0,,Steadiness-Conscientious,INTJ,134.0,21,9


Unnamed: 0,avg_performance_rating,latest_performance_rating,latest_performance_year,performance_history
0,2.4,3,2025,"[{'year': 2021, 'rating': 2}, {'year': 2022, '..."
1,3.5,3,2025,"[{'year': 2021, 'rating': None}, {'year': 2022..."
2,3.0,4,2025,"[{'year': 2021, 'rating': 2}, {'year': 2022, '..."
3,2.5,3,2025,"[{'year': 2021, 'rating': None}, {'year': 2022..."
4,3.2,4,2025,"[{'year': 2021, 'rating': 3}, {'year': 2022, '..."
...,...,...,...,...
2005,2.8,5,2025,"[{'year': 2021, 'rating': 1}, {'year': 2022, '..."
2006,3.6,3,2025,"[{'year': 2021, 'rating': 3}, {'year': 2022, '..."
2007,3.2,2,2025,"[{'year': 2021, 'rating': 4}, {'year': 2022, '..."
2008,4.0,5,2025,"[{'year': 2021, 'rating': None}, {'year': 2022..."


In [19]:
# Remaining column groups: strengths (first rows only), then PAPI scores and
# the competency columns, which are shown without .head().
display(
    df_main[["strength_1","strength_2","strength_3","strength_4","strength_5","strength_history"]].head(),
    df_main[["papi_scores"]],
    df_main[["comp_gdr_latest","comp_cex_latest","comp_ids_latest","comp_qdd_latest","comp_sto_latest"]],
    df_main[["comp_sea_latest","comp_vcu_latest","comp_lie_latest","comp_ftc_latest","comp_csi_latest","competencies_history"]],
)

Unnamed: 0,strength_1,strength_2,strength_3,strength_4,strength_5,strength_history
0,,Adaptability,Context,Competition,Activator,"[{'rank': 1, 'theme': None}, {'rank': 2, 'them..."
1,Responsibility,Includer,Significance,Context,Adaptability,"[{'rank': 1, 'theme': 'Responsibility'}, {'ran..."
2,Restorative,Maximizer,Woo,Includer,Command,"[{'rank': 1, 'theme': 'Restorative'}, {'rank':..."
3,Belief,Intellection,Arranger,,Adaptability,"[{'rank': 1, 'theme': 'Belief'}, {'rank': 2, '..."
4,Deliberative,Command,Woo,Communication,Ideation,"[{'rank': 1, 'theme': 'Deliberative'}, {'rank'..."


Unnamed: 0,papi_scores
0,"[{'scale': 'Papi_A', 'score': 1}, {'scale': 'P..."
1,"[{'scale': 'Papi_A', 'score': 3}, {'scale': 'P..."
2,"[{'scale': 'Papi_A', 'score': 2}, {'scale': 'P..."
3,"[{'scale': 'Papi_A', 'score': None}, {'scale':..."
4,"[{'scale': 'Papi_A', 'score': 5}, {'scale': 'P..."
...,...
2005,"[{'scale': 'Papi_A', 'score': 6}, {'scale': 'P..."
2006,"[{'scale': 'Papi_A', 'score': 1}, {'scale': 'P..."
2007,"[{'scale': 'Papi_A', 'score': 9}, {'scale': 'P..."
2008,"[{'scale': 'Papi_A', 'score': 1}, {'scale': 'P..."


Unnamed: 0,comp_gdr_latest,comp_cex_latest,comp_ids_latest,comp_qdd_latest,comp_sto_latest
0,3.0,3.0,4.0,3.0,3.0
1,2.0,2.0,3.0,2.0,2.0
2,3.0,5.0,3.0,5.0,4.0
3,4.0,3.0,3.0,3.0,1.0
4,4.0,4.0,3.0,3.0,5.0
...,...,...,...,...,...
2005,5.0,4.0,5.0,5.0,5.0
2006,3.0,3.0,4.0,2.0,3.0
2007,1.0,2.0,2.0,1.0,2.0
2008,5.0,5.0,5.0,5.0,5.0


Unnamed: 0,comp_sea_latest,comp_vcu_latest,comp_lie_latest,comp_ftc_latest,comp_csi_latest,competencies_history
0,2.0,4.0,3.0,4.0,3.0,"[{'year': 2021, 'pillar': 'CEX', 'score': 1}, ..."
1,4.0,3.0,2.0,3.0,3.0,"[{'year': 2021, 'pillar': 'CEX', 'score': None..."
2,3.0,4.0,3.0,4.0,3.0,"[{'year': 2021, 'pillar': 'CEX', 'score': 3}, ..."
3,3.0,4.0,2.0,3.0,3.0,"[{'year': 2021, 'pillar': 'CEX', 'score': None..."
4,3.0,4.0,3.0,3.0,3.0,"[{'year': 2021, 'pillar': 'CEX', 'score': 3}, ..."
...,...,...,...,...,...,...
2005,5.0,5.0,2.0,5.0,5.0,"[{'year': 2021, 'pillar': 'CEX', 'score': 1}, ..."
2006,3.0,3.0,3.0,2.0,3.0,"[{'year': 2021, 'pillar': 'CEX', 'score': 2}, ..."
2007,2.0,3.0,2.0,2.0,2.0,"[{'year': 2021, 'pillar': 'CEX', 'score': 5}, ..."
2008,5.0,5.0,4.0,4.0,5.0,"[{'year': 2021, 'pillar': 'CEX', 'score': None..."


## C. Data Exploration

Based on the data types, the analysis is divided into three general sections:
- Analysis between employees
- Analysis of yearly trends
- Analysis of the different types of strengths and PAPI scores