In [1]:
import pandas as pd

# Read the three cleaned input tables from data_clean/ in one pass:
# candidate profiles, interview scores and salary offers (in that order).
df_candidate, df_interview, df_salary = map(
    pd.read_csv,
    (
        "data_clean/candidate_profile_clean.csv",
        "data_clean/interview_score_clean.csv",
        "data_clean/salary_offer_clean.csv",
    ),
)


In [2]:
# Per-candidate interview statistics, one row per candidate_id.
# Note: "count" tallies non-null scores only, so interview_rounds does not
# include interview rows whose score is missing.
by_candidate = df_interview.groupby("candidate_id")
score_summary = by_candidate.agg(
    avg_score=("score", "mean"),
    max_score=("score", "max"),
    min_score=("score", "min"),
    interview_rounds=("score", "count"),
).reset_index()  # turn the candidate_id index back into a regular column


In [3]:
# Attach the interview statistics to every candidate profile.
# The left join keeps candidates that have no row in score_summary;
# their score columns are filled with NaN.
df_merge_1 = pd.merge(df_candidate, score_summary, how="left", on="candidate_id")


In [4]:
# Add the salary-offer columns to each candidate row.
# The left join keeps candidates without a matching row in df_salary;
# their salary columns are filled with NaN.
# validate="many_to_one" makes pandas raise MergeError if a candidate_id is
# repeated in df_salary, instead of silently duplicating candidate rows in
# df_final (and in the CSV exported from it).
df_final = df_merge_1.merge(
    df_salary,
    on="candidate_id",
    how="left",
    validate="many_to_one",
)


In [5]:
# Candidates whose avg_score is missing after the joins, i.e. no interview
# score was available to average for them.
lacks_score = df_final["avg_score"].isna()
no_interview = df_final.loc[lacks_score]

# Show only the identifying columns.
no_interview.loc[:, ["candidate_id", "full_name"]]


Unnamed: 0,candidate_id,full_name
11,UV111,Tran Thi B
17,UV117,Le Van C


In [6]:
# Candidates that have an average interview score but no offer_salary value.
has_score = df_final["avg_score"].notna()
lacks_offer = df_final["offer_salary"].isna()
no_salary_offer = df_final.loc[has_score & lacks_offer]

# Show who they are together with their average score.
no_salary_offer.loc[:, ["candidate_id", "full_name", "avg_score"]]


Unnamed: 0,candidate_id,full_name,avg_score
5,UV105,Tran Thi B,7.333333
13,UV113,Hoang Van E,7.666667
14,UV114,Nguyen Van A,7.0
18,UV118,Tran Thi B,7.0
23,UV123,Tran Thi B,6.333333


In [7]:
# Salary rows whose candidate_id does not appear in the candidate table
# (these would be dropped by the left joins above).
known_ids = df_candidate["candidate_id"]
is_known_candidate = df_salary["candidate_id"].isin(known_ids)
salary_only = df_salary.loc[~is_known_candidate]

salary_only


Unnamed: 0,candidate_id,offer_salary,position


In [8]:
# Print column dtypes and non-null counts for the merged table ...
df_final.info()

# ... then display its first five rows as the cell output.
df_final.head(n=5)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   candidate_id      25 non-null     object 
 1   full_name         25 non-null     object 
 2   experience_years  15 non-null     float64
 3   avg_score         23 non-null     float64
 4   max_score         23 non-null     float64
 5   min_score         23 non-null     float64
 6   interview_rounds  25 non-null     int64  
 7   offer_salary      20 non-null     float64
 8   position          25 non-null     object 
dtypes: float64(5), int64(1), object(3)
memory usage: 1.9+ KB


Unnamed: 0,candidate_id,full_name,experience_years,avg_score,max_score,min_score,interview_rounds,offer_salary,position
0,UV100,Hoang Van E,3.0,9.0,9.0,9.0,1,15000000.0,Data Analyst
1,UV101,Le Van C,,7.666667,9.0,7.0,3,20000000.0,Data Analyst
2,UV102,Le Van C,,5.0,5.0,5.0,1,25000000.0,Data Analyst
3,UV103,Tran Thi B,,6.0,6.0,6.0,2,15000000.0,Python Developer
4,UV104,Pham Thi D,,6.0,7.0,5.0,3,15000000.0,Data Analyst


In [9]:
# Persist the merged table; index=False leaves the RangeIndex out of the file.
df_final.to_csv("data_clean/hr_full_data.csv", index=False)
