In [1]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori, association_rules

# Load the HR analytics dataset from the parent directory.
df = pd.read_csv("../HR_Analytics_Structure_Complet.csv")

# Satisfaction and work-environment factors kept for the analysis.
cols_satisfaction = [
    'JobSatisfaction',
    'EnvironmentSatisfaction',
    'RelationshipSatisfaction',
    'WorkLifeBalance',
    'JobInvolvement',
    'JobLevel',
    'OverTime',
    'Department',
]

# Independent copy, so later edits to df_sat never touch the raw frame.
df_sat = df.loc[:, cols_satisfaction].copy()

In [2]:
# Readable labels for the 1-4 satisfaction scores.
sat_mapping = {1: 'Low', 2: 'Medium', 3: 'High', 4: 'Very_High'}

# Labels are always derived from the raw `df` columns, never from df_sat's
# current contents, so re-running this cell yields the same result. Mapping
# values that were already converted to labels would produce NaN.
for col in ['JobSatisfaction', 'EnvironmentSatisfaction', 'RelationshipSatisfaction', 'WorkLifeBalance']:
    # The column name is appended so every label is unambiguous,
    # e.g. "Low_WorkLifeBalance".
    df_sat[col] = df[col].map(sat_mapping) + "_" + col

# Readable labels for the 1-4 job-involvement scores.
inv_mapping = {1: 'Low_Inv', 2: 'Medium_Inv', 3: 'High_Inv', 4: 'VeryHigh_Inv'}
df_sat['JobInvolvement'] = df['JobInvolvement'].map(inv_mapping)

# Job level becomes a categorical label ("Level_<n>"), again built from the raw
# column so that a second run cannot produce "Level_Level_<n>".
df_sat['JobLevel'] = df['JobLevel'].apply(lambda x: f"Level_{x}")

display(df_sat.head())

Unnamed: 0,JobSatisfaction,EnvironmentSatisfaction,RelationshipSatisfaction,WorkLifeBalance,JobInvolvement,JobLevel,OverTime,Department
0,Very_High_JobSatisfaction,Medium_EnvironmentSatisfaction,Low_RelationshipSatisfaction,Low_WorkLifeBalance,High_Inv,Level_2,Yes,Sales
1,Medium_JobSatisfaction,High_EnvironmentSatisfaction,Very_High_RelationshipSatisfaction,High_WorkLifeBalance,Medium_Inv,Level_2,No,Research & Development
2,High_JobSatisfaction,Very_High_EnvironmentSatisfaction,Medium_RelationshipSatisfaction,High_WorkLifeBalance,Medium_Inv,Level_1,Yes,Research & Development
3,High_JobSatisfaction,Very_High_EnvironmentSatisfaction,High_RelationshipSatisfaction,High_WorkLifeBalance,High_Inv,Level_1,Yes,Research & Development
4,Medium_JobSatisfaction,Low_EnvironmentSatisfaction,Very_High_RelationshipSatisfaction,High_WorkLifeBalance,High_Inv,Level_1,No,Research & Development


In [3]:
# One-hot encode every categorical column, then force a boolean dtype
# for the resulting indicator matrix.
basket = pd.get_dummies(df_sat).astype(bool)

# Report how many indicator columns (items) were produced.
print(f"Format prêt : {basket.shape[1]} critères analysés.")

Format prêt : 29 critères analysés.


In [4]:
# Mine frequent itemsets; the 5% minimum support is low enough to keep
# fairly specific profiles.
frequent_itemsets = apriori(basket, min_support=0.05, use_colnames=True)

# Derive association rules, keeping those whose lift is at least 1.2.
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.2)

# One-hot item name for a "Very_High" JobSatisfaction score: the get_dummies
# prefix ("JobSatisfaction_") followed by the mapped label.
target_item = 'JobSatisfaction_Very_High_JobSatisfaction'

# Keep the rules whose consequent itemset contains the target item. Membership
# is tested on the frozenset itself rather than by substring search on its
# string repr, which would also match any item name that merely contains the
# text. These rules describe co-occurrence, not causation. When the consequent
# holds other items as well, the lift may be driven by those items rather than
# by satisfaction.
high_sat_rules = rules[rules['consequents'].apply(lambda items: target_item in items)]

# Strongest associations (highest lift) first.
high_sat_rules = high_sat_rules.sort_values(by='lift', ascending=False)

print("Top 10 des combinaisons menant à une Satisfaction Très Élevée :")
display(high_sat_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']].head(10))

Top 10 des combinaisons menant à une Satisfaction Très Élevée :


Unnamed: 0,antecedents,consequents,support,confidence,lift
26,(Department_Sales),"(JobLevel_Level_2, JobSatisfaction_Very_High_J...",0.061209,0.19715,1.554273
27,(JobLevel_Level_2),"(Department_Sales, JobSatisfaction_Very_High_J...",0.061209,0.155431,1.49478
122,"(JobInvolvement_High_Inv, Department_Research ...","(JobLevel_Level_1, JobSatisfaction_Very_High_J...",0.064159,0.171598,1.393332
123,"(JobInvolvement_High_Inv, JobLevel_Level_1)","(Department_Research & Development, JobSatisfa...",0.064159,0.273585,1.363901
127,(JobLevel_Level_1),"(JobInvolvement_High_Inv, Department_Research ...",0.064159,0.160221,1.341109
22,(Department_Research & Development),"(JobLevel_Level_1, JobSatisfaction_Very_High_J...",0.102507,0.157775,1.281097
118,(Department_Research & Development),"(JobLevel_Level_1, WorkLifeBalance_High_WorkLi...",0.057522,0.088536,1.277175
23,(JobLevel_Level_1),"(Department_Research & Development, JobSatisfa...",0.102507,0.255985,1.276162
126,(Department_Research & Development),"(JobInvolvement_High_Inv, JobLevel_Level_1, Jo...",0.064159,0.098751,1.275304
135,(JobLevel_Level_1),"(Department_Research & Development, JobSatisfa...",0.071534,0.178637,1.268231
