# Préparation de l'environnement
## Chargement des librairies et configuration des paramètres d'affichage

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import tarfile
import os

# Display configuration for plots, NumPy arrays and pandas DataFrames.

# Apply the seaborn default theme, then switch to the larger 'talk' context.
sns.set()
sns.set_context('talk')
# Set the default figure size AFTER the seaborn theme: older seaborn releases
# carried figure.figsize in their context parameters, so a size configured
# before sns.set() could be silently replaced by the theme call.
plt.rcParams['figure.figsize'] = (12, 9)

# NumPy: summarise arrays with more than 20 elements, print 2 decimals and
# suppress scientific notation for small values.
np.set_printoptions(threshold=20, precision=2, suppress=True)

# pandas: show at most 30 rows, never hide columns, render floats with 2 decimals.
pd.set_option('display.max_rows', 30)
pd.set_option('display.max_columns', None)
pd.set_option('display.precision', 2)
pd.set_option('display.float_format', '{:.2f}'.format)

# Hide FutureWarning messages only; every other warning category stays visible.
warnings.filterwarnings("ignore", category=FutureWarning)

## Chargement et exploration des données

In [8]:
# Build the CSV location from the data directory (a relative path, so it is
# resolved against the current working directory).
src_path = "src"
general_data_file_path = os.path.join(src_path, "general_data.csv")

# Read the CSV into a DataFrame using pandas' default parsing options.
general_data = pd.read_csv(filepath_or_buffer=general_data_file_path)

## Afficher les informations générales du jeu de données

In [9]:
# Print each column's non-null count and dtype; columns whose count is below
# the number of rows contain missing values.
general_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4410 entries, 0 to 4409
Data columns (total 24 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Age                      4410 non-null   int64  
 1   Attrition                4410 non-null   object 
 2   BusinessTravel           4410 non-null   object 
 3   Department               4410 non-null   object 
 4   DistanceFromHome         4410 non-null   int64  
 5   Education                4410 non-null   int64  
 6   EducationField           4410 non-null   object 
 7   EmployeeCount            4410 non-null   int64  
 8   EmployeeID               4410 non-null   int64  
 9   Gender                   4410 non-null   object 
 10  JobLevel                 4410 non-null   int64  
 11  JobRole                  4410 non-null   object 
 12  MaritalStatus            4410 non-null   object 
 13  MonthlyIncome            4410 non-null   int64  
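 14  NumCompaniesWorked       4391 non-null   float64
 15  Over18                   4410 non-null   object 
 16  PercentSalaryHike        4410 non-null   int64  
 17  StandardHours            4410 non-null   int64  
 18  StockOptionLevel         4410 non-null   int64  
 19  TotalWorkingYears        4401 non-null   float64
 20  TrainingTimesLastYear    4410 non-null   int64  
 21  YearsAtCompany           4410 non-null   int64  
 22  YearsSinceLastPromotion  4410 non-null   int64  
 23  YearsWithCurrManager     4410 non-null   int64  
dtypes: float64(2), int64(14), object(8)
memory usage: 827.0+ KB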

## Afficher les statistiques descriptives du jeu de données

In [10]:
# include='all' also summarises the non-numeric columns (unique/top/freq rows)
# alongside the numeric statistics.
general_data.describe(include='all')

Unnamed: 0,Age,Attrition,BusinessTravel,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeID,Gender,JobLevel,JobRole,MaritalStatus,MonthlyIncome,NumCompaniesWorked,Over18,PercentSalaryHike,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,YearsAtCompany,YearsSinceLastPromotion,YearsWithCurrManager
count,4410.0,4410,4410,4410,4410.0,4410.0,4410,4410.0,4410.0,4410,4410.0,4410,4410,4410.0,4391.0,4410,4410.0,4410.0,4410.0,4401.0,4410.0,4410.0,4410.0,4410.0
unique,,2,3,3,,,6,,,2,,9,3,,,1,,,,,,,,
top,,No,Travel_Rarely,Research & Development,,,Life Sciences,,,Male,,Sales Executive,Married,,,Y,,,,,,,,
freq,,3699,3129,2883,,,1818,,,2646,,978,2019,,,4410,,,,,,,,
mean,36.92,,,,9.19,2.91,,1.0,2205.5,,2.06,,,65029.31,2.69,,15.21,8.0,0.79,11.28,2.8,7.01,2.19,4.12
std,9.13,,,,8.11,1.02,,0.0,1273.2,,1.11,,,47068.89,2.5,,3.66,0.0,0.85,7.78,1.29,6.13,3.22,3.57
min,18.0,,,,1.0,1.0,,1.0,1.0,,1.0,,,10090.0,0.0,,11.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,30.0,,,,2.0,2.0,,1.0,1103.25,,1.0,,,29110.0,1.0,,12.0,8.0,0.0,6.0,2.0,3.0,0.0,2.0
50%,36.0,,,,7.0,3.0,,1.0,2205.5,,2.0,,,49190.0,2.0,,14.0,8.0,1.0,10.0,3.0,5.0,1.0,3.0
75%,43.0,,,,14.0,4.0,,1.0,3307.75,,3.0,,,83800.0,4.0,,18.0,8.0,1.0,15.0,3.0,9.0,3.0,7.0


## Afficher les premières lignes du jeu de données

In [11]:
# Preview the first rows to sanity-check the parsed values against the column names.
general_data.head()

Unnamed: 0,Age,Attrition,BusinessTravel,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeID,Gender,JobLevel,JobRole,MaritalStatus,MonthlyIncome,NumCompaniesWorked,Over18,PercentSalaryHike,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,YearsAtCompany,YearsSinceLastPromotion,YearsWithCurrManager
0,51,No,Travel_Rarely,Sales,6,2,Life Sciences,1,1,Female,1,Healthcare Representative,Married,131160,1.0,Y,11,8,0,1.0,6,1,0,0
1,31,Yes,Travel_Frequently,Research & Development,10,1,Life Sciences,1,2,Female,1,Research Scientist,Single,41890,0.0,Y,23,8,1,6.0,3,5,1,4
2,32,No,Travel_Frequently,Research & Development,17,4,Other,1,3,Male,4,Sales Executive,Married,193280,1.0,Y,15,8,3,5.0,2,5,0,3
3,38,No,Non-Travel,Research & Development,2,5,Life Sciences,1,4,Male,3,Human Resources,Married,83210,3.0,Y,11,8,3,13.0,5,8,7,5
4,32,No,Travel_Rarely,Research & Development,10,1,Medical,1,5,Male,1,Sales Executive,Single,23420,4.0,Y,12,8,2,9.0,2,6,0,4


# Bibliographie
## Sources méthodologiques et théoriques : 
références traitant des bases théoriques et des modèles méthodologiques employés.

## Sources sur les aspects techniques : 
ouvrages ou articles décrivant les techniques spécifiques utilisées.

## Sources éthiques et sociétales : 
ressources relatives aux normes et aux enjeux éthiques dans votre domaine.

## Sources spécifiques au projet : 
articles, études de cas ou rapports ayant directement inspiré ou orienté vos choix.