In [18]:
# Importing the necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [19]:
# Install openpyxl package
%pip install openpyxl

# Load the data from the Excel file
df = pd.read_excel('cadastro_funcionarios.xlsx')
df.head()  # Displaying the first few rows of the dataset to understand its structure


Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.
Note: you may need to restart the kernel to use updated packages.


Unnamed: 0,ID,estado_civil,grau_instrucao,n_filhos,salario,idade,reg_proc
0,1,s,f,0,4.0,26,int
1,2,c,f,1,4.56,32,cap
2,3,c,f,2,5.25,36,cap
3,4,s,m,0,5.73,20,out
4,5,s,f,0,6.26,40,out


##### `👇 This cell evaluates and displays the mean, standard deviation, minimum, and maximum for the columns n_filhos, salario, and idade before any standardization is applied.` 


In [26]:
# Step 1: Evaluate the mean, std, max, and min before standardization
# Keys follow the pattern '<statistic>_<column>'. The column loop is the outer
# one, so the four statistics of each column stay grouped together.
before_std_stats = {
    f'{stat}_{column}': getattr(df[column], stat)()
    for column in ('n_filhos', 'salario', 'idade')
    for stat in ('mean', 'std', 'min', 'max')
}

# Display the statistics before standardization
before_std_stats


{'mean_n_filhos': np.float64(0.9413145539906104),
 'std_n_filhos': np.float64(1.2438392782971366),
 'min_n_filhos': np.int64(0),
 'max_n_filhos': np.int64(5),
 'mean_salario': np.float64(11.080657276995307),
 'std_salario': np.float64(4.215989065719285),
 'min_salario': np.float64(4.0),
 'max_salario': np.float64(23.9),
 'mean_idade': np.float64(34.62206572769953),
 'std_idade': np.float64(6.398189556826327),
 'min_idade': np.int64(20),
 'max_idade': np.int64(49)}

##### `👇 Here, the Z-Score standardization is applied to the columns n_filhos, salario, and idade. The formula used for Z-Score is (x - mean) / std, where std is the sample standard deviation (the pandas default, ddof=1). After standardization, the first few rows of the standardized DataFrame are displayed.`

In [32]:
# Step 2: Z-Score Standardization
# Every column is centred on its own mean and divided by its own standard
# deviation, one column at a time: (x - mean) / std.
df_zscore = pd.DataFrame({
    column: (df[column] - df[column].mean()) / df[column].std()
    for column in ['n_filhos', 'salario', 'idade']
})

# Preview the first rows of the standardized data
df_zscore.head()


Unnamed: 0,n_filhos,salario,idade
0,-0.756781,-1.679477,-1.347579
1,0.047181,-1.546649,-0.409814
2,0.851143,-1.382987,0.215363
3,-0.756781,-1.269135,-2.285344
4,-0.756781,-1.143423,0.84054


##### `👇 In this cell, we calculate and display the statistics (mean, standard deviation, min, and max) for the columns n_filhos, salario, and idade after applying Z-Score standardization.`

In [33]:
# Step 3: Evaluate the mean, std, max, and min after Z-Score standardization
# Keys follow the pattern '<statistic>_<column>_zscore'. The column loop is the
# outer one, so the four statistics of each column stay grouped together.
after_zscore_stats = {
    f'{stat}_{column}_zscore': getattr(df_zscore[column], stat)()
    for column in ('n_filhos', 'salario', 'idade')
    for stat in ('mean', 'std', 'min', 'max')
}

# Display the statistics after Z-Score standardization
after_zscore_stats


{'mean_n_filhos_zscore': np.float64(-6.463270190306075e-17),
 'std_n_filhos_zscore': np.float64(1.0),
 'min_n_filhos_zscore': np.float64(-0.7567814993584266),
 'max_n_filhos_zscore': np.float64(3.2630304548397002),
 'mean_salario_zscore': np.float64(-4.6702339439631e-16),
 'std_salario_zscore': np.float64(1.0),
 'min_salario_zscore': np.float64(-1.679477144418847),
 'max_salario_zscore': np.float64(3.0406489493154316),
 'mean_idade_zscore': np.float64(3.419278423258698e-16),
 'std_idade_zscore': np.float64(1.0),
 'min_idade_zscore': np.float64(-2.2853442521250438),
 'max_idade_zscore': np.float64(2.247187918488666)}

##### `👇 Here, we apply range standardization using Min-Max scaling, which transforms the data to a 0-1 range. The formula used is (x - min) / (max - min), computed separately for each column. The first few rows of the scaled DataFrame are displayed after applying the transformation.`

In [34]:
# Step 4: Range Standardization (Min-Max Scaling)
# Each column is rescaled to the [0, 1] range via (x - min) / (max - min).
range_features = df[['n_filhos', 'salario', 'idade']]
scaler = MinMaxScaler()

# By default fit_transform hands back a plain NumPy array, so the labels have
# to be re-attached. Reusing the input's columns AND index (instead of a
# retyped column list and a fresh 0..n-1 index) keeps df_range row-aligned
# with df and df_zscore even if df ever carries a non-default index.
df_range = pd.DataFrame(
    scaler.fit_transform(range_features),
    columns=range_features.columns,
    index=range_features.index,
)

# Display the scaled data
df_range.head()


Unnamed: 0,n_filhos,salario,idade
0,0.0,0.0,0.206897
1,0.2,0.028141,0.413793
2,0.4,0.062814,0.551724
3,0.0,0.086935,0.0
4,0.0,0.113568,0.689655


##### `👇 This cell calculates and displays the statistics (mean, standard deviation, min, and max) for the columns n_filhos, salario, and idade after applying Range standardization (Min-Max scaling).`

In [36]:
# Step 5: Evaluate the mean, std, max, and min after Range standardization
# Keys follow the pattern '<statistic>_<column>_range'. The column loop is the
# outer one, so the four statistics of each column stay grouped together.
after_range_stats = {
    f'{stat}_{column}_range': getattr(df_range[column], stat)()
    for column in ('n_filhos', 'salario', 'idade')
    for stat in ('mean', 'std', 'min', 'max')
}

# Display the statistics after Range standardization
after_range_stats


{'mean_n_filhos_range': np.float64(0.18826291079812207),
 'std_n_filhos_range': np.float64(0.24876785565942736),
 'min_n_filhos_range': np.float64(0.0),
 'max_n_filhos_range': np.float64(1.0),
 'mean_salario_range': np.float64(0.3558119234671008),
 'std_salario_range': np.float64(0.2118587470210696),
 'min_salario_range': np.float64(0.0),
 'max_salario_range': np.float64(1.0),
 'mean_idade_range': np.float64(0.5042091630241217),
 'std_idade_range': np.float64(0.22062722609745952),
 'min_idade_range': np.float64(0.0),
 'max_idade_range': np.float64(1.0)}