In [315]:
# Importações das bibliotecas
import pandas as pd

# Definição das opções de exibição do pandas
pd.set_option('display.max_rows', 500) # Define o número de linhas a  serem exibidas (500)
pd.set_option('display.max_columns', 500) # Define o número de colunas a  serem exibidas (500)
pd.set_option('display.width', 1000) # Define a largura máxima de exibição em caracteres (1000)

import warnings, requests, zipfile, io
from scipy.io import arff

In [316]:
# Carregamento dos dados

# Definição da URL do arquivo CSV a ser lido
url = 'imports-85.data'

# Definição da lista de nomes de colunas que serão atribuídas ao dataframe
col_names = ['symboling', 'normalized-losses', 'fuel-type', 'aspiration', 'num-of-doors', 'body-style', 'drive-wheels', 'engine-location', 'wheel-base','length', 'width', 'height', 'curb-weight', 'engine-type', 'num-of-cylinders', 'engine-size', 'fuel-system', 'bore', 'stroke', 'compression-ratio', 'horsepower', 'peak-rpm', 'city-mpg', 'highway-mpg', 'price']

# Leitura do arquivo CSV para criar o dataframe 'df_car'
df_car = pd.read_csv(url, sep=',', names=col_names, na_values='?', header=None)

In [317]:
# Verificando o shape
df_car.shape

(205, 25)

In [318]:
# Examinando os dados
df_car.head(5)

Unnamed: 0,symboling,normalized-losses,fuel-type,aspiration,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,length,width,height,curb-weight,engine-type,num-of-cylinders,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price
3,,alfa-romero,gas,std,two,convertible,rwd,front,88.6,168.8,64.1,48.8,2548,dohc,four,130,mpfi,3.47,2.68,9.0,111.0,5000.0,21,27,13495.0
3,,alfa-romero,gas,std,two,convertible,rwd,front,88.6,168.8,64.1,48.8,2548,dohc,four,130,mpfi,3.47,2.68,9.0,111.0,5000.0,21,27,16500.0
1,,alfa-romero,gas,std,two,hatchback,rwd,front,94.5,171.2,65.5,52.4,2823,ohcv,six,152,mpfi,2.68,3.47,9.0,154.0,5000.0,19,26,16500.0
2,164.0,audi,gas,std,four,sedan,fwd,front,99.8,176.6,66.2,54.3,2337,ohc,four,109,mpfi,3.19,3.4,10.0,102.0,5500.0,24,30,13950.0
2,164.0,audi,gas,std,four,sedan,4wd,front,99.4,176.6,66.4,54.3,2824,ohc,five,136,mpfi,3.19,3.4,8.0,115.0,5500.0,18,22,17450.0


In [319]:
# Exibindo as informações
df_car.info()

<class 'pandas.core.frame.DataFrame'>
Index: 205 entries, 3 to -1
Data columns (total 25 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   symboling          164 non-null    float64
 1   normalized-losses  205 non-null    object 
 2   fuel-type          205 non-null    object 
 3   aspiration         205 non-null    object 
 4   num-of-doors       203 non-null    object 
 5   body-style         205 non-null    object 
 6   drive-wheels       205 non-null    object 
 7   engine-location    205 non-null    object 
 8   wheel-base         205 non-null    float64
 9   length             205 non-null    float64
 10  width              205 non-null    float64
 11  height             205 non-null    float64
 12  curb-weight        205 non-null    int64  
 13  engine-type        205 non-null    object 
 14  num-of-cylinders   205 non-null    object 
 15  engine-size        205 non-null    int64  
 16  fuel-system        205 non-null 

In [320]:
# Verificando colunas
df_car.columns

Index(['symboling', 'normalized-losses', 'fuel-type', 'aspiration', 'num-of-doors', 'body-style', 'drive-wheels', 'engine-location', 'wheel-base', 'length', 'width', 'height', 'curb-weight', 'engine-type', 'num-of-cylinders', 'engine-size', 'fuel-system', 'bore', 'stroke', 'compression-ratio', 'horsepower', 'peak-rpm', 'city-mpg', 'highway-mpg', 'price'], dtype='object')

In [321]:
# Copiando colunas
df_car = df_car[['aspiration', 'num-of-doors', 'drive-wheels', 'num-of-cylinders']].copy()

# Examinando os dados
df_car.head()

Unnamed: 0,aspiration,num-of-doors,drive-wheels,num-of-cylinders
3,std,two,rwd,four
3,std,two,rwd,four
1,std,two,rwd,six
2,std,four,fwd,four
2,std,four,4wd,five


In [322]:
# Verificando os componentes
df_car.info()

<class 'pandas.core.frame.DataFrame'>
Index: 205 entries, 3 to -1
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   aspiration        205 non-null    object
 1   num-of-doors      203 non-null    object
 2   drive-wheels      205 non-null    object
 3   num-of-cylinders  205 non-null    object
dtypes: object(4)
memory usage: 8.0+ KB


In [323]:
# Determinação dos valores ordinais
df_car['num-of-doors'].value_counts()

num-of-doors
four    114
two      89
Name: count, dtype: int64

In [324]:
# Mapeamento
# Definindo um dicionário (mapper) para converter as representações

door_mapper = {'two':2, 'four':4}

In [325]:
# Gerando nova coluna
# Aplicando dicionário de mapeamento 'door_mapper' à coluna categórica 'num-of-doors'BaseExceptionGroup

df_car['doors'] = df_car['num-of-doors'].replace(door_mapper)

  df_car['doors'] = df_car['num-of-doors'].replace(door_mapper)


In [326]:
# Exibindo o dataframe

df_car.head()

Unnamed: 0,aspiration,num-of-doors,drive-wheels,num-of-cylinders,doors
3,std,two,rwd,four,2.0
3,std,two,rwd,four,2.0
1,std,two,rwd,six,2.0
2,std,four,fwd,four,4.0
2,std,four,4wd,five,4.0


In [327]:
# Gerando novas colunas
df_car['num-of-cylinders'].value_counts()

num-of-cylinders
four      159
six        24
five       11
eight       5
two         4
twelve      1
three       1
Name: count, dtype: int64

In [328]:
# Criando mapeador
cylinder_mapper = {
    'two': 2,
    'three': 3,
    'four': 4,
    'five': 5,
    'six': 6,
    'eight': 8,
    'twelve': 12
}

In [329]:
# Mapeamento com replace
df_car['cylinders'] = df_car['num-of-cylinders'].replace(cylinder_mapper)

  df_car['cylinders'] = df_car['num-of-cylinders'].replace(cylinder_mapper)


In [330]:
df_car.head()

Unnamed: 0,aspiration,num-of-doors,drive-wheels,num-of-cylinders,doors,cylinders
3,std,two,rwd,four,2.0,4
3,std,two,rwd,four,2.0,4
1,std,two,rwd,six,2.0,6
2,std,four,fwd,four,4.0,4
2,std,four,4wd,five,4.0,5


In [331]:
# Codificando os dados não ordinais
df_car['drive-wheels'].value_counts()

drive-wheels
fwd    120
rwd     76
4wd      9
Name: count, dtype: int64

In [332]:
# Adicionando novos componentes
df_car = pd.get_dummies(df_car, columns=['drive-wheels'])

In [333]:
# Exibindo componentes
df_car.head()

Unnamed: 0,aspiration,num-of-doors,num-of-cylinders,doors,cylinders,drive-wheels_4wd,drive-wheels_fwd,drive-wheels_rwd
3,std,two,four,2.0,4,False,False,True
3,std,two,four,2.0,4,False,False,True
1,std,two,six,2.0,6,False,False,True
2,std,four,four,4.0,4,False,True,False
2,std,four,five,4.0,5,True,False,False


In [334]:
# Calculando e exibindo a contagem de frequência
df_car['aspiration'].value_counts()

aspiration
std      168
turbo     37
Name: count, dtype: int64

In [335]:
# Aplicando o One-Hot Encoder

df_car = pd.get_dummies(
    df_car,  # O DataFrame de entrada
    columns=['aspiration'], # A coluna categórica a ser codificada
    drop_first=True # Paramêtro que determina se 'aspiration' tem os valores 'std' e 'turbo', esta opção cria apenas uma coluna binária (e.g., 'aspiration_turbo')
    )

In [336]:
# Visualização do One-Hot Encoder
df_car.head()

Unnamed: 0,num-of-doors,num-of-cylinders,doors,cylinders,drive-wheels_4wd,drive-wheels_fwd,drive-wheels_rwd,aspiration_turbo
3,two,four,2.0,4,False,False,True,False
3,two,four,2.0,4,False,False,True,False
1,two,six,2.0,6,False,False,True,False
2,four,four,4.0,4,False,True,False,False
2,four,five,4.0,5,True,False,False,False
