# Projeto Prático

* Grupo 1 - categóricos binários: red, green, blue, gold, white, black, orange, crescent, triangle, icon, animate, text;

* Grupo 2 - categóricos não binários: name, landmass, zone, language, religion, mainhue, topleft, botright;

* Grupo 3 - números contínuos: area, population;

* Grupo 4 - números discretos: bars, stripes, colours, circles, crosses, saltires, quarters, sunstars;

# Objetivos

* 1 - Normalizar todos os atributos numéricos para a faixa comum: [0.0, 1.0]

* 2 - Converter todos os atributos categóricos com k > 2 categorias para k atributos binários;

* 3 - Remover os atributos que não tomarão parte no processo de classificação (por exemplo: remover o atributo original language após o mesmo ter sido transformado em 10 atributos binários);

In [21]:
import pandas as pd
from google.colab import drive
drive.mount('/content/drive')
flags = pd.read_csv("/content/drive/MyDrive/CDSI/MaterialApoio/flags.csv")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [22]:
# -----------------------------------------------------
# (1)  - Conversão dos atributos do Grupo 2
#        De: Categóricos não binários
#        Para: Categóricos binários
# -----------------------------------------------------
for c in flags.columns:
  if c in ['landmass', 'zone', 'language', 'religion', 'mainhue', 'topleft', 'botright']:
    dummies = pd.get_dummies(flags[c], prefix=c)
    flags = flags.join(dummies)


In [23]:
# -----------------------------------------------------
# (2) - Normalização dos atributos dos Grupos 3 e 4
#       De: Números contínuos e discretos
#       Para: Números ccom valores na faixa ente 0 e 1
# -----------------------------------------------------
for c in flags.columns:
  if c in ['area', 'population', 'bars', 'stripes', 'colours', 'circles', 'crosses', 'saltires', 'quarters', 'sunstars']:
    c_max = max(flags[c])
    c_min = min(flags[c])
    flags[c] = (flags[c] - c_min) / (c_max - c_min)


In [24]:
# -----------------------------------------------------
# (3) - Exclusão dos atributos indesejados
# -----------------------------------------------------
flags = flags.drop(columns=['name','landmass', 'zone', 'language', 'religion', 'mainhue', 'topleft', 'botright'])

In [26]:
# -----------------------------------------------------
# (4) - Imprimir a configuração final de flags
# -----------------------------------------------------
# imprime as primeiras linhas
print('head(): ')
print(flags.head())
print('-----------------------------------------')

# imprime as últimas linhas
print('tail(): ')
print(flags.tail())

flags

head(): 
       area  population  bars   stripes   colours  red  green  blue  gold  \
0  0.028926    0.015873   0.0  0.214286  0.571429    1      1     0     1   
1  0.001295    0.002976   0.0  0.000000  0.285714    1      0     0     1   
2  0.106598    0.019841   0.4  0.000000  0.285714    1      1     0     0   
3  0.000000    0.000000   0.0  0.000000  0.571429    1      0     1     1   
4  0.000000    0.000000   0.6  0.000000  0.285714    1      0     1     1   

   white  ...  topleft_red  topleft_white  botright_black  botright_blue  \
0      1  ...        False          False           False          False   
1      0  ...         True          False           False          False   
2      1  ...        False          False           False          False   
3      1  ...        False          False           False          False   
4      0  ...        False          False           False          False   

   botright_brown  botright_gold  botright_green  botright_orange  \
0 

Unnamed: 0,area,population,bars,stripes,colours,red,green,blue,gold,white,...,topleft_red,topleft_white,botright_black,botright_blue,botright_brown,botright_gold,botright_green,botright_orange,botright_red,botright_white
0,0.028926,0.015873,0.0,0.214286,0.571429,1,1,0,1,1,...,False,False,False,False,False,False,True,False,False,False
1,0.001295,0.002976,0.0,0.000000,0.285714,1,0,0,1,0,...,True,False,False,False,False,False,False,False,True,False
2,0.106598,0.019841,0.4,0.000000,0.285714,1,1,0,0,1,...,False,False,False,False,False,False,False,False,False,True
3,0.000000,0.000000,0.0,0.000000,0.571429,1,0,1,1,1,...,False,False,False,False,False,False,False,False,True,False
4,0.000000,0.000000,0.6,0.000000,0.285714,1,0,1,1,0,...,False,False,False,False,False,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
189,0.000134,0.000000,0.0,0.000000,0.285714,1,0,1,0,1,...,False,False,False,False,False,False,False,False,True,False
190,0.011428,0.021825,0.0,0.214286,0.428571,1,0,1,1,1,...,False,False,False,False,False,False,False,False,True,False
191,0.040398,0.027778,0.0,0.000000,0.428571,1,1,0,1,0,...,False,False,False,False,False,False,True,False,False,False
192,0.033613,0.005952,0.6,0.000000,0.428571,1,1,0,0,0,...,False,False,False,False,True,False,False,False,False,False


In [27]:
flags.to_csv("/content/drive/MyDrive/CDSI/MaterialApoio/flags_transformado.csv", index=False)