# Trabalho de Inteligência Artificial

In [33]:
#imports
import pandas as pd
import matplotlib.pyplot as plt

## Inciando Tratamento de Dados

In [34]:
csv_path = "/content/pet_adoption_data.csv"
df = pd.read_csv(csv_path)

In [35]:
print("Colunas do dataset:", df.columns.tolist())

Colunas do dataset: ['PetID', 'PetType', 'Breed', 'AgeMonths', 'Color', 'Size', 'WeightKg', 'Vaccinated', 'HealthCondition', 'TimeInShelterDays', 'AdoptionFee', 'PreviousOwner', 'AdoptionLikelihood']


### Limpeza e normalização simples

In [36]:
# Filtrando apenas linhas onde PetType == "Cat"
df_cats = df[df["PetType"] == "Cat"].copy()

# Arredondar a coluna Weight (em kg) para inteiro
df_cats["WeightKg"] = df_cats["WeightKg"].round(0).astype(int)

# Remover a coluna PetID
df_cats = df_cats.drop(columns=["PetID"])

In [37]:
print("Total de registros:", len(df))
print("Registros apenas com gatos:", len(df_cats))


df_cats.head()

Total de registros: 2007
Registros apenas com gatos: 505


Unnamed: 0,PetType,Breed,AgeMonths,Color,Size,WeightKg,Vaccinated,HealthCondition,TimeInShelterDays,AdoptionFee,PreviousOwner,AdoptionLikelihood
7,Cat,Siamese,13,Orange,Large,7,1,0,3,137,0,1
13,Cat,Siamese,27,Black,Large,28,1,0,5,135,0,0
14,Cat,Persian,160,Brown,Medium,6,1,0,11,404,0,1
16,Cat,Persian,8,Orange,Small,12,1,1,64,405,1,0
17,Cat,Persian,50,White,Medium,29,1,0,13,109,0,1


In [38]:
#Verificando existe nulidade nas colunas do df
print(df_cats.isnull().sum())

PetType               0
Breed                 0
AgeMonths             0
Color                 0
Size                  0
WeightKg              0
Vaccinated            0
HealthCondition       0
TimeInShelterDays     0
AdoptionFee           0
PreviousOwner         0
AdoptionLikelihood    0
dtype: int64


### Verificando se os valores binários são apenas 0 ou 1

In [39]:
binarias = ["Vaccinated", "HealthCondition", "PreviousOwner", "AdoptionLikelihood"]

for col in binarias:
    print(f"{col} → valores únicos antes: {df_cats[col].unique()}")
    df_cats[col] = df_cats[col].apply(lambda x: 1 if x > 1 else (0 if x < 0 else x))
    print(f"{col} → valores únicos depois: {df_cats[col].unique()}")


Vaccinated → valores únicos antes: [1 0]
Vaccinated → valores únicos depois: [1 0]
HealthCondition → valores únicos antes: [0 1]
HealthCondition → valores únicos depois: [0 1]
PreviousOwner → valores únicos antes: [0 1]
PreviousOwner → valores únicos depois: [0 1]
AdoptionLikelihood → valores únicos antes: [1 0]
AdoptionLikelihood → valores únicos depois: [1 0]


### Removendo gatos com  peso em kg fora do padrão esperado

In [40]:
df_cats = df_cats[(df_cats["WeightKg"] >= 1) & (df_cats["WeightKg"] <= 15)]

# Exibir total final
print("Total de registros válidos:", len(df_cats))
print(df_encoded.dtypes)
print(df_cats.head())

Total de registros válidos: 250
AgeMonths             int64
WeightKg              int64
Vaccinated            int64
HealthCondition       int64
TimeInShelterDays     int64
AdoptionFee           int64
PreviousOwner         int64
AdoptionLikelihood    int64
Breed_Siamese          bool
Color_Brown            bool
Color_Gray             bool
Color_Orange           bool
Color_White            bool
Size_Medium            bool
Size_Small             bool
dtype: object
   PetType    Breed  AgeMonths   Color    Size  WeightKg  Vaccinated  \
7      Cat  Siamese         13  Orange   Large         7           1   
14     Cat  Persian        160   Brown  Medium         6           1   
16     Cat  Persian          8  Orange   Small        12           1   
26     Cat  Persian        172  Orange   Large         2           1   
43     Cat  Persian        121    Gray   Large         3           1   

    HealthCondition  TimeInShelterDays  AdoptionFee  PreviousOwner  \
7                 0            

### Tratamento colunas categóricas para conversão numérica para uso com ML

In [41]:
categoricas = df_cats.select_dtypes(include=["object"]).columns.tolist()
print("Colunas categóricas:", categoricas)

df_encoded = pd.get_dummies(df_cats, columns=categoricas, drop_first=True)

print("Original:", df_cats.shape)
print("Após encoding:", df_encoded.shape)
df_encoded.head()


Colunas categóricas: ['PetType', 'Breed', 'Color', 'Size']
Original: (250, 12)
Após encoding: (250, 15)


Unnamed: 0,AgeMonths,WeightKg,Vaccinated,HealthCondition,TimeInShelterDays,AdoptionFee,PreviousOwner,AdoptionLikelihood,Breed_Siamese,Color_Brown,Color_Gray,Color_Orange,Color_White,Size_Medium,Size_Small
7,13,7,1,0,3,137,0,1,True,False,False,True,False,False,False
14,160,6,1,0,11,404,0,1,False,True,False,False,False,True,False
16,8,12,1,1,64,405,1,0,False,False,False,True,False,False,True
26,172,2,1,0,28,72,0,0,False,False,False,True,False,False,False
43,121,3,1,1,18,233,0,0,False,False,True,False,False,False,False


### Export csv tratado

In [42]:
df_cats.to_csv("/content/pet_adoption_data_cats_clean.csv", index=False)