## Transformações de dados no Pandas

### Possibilidades:

- Criação de colunas a partir de operações com colunas
- Tipagem - convertendo tipo de dados de uma coluna
- Subistituição - Substituindo dados
- Trabalhando com Data

In [45]:
# Importando bibliotécas

import pandas as pd


In [46]:
# Operando e reatribuindo transformação

df = pd.read_csv("../data/clientes.csv")

df['qtdePontos'] = df['qtdePontos'] * 10

df.head()

Unnamed: 0,idCliente,flEmail,flTwitch,flYouTube,flBlueSky,flInstagram,qtdePontos,dtCriacao,dtAtualizacao
0,000ff655-fa9f-4baa-a108-47f581ec52a1,0,0,0,0,0,6860,,
1,001749bd-37b5-4b1e-8111-f9fbba90f530,0,0,0,0,0,500,,
2,0019bb9e-26d4-4ebf-8727-fc911ea28a92,0,0,0,0,0,20,,
3,0033b737-8235-4c0f-9801-dc4ca185af00,0,1,0,0,0,10900,0000-00-00 00:00:00.000,2025-02-19 12:48:24.632
4,00684343-40b5-4ce7-b2e8-71a5340973bf,0,0,0,0,0,0,,


In [47]:
# Operando e criando nova coluna

df['email_ou_twitch'] = df['flEmail'] + df['flTwitch']

df['email_e_twitch'] = df['flEmail'] * df['flTwitch']

df.head()

Unnamed: 0,idCliente,flEmail,flTwitch,flYouTube,flBlueSky,flInstagram,qtdePontos,dtCriacao,dtAtualizacao,email_ou_twitch,email_e_twitch
0,000ff655-fa9f-4baa-a108-47f581ec52a1,0,0,0,0,0,6860,,,0,0
1,001749bd-37b5-4b1e-8111-f9fbba90f530,0,0,0,0,0,500,,,0,0
2,0019bb9e-26d4-4ebf-8727-fc911ea28a92,0,0,0,0,0,20,,,0,0
3,0033b737-8235-4c0f-9801-dc4ca185af00,0,1,0,0,0,10900,0000-00-00 00:00:00.000,2025-02-19 12:48:24.632,1,0
4,00684343-40b5-4ce7-b2e8-71a5340973bf,0,0,0,0,0,0,,,0,0


In [48]:
# Operando e criando nova coluna a partir varias colunas

df['qtdeSocial'] = df['flEmail'] + df['flTwitch'] + df['flYouTube'] + df['flBlueSky'] + df['flInstagram']

df['todasSocial'] = df['flEmail'] * df['flTwitch'] * df['flYouTube'] * df['flBlueSky'] * df['flInstagram']

df.head()

Unnamed: 0,idCliente,flEmail,flTwitch,flYouTube,flBlueSky,flInstagram,qtdePontos,dtCriacao,dtAtualizacao,email_ou_twitch,email_e_twitch,qtdeSocial,todasSocial
0,000ff655-fa9f-4baa-a108-47f581ec52a1,0,0,0,0,0,6860,,,0,0,0,0
1,001749bd-37b5-4b1e-8111-f9fbba90f530,0,0,0,0,0,500,,,0,0,0,0
2,0019bb9e-26d4-4ebf-8727-fc911ea28a92,0,0,0,0,0,20,,,0,0,0,0
3,0033b737-8235-4c0f-9801-dc4ca185af00,0,1,0,0,0,10900,0000-00-00 00:00:00.000,2025-02-19 12:48:24.632,1,0,1,0
4,00684343-40b5-4ce7-b2e8-71a5340973bf,0,0,0,0,0,0,,,0,0,0,0


In [49]:
# Operando a partir de calculos envolvendo outras bibliotécas

import numpy as np

df['logPontos'] = np.log(df['qtdePontos'] + 1) # Logaritmo
df['logPontos'].describe()

count    2436.000000
mean        6.741565
std         2.478406
min         0.000000
25%         6.216606
50%         6.947937
75%         8.222419
max        12.637826
Name: logPontos, dtype: float64

In [50]:
# Convertendo tipagem de Series

print(df['qtdePontos'].dtypes)


df['qtdePontos'] = df['qtdePontos'].astype(float)

print("Tipagem transformada: ",df['qtdePontos'].dtypes)


int64
Tipagem transformada:  float64


In [51]:
# Substituindo valores 

df['dtCriacao'] = df['dtCriacao'].replace({ # recebe um dicionário
    "0000-00-00 00:00:00.000": "2024-02-01 09:00:00.000",
    })

In [52]:
# Convertendo tipagem para Data

df['dtCriacao'] = pd.to_datetime(df['dtCriacao'])

print(df['dtCriacao'].dtypes)

datetime64[ns]


In [53]:
# Algumas funções de transformação de data - atributo '.dt'

filtro = df['dtCriacao'].notna()

df = df[filtro]

print(df['dtCriacao'].dt.date.sample(),'\n') # Somente data

print(df['dtCriacao'].dt.year.sample(),'\n') # Ano

print(df['dtCriacao'].dt.month.sample(),'\n') # Mês

print(df['dtCriacao'].dt.day.sample(),'\n') # Dia

print(df['dtCriacao'].dt.day_name().sample(),'\n') # Nome dia

print(df['dtCriacao'].dt.month_name().sample(),'\n') # Nome Mês


1008    2024-02-01
Name: dtCriacao, dtype: object 

1193    2025
Name: dtCriacao, dtype: int32 

857    2
Name: dtCriacao, dtype: int32 

341    10
Name: dtCriacao, dtype: int32 

1266    Thursday
Name: dtCriacao, dtype: object 

1948    October
Name: dtCriacao, dtype: object 

