## Manipulando arquivos com Pandas

### Importando e exportando arquivos de diferentes extenções

In [1]:
# Importando bilibotécas

import pandas as pd 

In [11]:

# Importando arquivo CSV da pasta 'data'

df = pd.read_csv("../data/clientes.csv") 
df

Unnamed: 0,idCliente,flEmail,flTwitch,flYouTube,flBlueSky,flInstagram,qtdePontos,dtCriacao,dtAtualizacao
0,000ff655-fa9f-4baa-a108-47f581ec52a1,0,0,0,0,0,686,,
1,001749bd-37b5-4b1e-8111-f9fbba90f530,0,0,0,0,0,50,,
2,0019bb9e-26d4-4ebf-8727-fc911ea28a92,0,0,0,0,0,2,,
3,0033b737-8235-4c0f-9801-dc4ca185af00,0,1,0,0,0,1090,0000-00-00 00:00:00.000,2025-02-19 12:48:24.632
4,00684343-40b5-4ce7-b2e8-71a5340973bf,0,0,0,0,0,0,,
...,...,...,...,...,...,...,...,...,...
2431,fecbdf63-3bf4-44e5-8b1a-0acc9d963603,0,0,0,0,0,111,,
2432,ff07d926-f09e-420b-bebf-3dba02ae5dff,0,1,0,0,0,54,2025-01-21 11:49:58.172,2025-02-28 11:55:04.999
2433,ff1ceaef-650c-422b-bdc3-6984e29e7aa5,0,0,0,0,0,162,,
2434,ff2cabd3-3316-4b3f-8494-c25f95e90524,0,1,0,0,0,57,2025-02-10 11:12:30.631,2025-02-10 12:37:47.892


In [3]:

# Exportando arquivo CSV

df.to_csv("clientes.csv", index=False) # index=False ignora criação de índice

In [4]:
# Exportando arquivos de outros tipos

df.to_parquet("clientes.parquet", index=False)
df.to_excel("clientes.xlsx", index=False)

In [5]:
# Importando arquivo Parquet

df_2 = pd.read_parquet("clientes.parquet") # necessário bibliotéca de apoio pyarrow
df_2

Unnamed: 0,idCliente,flEmail,flTwitch,flYouTube,flBlueSky,flInstagram,qtdePontos,dtCriacao,dtAtualizacao
0,000ff655-fa9f-4baa-a108-47f581ec52a1,0,0,0,0,0,686,,
1,001749bd-37b5-4b1e-8111-f9fbba90f530,0,0,0,0,0,50,,
2,0019bb9e-26d4-4ebf-8727-fc911ea28a92,0,0,0,0,0,2,,
3,0033b737-8235-4c0f-9801-dc4ca185af00,0,1,0,0,0,1090,0000-00-00 00:00:00.000,2025-02-19 12:48:24.632
4,00684343-40b5-4ce7-b2e8-71a5340973bf,0,0,0,0,0,0,,
...,...,...,...,...,...,...,...,...,...
2431,fecbdf63-3bf4-44e5-8b1a-0acc9d963603,0,0,0,0,0,111,,
2432,ff07d926-f09e-420b-bebf-3dba02ae5dff,0,1,0,0,0,54,2025-01-21 11:49:58.172,2025-02-28 11:55:04.999
2433,ff1ceaef-650c-422b-bdc3-6984e29e7aa5,0,0,0,0,0,162,,
2434,ff2cabd3-3316-4b3f-8494-c25f95e90524,0,1,0,0,0,57,2025-02-10 11:12:30.631,2025-02-10 12:37:47.892


In [6]:
# Importando arquivo Excel

df_3 = pd.read_excel("clientes.xlsx") # necessário bibliotéca de apoio openpyxl
df_3

Unnamed: 0,idCliente,flEmail,flTwitch,flYouTube,flBlueSky,flInstagram,qtdePontos,dtCriacao,dtAtualizacao
0,000ff655-fa9f-4baa-a108-47f581ec52a1,0,0,0,0,0,686,,
1,001749bd-37b5-4b1e-8111-f9fbba90f530,0,0,0,0,0,50,,
2,0019bb9e-26d4-4ebf-8727-fc911ea28a92,0,0,0,0,0,2,,
3,0033b737-8235-4c0f-9801-dc4ca185af00,0,1,0,0,0,1090,0000-00-00 00:00:00.000,2025-02-19 12:48:24.632
4,00684343-40b5-4ce7-b2e8-71a5340973bf,0,0,0,0,0,0,,
...,...,...,...,...,...,...,...,...,...
2431,fecbdf63-3bf4-44e5-8b1a-0acc9d963603,0,0,0,0,0,111,,
2432,ff07d926-f09e-420b-bebf-3dba02ae5dff,0,1,0,0,0,54,2025-01-21 11:49:58.172,2025-02-28 11:55:04.999
2433,ff1ceaef-650c-422b-bdc3-6984e29e7aa5,0,0,0,0,0,162,,
2434,ff2cabd3-3316-4b3f-8494-c25f95e90524,0,1,0,0,0,57,2025-02-10 11:12:30.631,2025-02-10 12:37:47.892


In [7]:
# Ajustando separador para leitura

texto_csv = '''
sml; 32
jnf; 31
eve; 12
kev; 5
'''
with open("texto.csv", 'w') as file:
    file.write(texto_csv)

df_csv = pd.read_csv("texto.csv", sep=';') # estipula ';' como separador
df_csv

Unnamed: 0,sml,32
0,jnf,31
1,eve,12
2,kev,5


In [8]:
# Importando Área de Transferência

texto_csv = pd.DataFrame(df_csv).to_clipboard() # move arquivo para área de transferência

df_clip = pd.read_clipboard(sep=';') # abre arquivo da área de transferência
df_clip

Unnamed: 0,sml,32
0,jnf,31
1,eve,12
2,kev,5


In [9]:
# Importando HTML


url = "https://pt.wikipedia.org/wiki/Unidades_federativas_do_Brasil"

df_uf = pd.read_html(url) # retorna uma listas de tabelas encontradas na url

df_uf = df_uf[1]

df_uf.to_csv("UFs_Brasil.csv", sep= ';')