<a href="https://colab.research.google.com/github/Luckaz7/crypto-etl-pipeline/blob/main/crypto_etl.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [11]:
from google.colab import userdata

In [2]:
import pandas as pd
import requests
import datetime
import pytz
import os
from bs4 import BeautifulSoup

In [3]:
def extract_dados(link: str, headers: dict) -> str:
  try:
    response = requests.get(link, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    preco = soup.find('span', {'data-test': 'text-cdp-price-display'}).text
    preco = preco.replace('$','').replace(',','')
    return preco
  except Exception as e:
    print(f'Aconteceu um erro ao extrair os dados: {e}')
    return None

In [4]:
def trans_dados(nome: str, simbolo: str, preco: float, data: str, hora: str) -> dict:
  try:
    dados = {
      'moeda':nome,
      'simbolo':simbolo,
      'preco':preco,
      'data':data,
      'hora':hora,
      'timestamp': pd.to_datetime(f"{data} {hora}", dayfirst=True)
      }
    return dados
  except Exception as e:
    print(f'Ocorreu um erro no tratamento dos dados coletados: {e}')
    return None

In [5]:
def data_atual() -> str:
  try:
    fuso_br = pytz.timezone('America/Sao_Paulo')
    data = datetime.datetime.now(fuso_br)
    return data.strftime('%d-%m-%Y')
  except Exception as e:
    print(f'Ocorreu um erro ao gerar a data da coleta: {e}')
    return None

In [6]:
def hora_atual() -> str:
  try:
    fuso_br = pytz.timezone('America/Sao_Paulo')
    hora = datetime.datetime.now(fuso_br)
    return hora.strftime('%H:%M:%S')
  except Exception as e:
    print(f'Ocorreu uma erro ao gerar a hora da coleta: {e}')
    return None

In [7]:
def carga_dados(dados: list, path: str) -> pd.DataFrame:
  df_novo = pd.DataFrame(dados)
  try:
    if os.path.exists(path):
      df_historico = pd.read_csv(path)
      df_atualizado = pd.concat([df_historico, df_novo], ignore_index=True)
      return df_atualizado
    else:
      return df_novo
  except Exception as e:
    print(f'Ocorreu um erro ao fazer a carga dos dados: {e}')
    return None

In [18]:
headers = {
    userdata.get('header')
}

link_usdt = "https://coinmarketcap.com/currencies/tether/"
link_eth = "https://coinmarketcap.com/currencies/ethereum/"
link_bnb = "https://coinmarketcap.com/currencies/bnb/"
link_btc = "https://coinmarketcap.com/currencies/bitcoin/"
link_sol = "https://coinmarketcap.com/currencies/solana/"
link_ada = "https://coinmarketcap.com/currencies/cardano/"

In [9]:
moedas = {
    'Tether': ('USDT', link_usdt),
    'Ethereum': ('ETH', link_eth),
    'Bnb': ('BNB', link_bnb),
    'Bitcoin': ('BTC', link_btc),
    'Solana': ('SOL', link_sol),
    'Cardano': ('ADA', link_ada)
}

dados = []
for nome, (simbolo, link) in moedas.items():
  preco = extract_dados(link, headers)
  dados.append(trans_dados(nome, simbolo, preco, data_atual(),hora_atual()))

In [17]:
path = userdata.get('path')

In [21]:
df_final = carga_dados(dados, path)
df_final

Unnamed: 0,moeda,simbolo,preco,data,hora,timestamp
0,Tether,USDT,0.9994,09-02-2026,21:55:36,2026-02-09 21:55:36
1,Ethereum,ETH,2096.12,09-02-2026,21:55:37,2026-02-09 21:55:37
2,Bnb,BNB,635.96,09-02-2026,21:55:37,2026-02-09 21:55:37
3,Bitcoin,BTC,69728.5,09-02-2026,21:55:37,2026-02-09 21:55:37
4,Solana,SOL,86.35,09-02-2026,21:55:37,2026-02-09 21:55:37
5,Cardano,ADA,0.2688,09-02-2026,21:55:37,2026-02-09 21:55:37
6,Tether,USDT,0.9994,09-02-2026,21:55:36,2026-02-09 21:55:36
7,Ethereum,ETH,2096.12,09-02-2026,21:55:37,2026-02-09 21:55:37
8,Bnb,BNB,635.96,09-02-2026,21:55:37,2026-02-09 21:55:37
9,Bitcoin,BTC,69728.5,09-02-2026,21:55:37,2026-02-09 21:55:37


In [19]:
df_final.to_csv(path, index=False)