In [91]:
import pandas as pd
import numpy as np
from typing import List

In [92]:
def df_show_domain(df: pd.DataFrame, columns: List[str]) -> None:
  """
  Print the dtype and the distinct values of each requested column.

  Columns are reported in the order given by ``columns``; each report is
  three printed lines (name, dtype, unique values) followed by a blank line.

  :param df: DataFrame whose column domains are printed.
  :param columns: Names of the columns to report, in display order.
  :return: None
  """
  for name in columns:
    print(f"Coluna:   {name}")
    # Look the column up once and reuse it for both remaining lines.
    series = df[name]
    print(f"dtype:    {series.dtype}")
    print(f"Domínio:  {series.unique()}\n")

In [93]:
# LOAD FILE (uploaded temporarily into the Colab session)
df = pd.read_csv(filepath_or_buffer="Dados_Tratados_EmissoesCO2.csv")

# Show the result: first rows, then dtype and distinct values of every column
display(df.head(n=5))
df_show_domain(df, list(df.columns))

Unnamed: 0,_estado,_mes,_ano,car_c02_emitido
0,PA,1,2000,438735643592919.06
1,PA,2,2000,438735643592919.06
2,PA,3,2000,438735643592919.06
3,PA,4,2000,438735643592919.06
4,PA,5,2000,438735643592919.06


Coluna:   _estado
dtype:    object
Domínio:  ['PA' 'MT' 'MA' 'MG' 'SP' 'BA' 'RO' 'AM' 'GO' 'TO' 'RS' 'MS' 'PR' 'RJ'
 'SC' 'PI' 'CE' 'AC' 'ES' 'RR' 'PE' 'PB' 'RN' 'AL' 'SE' 'DF' 'AP']

Coluna:   _mes
dtype:    int64
Domínio:  [ 1  2  3  4  5  6  7  8  9 10 11 12]

Coluna:   _ano
dtype:    int64
Domínio:  [2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023]

Coluna:   car_c02_emitido
dtype:    object
Domínio:  [' 438,735,643,592,919.00 ' ' 45,312,557,136,694,000.00 '
 ' 153,847,847,338,067.00 ' ' 15,981,773,732,551,300.00 '
 ' 15,215,607,800,649,100.00 ' ' 9,365,526,651,670,540.00 '
 ' 16,034,165,743,796,900.00 ' ' 44,392,783,215,699,200.00 '
 ' 8,866,177,091,141,850.00 ' ' 66,027,489,659,185,900.00 '
 ' 10,436,630,104,200,100.00 ' ' 9,614,600,477,343,160.00 '
 ' 7,775,150,436,054,550.00 ' ' 4,485,971,828,055,600.00 '
 ' 4,316,158,203,769,020.00 ' ' 1,535,257,555,750,260.00 '
 ' 22,695,777,205,136,700.00 ' ' 37,932,934

In [94]:
# DOMAIN CORRECTION
# Cast the key columns in one call: the state code becomes a pandas string,
# month and year become Int32 (nullable integers).
df = df.astype({
  "_estado": "string",
  "_mes": "Int32",
  "_ano": "Int32",
})

# Convert string to Float64 (the capital 'F' denotes the nullable dtype)
def str2float64_aux(x: str):
  """
  Normalize a formatted number string so it can be cast to a float.

  Surrounding whitespace is stripped and every "," is removed, e.g.
  " 1,234.50 " becomes "1234.50".

  Values that are not ``str`` (NaN, None, pd.NA, numbers) are returned
  unchanged instead of raising AttributeError on ``.strip()``. This keeps
  missing cells from crashing the conversion and lets the cell be re-run
  on a column that has already been converted.

  :param x: Raw cell value, normally a string such as " 1,234.50 ".
  :return: The cleaned string, or ``x`` itself when it is not a string.
  """
  if not isinstance(x, str):
    # Nothing to clean; leave the value for the subsequent cast to handle.
    return x
  return x.strip().replace(",", "")
# Clean each raw value, then cast the whole column to nullable Float64
df["car_c02_emitido"] = (
  df["car_c02_emitido"]
  .apply(str2float64_aux)
  .astype("Float64")
)

# Show the result
df_show_domain(df, list(df.columns))

Coluna:   _estado
dtype:    string
Domínio:  <StringArray>
['PA', 'MT', 'MA', 'MG', 'SP', 'BA', 'RO', 'AM', 'GO', 'TO', 'RS', 'MS', 'PR',
 'RJ', 'SC', 'PI', 'CE', 'AC', 'ES', 'RR', 'PE', 'PB', 'RN', 'AL', 'SE', 'DF',
 'AP']
Length: 27, dtype: string

Coluna:   _mes
dtype:    Int32
Domínio:  <IntegerArray>
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Length: 12, dtype: Int32

Coluna:   _ano
dtype:    Int32
Domínio:  <IntegerArray>
[2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023]
Length: 24, dtype: Int32

Coluna:   car_c02_emitido
dtype:    Float64
Domínio:  <FloatingArray>
[   438735643592919.0,  4.5312557136694e+16,    153847847338067.0,
 1.59817737325513e+16, 1.52156078006491e+16,   9365526651670540.0,
 1.60341657437969e+16, 4.43927832156992e+16,   8866177091141850.0,
 6.60274896591859e+16,
 ...
 2.96157709012904e+16, 2.95669322315897e+16, 2.86279860225117e+16,
 2.66113220867753e+16,   15476573

In [95]:
# Sort rows by state, then year, then month, for easier reading
df = df.sort_values(["_estado", "_ano", "_mes"], ascending=True)

# Show the result
display(df.head(n=5))

Unnamed: 0,_estado,_mes,_ano,car_c02_emitido
204,AC,1,2000,3.7932934085776e+16
205,AC,2,2000,3.7932934085776e+16
206,AC,3,2000,3.7932934085776e+16
207,AC,4,2000,3.7932934085776e+16
208,AC,5,2000,3.7932934085776e+16


In [96]:
# SAVE
# Write the corrected frame as UTF-8 CSV, leaving the row index out.
df.to_csv(
  path_or_buf="Dados_Tratados_EmissoesCO2_corrigido.csv",
  index=False,
  encoding="utf-8",
)