# Esse arquivo tem como objetivo preparar os dados para o estudo aplicado

## Preparo do Ambiente e conexão com as bases de dados

In [106]:
#importações
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [107]:
#Lendo tabela 1

urlConcentr = "https://gml.noaa.gov/webdata/ccgg/trends/co2/co2_mm_mlo.txt"


colunasConcentr = ['year', 'month','decimal_date', 'monthly_average', 'de-seasonalized', 'days_of_month', 'stdev', 'unc_of_mon_mean']

dfConcentr = pd.read_csv(urlConcentr, delim_whitespace=True, comment='#', names=colunasConcentr)
dfConcentr

Unnamed: 0,year,month,decimal_date,monthly_average,de-seasonalized,days_of_month,stdev,unc_of_mon_mean
0,1958,3,1958.2027,315.70,314.43,-1,-9.99,-0.99
1,1958,4,1958.2877,317.45,315.16,-1,-9.99,-0.99
2,1958,5,1958.3699,317.51,314.71,-1,-9.99,-0.99
3,1958,6,1958.4548,317.24,315.14,-1,-9.99,-0.99
4,1958,7,1958.5370,315.86,315.18,-1,-9.99,-0.99
...,...,...,...,...,...,...,...,...
788,2023,11,2023.8750,420.46,422.47,21,0.91,0.38
789,2023,12,2023.9583,421.86,422.59,20,0.69,0.29
790,2024,1,2024.0417,422.80,422.48,27,0.70,0.26
791,2024,2,2024.1250,424.55,423.58,22,1.24,0.51


In [108]:
dfTemp = pd.read_csv("Data/TempData.csv", skiprows = 5, names =[ "Date", "Temperature Anomalies"])
dfTemp

Unnamed: 0,Date,Temperature Anomalies
0,195901,0.13
1,195902,0.10
2,195903,0.22
3,195904,0.15
4,195905,0.09
...,...,...
775,202308,1.26
776,202309,1.42
777,202310,1.39
778,202311,1.42


## Análise exploratória dos dados

In [109]:
dfConcentr.isna().sum()

year               0
month              0
decimal_date       0
monthly_average    0
de-seasonalized    0
days_of_month      0
stdev              0
unc_of_mon_mean    0
dtype: int64

In [110]:
dfConcentr.describe()

Unnamed: 0,year,month,decimal_date,monthly_average,de-seasonalized,days_of_month,stdev,unc_of_mon_mean
count,793.0,793.0,793.0,793.0,793.0,793.0,793.0,793.0
mean,1990.708701,6.495586,1991.207714,358.950567,358.946974,19.037831,-2.090315,-0.097226
std,19.090749,3.454289,19.089455,31.547351,31.491669,11.995415,4.532453,0.513076
min,1958.0,1.0,1958.2027,312.43,314.43,-1.0,-9.99,-0.99
25%,1974.0,3.0,1974.7083,330.3,330.53,11.0,0.17,0.07
50%,1991.0,6.0,1991.2083,354.99,355.59,25.0,0.4,0.15
75%,2007.0,9.0,2007.7083,384.41,384.56,28.0,0.57,0.21
max,2024.0,12.0,2024.2083,425.38,423.92,31.0,1.31,0.58


In [111]:
dfTemp.isna().sum()

Date                     0
Temperature Anomalies    0
dtype: int64

In [112]:
dfTemp.describe()

Unnamed: 0,Date,Temperature Anomalies
count,780.0,780.0
mean,199106.5,0.395474
std,1877.37331,0.34642
min,195901.0,-0.28
25%,197503.75,0.11
50%,199106.5,0.36
75%,200709.25,0.66
max,202312.0,1.42


## Transformação e Filtragem dos dados

In [113]:
#Filtrar período nos dados de concentração de Carbono
dfConcentr = dfConcentr.query(' year > 1958 and year < 2024')
dfConcentr = dfConcentr.reset_index()
dfConcentr

Unnamed: 0,index,year,month,decimal_date,monthly_average,de-seasonalized,days_of_month,stdev,unc_of_mon_mean
0,10,1959,1,1959.0411,315.58,315.55,-1,-9.99,-0.99
1,11,1959,2,1959.1260,316.48,315.86,-1,-9.99,-0.99
2,12,1959,3,1959.2027,316.65,315.38,-1,-9.99,-0.99
3,13,1959,4,1959.2877,317.72,315.41,-1,-9.99,-0.99
4,14,1959,5,1959.3699,318.29,315.49,-1,-9.99,-0.99
...,...,...,...,...,...,...,...,...,...
775,785,2023,8,2023.6250,419.68,421.60,21,0.45,0.19
776,786,2023,9,2023.7083,418.51,421.99,18,0.30,0.14
777,787,2023,10,2023.7917,418.82,422.14,27,0.47,0.17
778,788,2023,11,2023.8750,420.46,422.47,21,0.91,0.38


In [114]:
#Converter tipo da coluna para realizar transformações
dfTemp['Date'] = dfTemp['Date'].astype(str)

#Criar colunas separadas
dfTemp['year'] = dfTemp['Date'].apply(lambda x: str(x)[:4])
dfTemp['month'] = dfTemp['Date'].apply(lambda x: str(x)[4:6])
dfTemp = dfTemp.drop('Date', axis = 'columns')

In [115]:
dfTemp

Unnamed: 0,Temperature Anomalies,year,month
0,0.13,1959,01
1,0.10,1959,02
2,0.22,1959,03
3,0.15,1959,04
4,0.09,1959,05
...,...,...,...
775,1.26,2023,08
776,1.42,2023,09
777,1.39,2023,10
778,1.42,2023,11


In [116]:
# Salvar tabelas
dfConcentr.to_csv('Data/Concentra_atmos_CO2.csv')
dfTemp.to_csv('Data/Anom_Temp.csv')

In [117]:
#criar um dataframe filtrado
df_Filtrado = pd.DataFrame({'year': dfConcentr['year'], 
                            'month': dfConcentr['month'],
                            'decimal date': dfConcentr['decimal_date'],
                           'temperature anomalies': dfTemp['Temperature Anomalies'],
                            'Co2 ppm': dfConcentr['monthly_average']
                           })

In [118]:
df_Filtrado

Unnamed: 0,year,month,decimal date,temperature anomalies,Co2 ppm
0,1959,1,1959.0411,0.13,315.58
1,1959,2,1959.1260,0.10,316.48
2,1959,3,1959.2027,0.22,316.65
3,1959,4,1959.2877,0.15,317.72
4,1959,5,1959.3699,0.09,318.29
...,...,...,...,...,...
775,2023,8,2023.6250,1.26,419.68
776,2023,9,2023.7083,1.42,418.51
777,2023,10,2023.7917,1.39,418.82
778,2023,11,2023.8750,1.42,420.46


In [119]:
df_Filtrado.to_csv('Data/final_data.csv')