# Série temporal de Concentração de CO2 na Atmosfera

In [2]:
# Setup: library imports for the notebook.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from urllib.request import urlopen

## Extração de dados

In [3]:
# Address of the NOAA GML text file (co2_mm_mlo.txt) that the next cell loads.
url = "https://gml.noaa.gov/webdata/ccgg/trends/co2/co2_mm_mlo.txt"

In [10]:
# Parse the whitespace-delimited text file at `url` into a DataFrame.
# - comment='#' skips the '#'-prefixed commentary lines of the file;
# - header=None + names=... : no header row is read, so the column labels
#   are supplied explicitly here;
# - sep=r'\s+' is the documented replacement for delim_whitespace=True,
#   which pandas deprecated in 2.2 (same parsing behaviour).
# NOTE(review): the previews further down show -1 / -9.99 / -0.99 in the last
# three columns; presumably these are missing-value placeholders. They are
# loaded as ordinary numbers (not NaN) -- confirm before using those columns.
df = pd.read_csv(
    url,
    sep=r'\s+',
    comment='#',
    header=None,
    names=[
        'Year',
        'Month',
        'Decimal Date',
        'Monthly Average',
        'de-seasonalized',
        '#Days',
        'St.Dev of days',
        'unc. of mon mean',
    ],
)

In [26]:
# Preview five randomly selected rows (no seed is set, so the rows differ per run).
df.sample(n=5)

Unnamed: 0,Year,Month,Decimal Date,Monthly Average,de-seasonalized,#Days,St.Dev of days,unc. of mon mean
220,1976,7,1976.5417,333.08,332.23,15,0.24,0.12
373,1989,4,1989.2917,355.72,353.07,28,0.47,0.17
170,1972,5,1972.3716,330.07,327.14,-1,-9.99,-0.99
197,1974,8,1974.625,329.15,330.54,26,0.31,0.12
558,2004,9,2004.7083,374.43,377.8,15,0.56,0.28


In [9]:
# List the column labels of the loaded frame.
df.columns

Index(['Year', 'Month', 'Decimal Date', 'Monthly Average', 'de-seasonalized',
       '#Days', 'St.Dev of days', 'unc. of mon mean'],
      dtype='object')

## Pré-processamento dos dados

In [20]:
# Start of the record: smallest value found in each of the two date columns.
df.loc[:, ['Year', 'Decimal Date']].min()

Year            1958.0000
Decimal Date    1958.2027
dtype: float64

In [21]:
# End of the record: largest value found in each of the two date columns.
df.loc[:, ['Year', 'Decimal Date']].max()

Year            2023.000
Decimal Date    2023.625
dtype: float64

In [22]:
# Print a summary of the frame: column dtypes, non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 786 entries, 0 to 785
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Year              786 non-null    int64  
 1   Month             786 non-null    int64  
 2   Decimal Date      786 non-null    float64
 3   Monthly Average   786 non-null    float64
 4   de-seasonalized   786 non-null    float64
 5   #Days             786 non-null    int64  
 6   St.Dev of days    786 non-null    float64
 7   unc. of mon mean  786 non-null    float64
dtypes: float64(5), int64(3)
memory usage: 49.2 KB


In [24]:
# Count NaN entries per column.
# NOTE(review): the -1 / -9.99 / -0.99 values visible in the data previews are
# plain numbers, so they are not counted here; presumably they stand in for
# missing measurements -- confirm before treating the data as complete.
df.isna().sum(axis=0)

Year                0
Month               0
Decimal Date        0
Monthly Average     0
de-seasonalized     0
#Days               0
St.Dev of days      0
unc. of mon mean    0
dtype: int64

In [34]:
# Build a 'Year-Month' text label for every row (e.g. '1958-3'; the month is
# not zero-padded).
# NOTE(review): the result is a plain string column, not a datetime; if later
# steps need chronological ordering or a time-aware index, consider converting
# with pd.to_datetime -- confirm against downstream usage.
df['Date'] = df['Year'].map(str).str.cat(df['Month'].map(str), sep='-')

In [35]:
# Display the frame, now including the 'Date' column (long frames are shown truncated).
df

Unnamed: 0,Year,Month,Decimal Date,Monthly Average,de-seasonalized,#Days,St.Dev of days,unc. of mon mean,Date
0,1958,3,1958.2027,315.70,314.43,-1,-9.99,-0.99,1958-3
1,1958,4,1958.2877,317.45,315.16,-1,-9.99,-0.99,1958-4
2,1958,5,1958.3699,317.51,314.71,-1,-9.99,-0.99,1958-5
3,1958,6,1958.4548,317.24,315.14,-1,-9.99,-0.99,1958-6
4,1958,7,1958.5370,315.86,315.18,-1,-9.99,-0.99,1958-7
...,...,...,...,...,...,...,...,...,...
781,2023,4,2023.2917,423.36,420.69,28,0.60,0.22,2023-4
782,2023,5,2023.3750,424.00,420.55,31,0.69,0.24,2023-5
783,2023,6,2023.4583,423.68,421.19,29,0.57,0.20,2023-6
784,2023,7,2023.5417,421.83,421.42,21,0.48,0.20,2023-7


In [41]:
# Preview the frame without the 'Year' and 'Month' columns.
# The result is only displayed: it is not assigned, so df keeps both columns.
df.drop(columns=['Year', 'Month'])

Unnamed: 0,Decimal Date,Monthly Average,de-seasonalized,#Days,St.Dev of days,unc. of mon mean,Date
0,1958.2027,315.70,314.43,-1,-9.99,-0.99,1958-3
1,1958.2877,317.45,315.16,-1,-9.99,-0.99,1958-4
2,1958.3699,317.51,314.71,-1,-9.99,-0.99,1958-5
3,1958.4548,317.24,315.14,-1,-9.99,-0.99,1958-6
4,1958.5370,315.86,315.18,-1,-9.99,-0.99,1958-7
...,...,...,...,...,...,...,...
781,2023.2917,423.36,420.69,28,0.60,0.22,2023-4
782,2023.3750,424.00,420.55,31,0.69,0.24,2023-5
783,2023.4583,423.68,421.19,29,0.57,0.20,2023-6
784,2023.5417,421.83,421.42,21,0.48,0.20,2023-7


In [42]:
# Keep the decimal date and the monthly average, with rows indexed by the 'Date' label.
serie_temporal = df[['Date', 'Decimal Date', 'Monthly Average']].set_index('Date')

In [43]:
# Display the 'Date'-indexed table built in the previous cell.
serie_temporal

Unnamed: 0_level_0,Decimal Date,Monthly Average
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1958-3,1958.2027,315.70
1958-4,1958.2877,317.45
1958-5,1958.3699,317.51
1958-6,1958.4548,317.24
1958-7,1958.5370,315.86
...,...,...
2023-4,2023.2917,423.36
2023-5,2023.3750,424.00
2023-6,2023.4583,423.68
2023-7,2023.5417,421.83
