# INTRODUÇÃO À CRIAÇÃO DE SÉRIES TEMPORAIS

In [1]:
import numpy as np
import pandas as pd
from pandas_datareader import data as wb

In [2]:
# Series of 5 pseudo-random floats in [0, 1), named "Coluna01".
# A seeded generator is used so that re-running the notebook from a fresh
# kernel reproduces exactly the same values.
frames = pd.Series(np.random.default_rng(42).random(5), name="Coluna01")

In [3]:
# Display the Series (values, name and dtype).
frames

0    0.665829
1    0.295483
2    0.352182
3    0.440864
4    0.410492
Name: Coluna01, dtype: float64

In [4]:
# Download the data through the Yahoo API.
# 'PG' is the ticker (the name a company trades under on the exchange),
# e.g. Apple (AAPL), Microsoft (MSFT), etc.
# `start` is the first date of the extraction.
PG = wb.DataReader(
    name='PG',
    data_source='yahoo',
    start='1995-1-1',
)

In [5]:
# Time series
# Only trading days are included (no Saturdays, Sundays or holidays)
PG.head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1995-01-03,15.625,15.4375,15.46875,15.59375,3318400.0,8.215732
1995-01-04,15.65625,15.3125,15.53125,15.46875,2218800.0,8.149876
1995-01-05,15.4375,15.21875,15.375,15.25,2319600.0,8.034625
1995-01-06,15.40625,15.15625,15.15625,15.28125,3438000.0,8.051086
1995-01-09,15.40625,15.1875,15.34375,15.21875,1795200.0,8.018162


In [6]:
# Show the last rows of PG (the most recent dates downloaded).
PG.tail()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-05,138.919998,136.270004,137.990005,138.699997,6856400.0,138.699997
2021-01-06,141.039993,138.600006,138.770004,140.160004,10578000.0,140.160004
2021-01-07,140.130005,138.039993,139.75,138.850006,7355400.0,138.850006
2021-01-08,139.059998,137.0,138.470001,138.789993,7447700.0,138.789993
2021-01-11,139.440002,138.089996,138.75,138.225006,1076737.0,138.225006


In [7]:
# Summarize PG: index range, columns, non-null counts, dtypes and memory usage.
PG.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 6553 entries, 1995-01-03 to 2021-01-11
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   High       6553 non-null   float64
 1   Low        6553 non-null   float64
 2   Open       6553 non-null   float64
 3   Close      6553 non-null   float64
 4   Volume     6553 non-null   float64
 5   Adj Close  6553 non-null   float64
dtypes: float64(6)
memory usage: 358.4 KB


In [8]:
# Build a table with one column per ticker, keeping only each company's
# 'Adj Close' column from the downloaded data.
tickers = ['PG', 'MSFT', 'T', 'F', 'GE']
new_data = pd.DataFrame()
for tick in tickers:
    # Download the full history for this ticker, then keep a single column.
    quotes = wb.DataReader(name=tick, data_source='yahoo', start='1995-1-1')
    new_data[tick] = quotes['Adj Close']

In [9]:
# Show the first rows of the per-ticker table.
new_data.head()

Unnamed: 0_level_0,PG,MSFT,T,F,GE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1995-01-03,8.215732,2.394761,6.087094,4.380183,4.12263
1995-01-04,8.149876,2.412169,6.052292,4.498034,4.12263
1995-01-05,8.034625,2.37238,6.146861,4.458752,4.132737
1995-01-06,8.051086,2.412169,6.146861,4.458752,4.112525
1995-01-09,8.018162,2.397249,6.127946,4.53732,4.07211


In [10]:
# Show the last rows of the per-ticker table.
new_data.tail()

Unnamed: 0_level_0,PG,MSFT,T,F,GE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-01-05,138.699997,217.899994,28.751301,8.65,10.77
2021-01-06,140.160004,212.25,29.311392,8.84,11.36
2021-01-07,138.850006,218.289993,29.389999,9.06,11.27
2021-01-08,138.789993,219.619995,29.02,9.0,11.34
2021-01-11,138.179993,217.912094,28.875,9.105,11.35


In [11]:
# Set the date as the index (approach 1): choose the 'Date' column as the
# index while reading the CSV.
# parse_dates=True asks pandas to parse that index as dates, so the frame
# gets a DatetimeIndex instead of an index of plain strings.
dataf = pd.read_csv('Data_02.csv', index_col='Date', parse_dates=True)

In [12]:
# Show the first rows of dataf; 'Date' is the index.
dataf.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Ex-Dividend,Split Ratio,Adj. Open,Adj. High,Adj. Low,Adj. Close,Adj. Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2017-03-03,136.63,137.33,136.08,137.13,11095605.0,0.0,1.0,136.63,137.33,136.08,137.13,11095605.0
2017-03-02,137.09,137.82,136.31,136.7603,12243028.0,0.0,1.0,137.09,137.82,136.31,136.7603,12243028.0
2017-03-01,136.47,137.48,136.3,137.42,16257010.0,0.0,1.0,136.47,137.48,136.3,137.42,16257010.0
2017-02-28,136.79,136.805,134.75,135.54,16112092.0,0.0,1.0,136.79,136.805,134.75,135.54,16112092.0
2017-02-27,135.26,137.1846,135.02,136.41,14306668.0,0.0,1.0,135.26,137.1846,135.02,136.41,14306668.0


In [13]:
# Approach 2 for setting the index (here, the year): first read the
# spreadsheet as-is; the index is changed in a later step.
datag = pd.read_excel(io="Data_03.xlsx")

In [14]:
# Show the first rows of datag as loaded; 'Year' is still a regular column here.
datag.head()

Unnamed: 0,Year,Gas Exp,Pop,Gasp,Income
0,1953,7.4,159565,16.667999,8883
1,1954,7.8,162391,17.028999,8685
2,1955,8.6,165275,17.209999,9137
3,1956,9.4,168221,17.729,9436
4,1957,10.2,171274,18.497,9534


In [15]:
# Promote the 'Year' column to be the index of datag.
# The guard keeps this cell re-runnable: once 'Year' is the index it is no
# longer a column, and calling set_index('Year') again would raise KeyError.
if datag.index.name != 'Year':
    datag = datag.set_index('Year')

In [16]:
# Show the first rows of datag again; 'Year' is now the index.
datag.head()

Unnamed: 0_level_0,Gas Exp,Pop,Gasp,Income
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1953,7.4,159565,16.667999,8883
1954,7.8,162391,17.028999,8685
1955,8.6,165275,17.209999,9137
1956,9.4,168221,17.729,9436
1957,10.2,171274,18.497,9534
