<a href="https://colab.research.google.com/github/Randresil/TimeSeries_R_Python/blob/main/2_Python_Series_Temporales.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Serie temporal en Python

## Sección 4

### 1. Importación de paquetes

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### 2. Importación de datos

In [43]:
# Read the index CSV once and leave that frame untouched; every later
# transformation is applied to an independent working copy.
raw_data = pd.read_csv('Index2018.csv')
df = raw_data.copy(deep=True)

### 3. Tamaño de periodos de tiempo

In [8]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
df.describe()

Unnamed: 0,spx,dax,ftse,nikkei
count,6269.0,6269.0,6269.0,6269.0
mean,1288.127542,6080.063363,5422.713545,14597.0557
std,487.586473,2754.361032,1145.572428,4043.122953
min,438.92,1911.7,2876.6,7054.98
25%,990.671905,4069.35,4486.1,10709.29
50%,1233.42,5773.34,5662.43,15028.17
75%,1459.987747,7443.07,6304.25,17860.47
max,2872.867839,13559.6,7778.637689,24124.15


In [9]:
# Preview the first five rows; date is still a dd/mm/yyyy string column here.
df.head()

Unnamed: 0,date,spx,dax,ftse,nikkei
0,07/01/1994,469.9,2224.95,3445.98,18124.01
1,10/01/1994,475.27,2225.0,3440.58,18443.44
2,11/01/1994,474.13,2228.1,3413.77,18485.25
3,12/01/1994,474.17,2182.06,3372.02,18793.88
4,13/01/1994,472.47,2142.37,3360.01,18577.26


In [6]:
# Describe the date column before conversion: it is summarised as plain
# objects (count / unique / top / freq), i.e. it has not been parsed as dates.
df['date'].describe()

count           6269
unique          6269
top       07/01/1994
freq               1
Name: date, dtype: object

### 4. De Texto (String) a Fecha

In [10]:
# Parse the dd/mm/yyyy strings with an explicit format. dayfirst=True is only
# a parsing preference, whereas format= is unambiguous and makes any row that
# deviates from the expected layout raise instead of being guessed.
df['date'] = pd.to_datetime(df['date'], format='%d/%m/%Y')

In [11]:
# Check the conversion: date values should now render as ISO dates (yyyy-mm-dd).
df.head()

Unnamed: 0,date,spx,dax,ftse,nikkei
0,1994-01-07,469.9,2224.95,3445.98,18124.01
1,1994-01-10,475.27,2225.0,3440.58,18443.44
2,1994-01-11,474.13,2228.1,3413.77,18485.25
3,1994-01-12,474.17,2182.06,3372.02,18793.88
4,1994-01-13,472.47,2142.37,3360.01,18577.26


In [10]:
# Describe the date column again after parsing; the saved output now also
# reports the first and last timestamps of the series.
df['date'].describe()

  df['date'].describe()


count                    6269
unique                   6269
top       1994-01-07 00:00:00
freq                        1
first     1994-01-07 00:00:00
last      2018-01-29 00:00:00
Name: date, dtype: object

### 5. Fijando fecha como el índice

In [12]:
# Make date the index. Reassigning the result (instead of inplace=True) keeps
# the step explicit and chainable; the resulting frame is the same.
df = df.set_index('date')

In [13]:
# Preview the frame now that date is the index rather than a regular column.
df.head()

Unnamed: 0_level_0,spx,dax,ftse,nikkei
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1994-01-07,469.9,2224.95,3445.98,18124.01
1994-01-10,475.27,2225.0,3440.58,18443.44
1994-01-11,474.13,2228.1,3413.77,18485.25
1994-01-12,474.17,2182.06,3372.02,18793.88
1994-01-13,472.47,2142.37,3360.01,18577.26


### 6. Establecimiento de la frecuencia deseada

In [14]:
# Frequency aliases accepted by asfreq (spellings used by recent pandas; older
# releases wrote hourly, month-end and year-end as 'H', 'M' and 'A'):
#   'h'  hourly
#   'W'  weekly
#   'D'  calendar days
#   'B'  business days
#   'ME' month end
#   'YE' year end

# Use the documented uppercase 'D'; newer pandas releases deprecate the
# lowercase spelling. Dates missing from the data become all-NaN rows.
df = df.asfreq('D')

In [15]:
# Preview after daily re-indexing: dates with no quote (8-9 Jan 1994 in the
# saved output) appear as rows filled with NaN.
df.head()

Unnamed: 0_level_0,spx,dax,ftse,nikkei
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1994-01-07,469.9,2224.95,3445.98,18124.01
1994-01-08,,,,
1994-01-09,,,,
1994-01-10,475.27,2225.0,3440.58,18443.44
1994-01-11,474.13,2228.1,3413.77,18485.25


In [16]:
# 'B' = business days (Mon-Fri). The uppercase alias is the documented
# spelling; newer pandas releases deprecate the lowercase 'b'.
df = df.asfreq('B')

In [17]:
# Preview after switching to business-day frequency: the NaN rows for
# 8-9 Jan 1994 are no longer present.
df.head()

Unnamed: 0_level_0,spx,dax,ftse,nikkei
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1994-01-07,469.9,2224.95,3445.98,18124.01
1994-01-10,475.27,2225.0,3440.58,18443.44
1994-01-11,474.13,2228.1,3413.77,18485.25
1994-01-12,474.17,2182.06,3372.02,18793.88
1994-01-13,472.47,2142.37,3360.01,18577.26


### 7. Manejando missing values

In [19]:
# Boolean mask marking every missing cell.
# NOTE(review): the saved output already lists a market_value column, which is
# only created in section 8 - the cell was run out of order; re-run the
# notebook top to bottom to refresh it.
df.isna()

Unnamed: 0_level_0,spx,dax,ftse,nikkei,market_value
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1994-01-07,False,False,False,False,False
1994-01-10,False,False,False,False,False
1994-01-11,False,False,False,False,False
1994-01-12,False,False,False,False,False
1994-01-13,False,False,False,False,False
...,...,...,...,...,...
2018-01-23,False,False,False,False,False
2018-01-24,False,False,False,False,False
2018-01-25,False,False,False,False,False
2018-01-26,False,False,False,False,False


In [20]:
# Count the missing values in each column before filling them.
df.isna().sum()

spx             8
dax             8
ftse            8
nikkei          8
market_value    8
dtype: int64

In [21]:
# Filling missing values, strategy 1: forward fill.
# ffill means "forward fill": each gap takes the last valid observation
# before it. Series.ffill() replaces fillna(method='ffill'), which recent
# pandas versions deprecate.

df['spx'] = df['spx'].ffill()

In [22]:
# Strategy 2: backward fill - each gap takes the next valid observation.
# Series.bfill() replaces the deprecated fillna(method='bfill').
df['ftse'] = df['ftse'].bfill()

In [25]:
# Backward fill for nikkei as well: each gap takes the next valid observation.
# Series.bfill() replaces the deprecated fillna(method='bfill').
df['nikkei'] = df['nikkei'].bfill()

In [23]:
# Strategy 3: constant fill - every gap in dax receives the mean of the
# column's available values.
df['dax'] = df['dax'].fillna(df['dax'].mean())

In [26]:
# Re-count missing values per column to confirm the fills above took effect.
df.isna().sum()

spx             0
dax             0
ftse            0
nikkei          0
market_value    8
dtype: int64

### 8. Simplificando DataSet

In [44]:
# Expose the S&P 500 series under the generic name market_value.
df['market_value'] = df['spx']

In [29]:
# Summary statistics once more; market_value should match spx column for column.
df.describe()

Unnamed: 0,spx,dax,ftse,nikkei,market_value
count,6277.0,6277.0,6277.0,6277.0,6277.0
mean,1288.642547,6080.063363,5423.690398,14597.597179,1288.642547
std,487.86821,2752.604984,1145.56837,4043.683038,487.86821
min,438.92,1911.7,2876.6,7054.98,438.92
25%,992.715221,4070.46,4487.88,10701.13,992.715221
50%,1233.761241,5774.38,5663.3,15030.51,1233.761241
75%,1460.25,7442.66,6304.630175,17860.47,1460.25
max,2872.867839,13559.6,7778.637689,24124.15,2872.867839


In [45]:
# The del statement removes a variable or, as here, a single column from a
# DataFrame in place.

del df['spx']

In [31]:
# Summary statistics after removing spx.
df.describe()

Unnamed: 0,dax,ftse,nikkei,market_value
count,6277.0,6277.0,6277.0,6277.0
mean,6080.063363,5423.690398,14597.597179,1288.642547
std,2752.604984,1145.56837,4043.683038,487.86821
min,1911.7,2876.6,7054.98,438.92
25%,4070.46,4487.88,10701.13,992.715221
50%,5774.38,5663.3,15030.51,1233.761241
75%,7442.66,6304.630175,17860.47,1460.25
max,13559.6,7778.637689,24124.15,2872.867839


In [46]:
# Remove the remaining index columns so that only market_value is left.
for column in ('dax', 'ftse', 'nikkei'):
    del df[column]

In [34]:
# Summary statistics of the simplified frame (market_value only).
df.describe()

Unnamed: 0,market_value
count,6277.0
mean,1288.642547
std,487.86821
min,438.92
25%,992.715221
50%,1233.761241
75%,1460.25
max,2872.867839


### 9. Separando la data

In [35]:
# Number of rows in the first 80% of the frame (truncated to an integer).
size = int(0.8 * len(df))

In [52]:
# Training split: every row from the start of the frame up to (not including)
# position `size`.
df_train = df.iloc[:size]
# Backward-compatible alias: later cells still refer to this split as
# df_test1, although it is the training portion rather than a test set.
df_test1 = df_train

In [51]:
# Test split: every row from position `size` through the end of the frame.
df_test = df.iloc[size:]

In [39]:
# Show the split position, i.e. how many rows go into the first split.
print(size)


5021


In [53]:
# Last rows of the first split; they should end right before the test split starts.
# NOTE(review): the saved output shows an integer index and dd/mm/yyyy strings,
# so this cell was last run on a frame that skipped the date parsing and
# indexing steps - re-run the notebook top to bottom.
df_test1.tail()

Unnamed: 0,date,market_value
5016,05/04/2013,1553.27893
5017,08/04/2013,1563.071269
5018,09/04/2013,1568.607909
5019,10/04/2013,1587.731827
5020,11/04/2013,1593.369863


In [54]:
# First rows of the test split; they should continue where the first split ends.
# NOTE(review): the saved output shows an integer index and dd/mm/yyyy strings,
# so this cell was last run on a frame that skipped the date parsing and
# indexing steps - re-run the notebook top to bottom.
df_test.head()

Unnamed: 0,date,market_value
5021,12/04/2013,1588.854623
5022,15/04/2013,1552.363572
5023,16/04/2013,1574.566985
5024,17/04/2013,1552.014896
5025,18/04/2013,1541.613396
