## Recolección de data desde Yahoo! Finance

##### Dos maneras de obtener precios de acciones utilizando el paquete yfinance (se asume que ya está instalado; la segunda manera requiere además pandas_datareader).

In [48]:
import pandas as pd
import numpy as np
import datetime as dt
import yfinance as yf

#### 1. yf.Ticker + .history()

In [50]:
# Fetch the longest daily price history available for Starbucks (SBUX).
sbux = yf.Ticker('SBUX')
df = sbux.history(period='max')
# End the cell on the frame itself instead of print(df): Jupyter then renders
# the DataFrame with its rich table display rather than wrapped plain text,
# matching how the other cells in this notebook show their results.
df

                                 Open        High         Low       Close  \
Date                                                                        
1992-06-26 00:00:00-04:00    0.262217    0.277825    0.255974    0.268461   
1992-06-29 00:00:00-04:00    0.271582    0.293434    0.265338    0.287190   
1992-06-30 00:00:00-04:00    0.293434    0.296555    0.274704    0.277825   
1992-07-01 00:00:00-04:00    0.280947    0.287190    0.271582    0.284069   
1992-07-02 00:00:00-04:00    0.287190    0.287190    0.277825    0.284069   
...                               ...         ...         ...         ...   
2023-02-13 00:00:00-05:00  107.449997  108.930000  107.279999  108.519997   
2023-02-14 00:00:00-05:00  107.930000  108.470001  106.699997  107.980003   
2023-02-15 00:00:00-05:00  107.500000  109.000000  107.260002  109.000000   
2023-02-16 00:00:00-05:00  107.760002  109.099998  107.410004  107.540001   
2023-02-17 00:00:00-05:00  106.739998  107.459999  106.230003  107.099998   

In [51]:
# Data for the last 3 days.
# The result could be stored in a variable instead of only being displayed.
sbux.history(
    period='3d',
)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-02-15 00:00:00-05:00,107.5,109.0,107.260002,109.0,4060400,0.0,0.0
2023-02-16 00:00:00-05:00,107.760002,109.099998,107.410004,107.540001,4596800,0.0,0.0
2023-02-17 00:00:00-05:00,106.739998,107.459999,106.230003,107.099998,6876100,0.0,0.0


In [52]:
# Data for a specific date range.
# The result could be stored in a variable instead of only being displayed.
sbux.history(
    start='2022-01-01',
    end='2023-01-01',
)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-01-03 00:00:00-05:00,113.198967,114.491616,111.556428,113.403069,5475700,0.0,0.0
2022-01-04 00:00:00-05:00,113.616891,113.762680,110.963560,111.031593,8367600,0.0,0.0
2022-01-05 00:00:00-05:00,111.187101,111.731371,107.299440,107.338318,8662300,0.0,0.0
2022-01-06 00:00:00-05:00,106.910679,108.737877,106.900957,108.018661,6099900,0.0,0.0
2022-01-07 00:00:00-05:00,105.180666,106.628818,104.461451,104.548920,11266400,0.0,0.0
...,...,...,...,...,...,...,...
2022-12-23 00:00:00-05:00,97.103427,97.809894,96.396968,97.780037,3944500,0.0,0.0
2022-12-27 00:00:00-05:00,98.078542,98.964104,97.720337,98.705399,4402500,0.0,0.0
2022-12-28 00:00:00-05:00,98.914357,99.411864,97.899439,98.108391,3491500,0.0,0.0
2022-12-29 00:00:00-05:00,98.506403,100.148177,98.178046,99.272560,3976100,0.0,0.0


#### 2. pandas_datareader + get_data_yahoo()

In [53]:
# Standard library
import datetime as dt

# Third-party
import numpy as np
import pandas as pd
import yfinance as yf
from pandas_datareader import data as pdr

# Let yfinance take over pandas_datareader's Yahoo reader, so that the
# pdr.get_data_yahoo(...) calls in the following cells are served by yfinance.
# NOTE(review): newer yfinance releases may have deprecated or removed
# pdr_override — confirm against the installed version.
yf.pdr_override()

In [54]:
# Date range shared by the download cells that follow.
# NOTE(review): the downloader appears to treat `end` as exclusive (the daily
# output printed further down stops at 2022-01-28) — confirm if 2022-01-31
# itself is expected in the data.
start = dt.datetime(year=2012, month=1, day=31)
end = dt.datetime(year=2022, month=1, day=31)

In [55]:
# Fetch daily data for Texas Roadhouse (TXRH) between `start` and `end`,
# both defined in an earlier cell.
df = pdr.get_data_yahoo('TXRH', start, end)
# End the cell on the frame itself instead of print(df): Jupyter then renders
# the DataFrame with its rich table display rather than wrapped plain text,
# matching how the weekly/monthly cells show their results.
df

[*********************100%***********************]  1 of 1 completed
                                Open       High        Low      Close  \
Date                                                                    
2012-01-31 00:00:00-05:00  15.390000  15.410000  14.960000  15.160000   
2012-02-01 00:00:00-05:00  15.270000  15.610000  15.140000  15.610000   
2012-02-02 00:00:00-05:00  15.580000  15.740000  15.460000  15.730000   
2012-02-03 00:00:00-05:00  15.980000  16.309999  15.900000  15.990000   
2012-02-06 00:00:00-05:00  15.960000  16.110001  15.880000  15.940000   
...                              ...        ...        ...        ...   
2022-01-24 00:00:00-05:00  78.059998  83.540001  76.650002  83.360001   
2022-01-25 00:00:00-05:00  81.870003  84.419998  80.360001  83.260002   
2022-01-26 00:00:00-05:00  84.139999  85.389999  81.540001  82.040001   
2022-01-27 00:00:00-05:00  81.559998  83.250000  79.809998  80.120003   
2022-01-28 00:00:00-05:00  79.910004  83.760002  78.610

In [56]:
# Weekly data: same ticker and date range, requested with a one-week interval.
# The result could be stored in a variable instead of only being displayed.
pdr.get_data_yahoo(
    'TXRH',
    start,
    end,
    interval='1wk',
)

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2012-01-30 00:00:00-05:00,15.390000,16.309999,14.960000,15.990000,13.140136,3433300
2012-02-06 00:00:00-05:00,15.960000,16.459999,15.830000,15.850000,13.025088,2537100
2012-02-13 00:00:00-05:00,15.970000,16.600000,15.590000,16.400000,13.477062,3728900
2012-02-20 00:00:00-05:00,16.370001,17.830000,16.260000,16.820000,13.822206,7868400
2012-02-27 00:00:00-05:00,16.629999,17.120001,16.190001,16.350000,13.435975,4832000
...,...,...,...,...,...,...
2021-12-27 00:00:00-05:00,88.139999,92.239998,87.260002,89.279999,87.317184,2435700
2022-01-03 00:00:00-05:00,89.570000,93.519997,89.160004,90.220001,88.236519,3592500
2022-01-10 00:00:00-05:00,89.620003,89.779999,85.639999,87.290001,85.370941,3437300
2022-01-17 00:00:00-05:00,86.370003,86.620003,79.190002,79.279999,77.537033,3160300


In [57]:
# Monthly data: same ticker and date range, requested with a one-month interval.
# The result could be stored in a variable instead of only being displayed.
pdr.get_data_yahoo(
    'TXRH',
    start,
    end,
    interval='1mo',
)

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2012-02-01 00:00:00-05:00,15.270000,17.830000,15.140000,16.730000,13.748245,19313600
2012-03-01 00:00:00-05:00,16.730000,17.250000,16.129999,16.639999,13.674287,16516400
2012-04-01 00:00:00-04:00,16.559999,17.410000,15.900000,17.250000,14.252190,13791200
2012-05-01 00:00:00-04:00,18.400000,19.350000,17.240000,18.180000,15.020569,20771500
2012-06-01 00:00:00-04:00,17.780001,18.469999,17.010000,18.400000,15.202334,16592200
...,...,...,...,...,...,...
2021-09-01 00:00:00-04:00,95.500000,97.430000,88.680000,91.330002,88.533943,13586300
2021-10-01 00:00:00-04:00,92.300003,96.410004,85.809998,88.809998,86.460495,19231500
2021-11-01 00:00:00-04:00,89.000000,96.110001,81.309998,82.940002,80.745796,16320800
2021-12-01 00:00:00-05:00,84.500000,92.239998,80.000000,89.279999,86.918068,19761600


In [46]:
# Fetch data for several stocks in a single request.
stocks = [
    'AAPL',
    'GOOGL',
    'V',
]
df = pdr.get_data_yahoo(stocks, start, end)

# Show only the last rows; the columns come back grouped by price field and
# then by ticker, as the displayed table shows.
df.tail()

[*********************100%***********************]  3 of 3 completed


Unnamed: 0_level_0,Adj Close,Adj Close,Adj Close,Close,Close,Close,High,High,High,Low,Low,Low,Open,Open,Open,Volume,Volume,Volume
Unnamed: 0_level_1,AAPL,GOOGL,V,AAPL,GOOGL,V,AAPL,GOOGL,V,AAPL,GOOGL,V,AAPL,GOOGL,V,AAPL,GOOGL,V
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
2022-01-24 00:00:00-05:00,160.442444,130.804001,199.942993,161.619995,130.804001,201.850006,162.300003,131.203003,202.720001,154.699997,124.5,195.649994,160.020004,125.977997,202.179993,162294600,76622000,16054000
2022-01-25 00:00:00-05:00,158.615829,126.934998,200.091553,159.779999,126.934998,202.0,162.759995,129.399002,205.380005,157.020004,126.500504,196.990005,158.979996,128.740005,199.619995,115798400,46960000,12125500
2022-01-26 00:00:00-05:00,158.526505,129.233002,203.924988,159.690002,129.233002,205.869995,164.389999,133.0,210.860001,157.820007,127.141998,203.669998,163.5,131.119003,204.279999,108275300,49130000,10817600
2022-01-27 00:00:00-05:00,158.059921,129.005005,204.202332,159.220001,129.005005,206.149994,163.839996,132.652496,210.960007,158.279999,128.932495,205.850006,162.449997,131.304001,209.5,121954600,31950000,12009800
2022-01-28 00:00:00-05:00,169.088989,133.350998,225.845901,170.330002,133.350998,228.0,170.350006,133.356506,228.119995,162.800003,128.485001,216.100006,165.710007,129.658997,220.639999,179935700,34362000,26425300
