### Modules

In [1]:
#pip install pandas-datareader

In [2]:
import datetime as dt
import http.client as http
import json
import os
import urllib.request
from datetime import date, datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_datareader as pdr
import pycurl
import requests
import seaborn as sns
# Default size (width, height) applied to every matplotlib figure in the notebook.
plt.rcParams.update({'figure.figsize': [50, 15]})

### Getting price data

##### First of all we must get the indicator so we can acquire the info of that indicator from the API

In [3]:
class data_recollection(object):
    """
    Groups every data-gathering step of the notebook behind one object.

    Sources wrapped by the methods below: the ESIOS API (indicator catalogue and
    hourly values), the World Bank API, Yahoo! Finance (through pandas-datareader),
    a static table of national holidays and the GDP tables published on
    datosmacro.expansion.com.

    Parameters
    ----------
    token : str, optional
        ESIOS API token. When omitted, the ``ESIOS_TOKEN`` environment variable is
        used, and only if that is unset too does the class fall back to the token
        that used to be hard-coded in the methods.
    """

    # NOTE(review): credential kept only so existing notebooks keep running without
    # configuration. It should be rotated and removed from the source; prefer passing
    # `token=` or exporting ESIOS_TOKEN.
    _LEGACY_TOKEN = '3eae9719f5c8a0dff1c71bb3a6e709bbc37bfce5f6df3662789a1c6fee2ebd67'

    def __init__(self, token=None):
        self.token = token or os.environ.get('ESIOS_TOKEN') or self._LEGACY_TOKEN

    def _fetch_json(self, url, esios=False):
        """
        Download `url` and return the decoded JSON document.

        With ``esios=True`` the ESIOS authorization header is attached and the request
        is sent as HTTP/1.0 (the original code switched to 1.0 because 1.1 "sometimes
        gives problems when requesting the data"). The protocol version is a class
        attribute of ``http.client.HTTPConnection``, so it is restored afterwards to
        avoid affecting every other request made in the same kernel.
        """
        request = urllib.request.Request(url)
        if not esios:
            # The context manager closes the connection once the body has been read.
            with urllib.request.urlopen(request) as response:
                return json.loads(response.read().decode('utf-8'))

        request.add_header("Authorization", "Token token=\"" + self.token + "\"")
        saved_version = (http.HTTPConnection._http_vsn, http.HTTPConnection._http_vsn_str)
        http.HTTPConnection._http_vsn = 10
        http.HTTPConnection._http_vsn_str = 'HTTP/1.0'
        try:
            with urllib.request.urlopen(request) as response:
                return json.loads(response.read().decode('utf-8'))
        finally:
            http.HTTPConnection._http_vsn, http.HTTPConnection._http_vsn_str = saved_version

    def get_list_indicators(self):
        """
        Return the list of indicator descriptions published by the ESIOS API
        (the content of the ``'indicators'`` key of the response).
        """
        catalogue = self._fetch_json('https://api.esios.ree.es/indicators', esios=True)
        return list(catalogue['indicators'])

    def get_indicator(self, indicator, date_today=None):
        """
        Request one indicator from the ESIOS API, grouped by hour, from 2014-04-01
        up to `date_today`.

        Parameters
        ----------
        indicator : int or str
            Indicator id as listed by `get_list_indicators`.
        date_today : str, optional
            Last day to request, formatted "Year-month-day". Defaults to the current
            day, resolved when the method is called (not when the class is defined).

        Returns
        -------
        The content of the ``'indicator'`` key of the response.
        """
        if date_today is None:
            date_today = date.today().strftime("%Y-%m-%d")
        url = 'https://api.esios.ree.es/indicators/'+str(indicator)+'?start_date=2014-04-01T00%3A00%3A00Z&end_date='+date_today+'T23%3A50%3A00Z&groupby=hour'
        payload = self._fetch_json(url, esios=True)
        return payload['indicator']

    def get_values(self, data):
        """
        Turn the ``'values'`` entries of an indicator payload into a dataframe.

        The result has the columns value, datetime, datetime_utc, tz_time, geo_id and
        geo_name; the three time columns are parsed with `pd.to_datetime`, and only
        rows whose geo_name is 'España' or 'Península' are kept.

        Raises
        ------
        KeyError
            If `data` has no ``'values'`` key or an entry lacks one of the columns.
        """
        columns = ['value', 'datetime', 'datetime_utc', 'tz_time', 'geo_id', 'geo_name']
        # Indexing each entry explicitly keeps the KeyError on malformed entries.
        records = [{column: entry[column] for column in columns} for entry in data['values']]
        df = pd.DataFrame(records, columns=columns)
        for time_column in ('datetime', 'datetime_utc', 'tz_time'):
            df[time_column] = pd.to_datetime(df[time_column])
        # .copy() hands callers an independent frame they can rename in place.
        return df[df['geo_name'].isin(['España', 'Península'])].copy()

    def worldbank_info(self, indicator, start_year=2014, end_year=2020):
        """
        Download a World Bank indicator and return the yearly values reported for Spain.

        Parameters
        ----------
        indicator : str
            World Bank indicator code, e.g. 'FP.CPI.TOTL'.
        start_year, end_year : int, optional
            Inclusive range of years to keep (2014-2020 by default).

        Returns
        -------
        pandas.DataFrame with an integer ``date`` column (the year) and a float
        ``value`` column.

        Raises
        ------
        ValueError
            If the response does not contain a list of records as its second element.
        """
        url_worldbank = 'http://api.worldbank.org/v2/country/all/indicator/'+indicator+'?per_page=20000&format=json'
        data = self._fetch_json(url_worldbank)
        if not isinstance(data, list) or len(data) < 2 or data[1] is None:
            raise ValueError('Unexpected World Bank response for indicator ' + repr(indicator))
        rows = [(cell['date'], cell['value'])
                for cell in data[1]
                if cell['country']['value'] == 'Spain']
        df = pd.DataFrame(rows, columns=['date', 'value'])
        df['value'] = df['value'].astype(float)
        df['date'] = df['date'].astype(int)
        return df[(df['date'] >= start_year) & (df['date'] <= end_year)]

    def finance_data(self, indicator):
        """
        Return the historical quotes of `indicator` from Yahoo! Finance, from
        2014-04-01 until now, as delivered by pandas-datareader's 'yahoo' source.
        """
        return pdr.data.DataReader(indicator, 'yahoo', start=datetime(2014, 4, 1), end=datetime.now())

    def national_holidays(self):
        """
        Return a dataframe (columns day, month, value) flagging with value 1 the days
        treated as holidays for the whole country:
            - 1 January -> New Year's Day
            - 6 January -> Epiphany
            - 10 April -> Good Friday
            - 1 May -> Labour Day
            - 15 August -> Assumption of Mary
            - 12 October -> National Day (Día de la Hispanidad)
            - 8 December -> Immaculate Conception
            - 25 December -> Christmas Day

        NOTE(review): the table carries no year, so 10 April is used as Good Friday
        for every year although that date moves - confirm this is acceptable.
        """
        festivos = [[1, 1, 1], [6, 1, 1], [10, 4, 1], [1, 5, 1],
                    [15, 8, 1], [12, 10, 1], [8, 12, 1], [25, 12, 1]]
        return pd.DataFrame(festivos, columns=['day', 'month', 'value'])

    def pib_data(self):
        """
        Collect the GDP (PIB) tables from datosmacro.expansion.com, one request per
        year from the current year back to 2014, and stack them in that order.

        For each page the first HTML table is used and its last row is discarded
        (presumably a summary/footer row - TODO confirm against the page).
        """
        yearly_tables = []
        for year in reversed(range(2014, (datetime.today().year+1))):
            url = 'https://datosmacro.expansion.com/pib/espana?anio='+str(year)
            first_table = pd.read_html(url)[0]
            yearly_tables.append(first_table.iloc[:-1])
        # A single concat replaces the removed DataFrame.append and avoids re-copying
        # the accumulated frame on every iteration.
        return pd.concat(yearly_tables) if yearly_tables else pd.DataFrame()

We call the class:

In [4]:
# Single collector instance shared by every data-gathering cell below.
data_rec = data_recollection()

Indicadores que tenemos:

- 1014 : PVPC en dos tiempos

- 1013 : PVPC en un tiempo

- 1293 : Demanda real

- 10229 : PVPC en un tiempo (si te metes en la pagina web aparece desglosado)

- 10230 : PVPC en dos tiempos (si te metes en la pagina web aparece desglosado)

- 600 : precio marginal mercado diario

- 10027 : prevision de demanda electrica

- 10010 : generacion programada de energía eólica

- 10008 : Su desglose muestra la energía programada por tipo de producción del Carbón.

- 612 : Precio marginal mercado intradiario sesion 1

- 613 : Precio marginal mercado intradiario sesion 2

- 542 : Generación prevista Solar

- 460 : Calendario de la demanda diaria eléctrica peninsular según la prevision

- 369 : Demanda programada corrección eólica

- 370 : Demanda programada corrección solar

- 541 : Previsión de la producción eólica nacional peninsular

- 805 : Precio medio horario componente mercado diario

- 92 : Generación Biogas

- 91 : Generacion Biomasa

- 79 : Generacion ciclo combinado

- 95 : Generacion consumo bombeo

- 88 : Generacion derivados de petroleo o carbon

- 90 : Generacion energia residual

- 96 : Generacion enlace baleares

- 82 : Generacion eolica terrestre

- 81 : Generacion gas natural

- 87 : Generacion gas natural cogeneracion

- 71 : Generacion hidraulica UGH

- 72 : Generacion hidraulica no UGH

- 77 : Generacion hulla-antracita

- 78 : Generacion hulla sub-bituminosa

- 74 : Generacion nuclear

- 86 : Generacion oceano y geotermica

- 93 : Generacion residuos domesticos

- 94 : Generacion varios

- 84 : Generacion solar fotovoltaica

- 85 : Generacion solar termica

- 89 : Generacion subproductos mineria

- 73 : Generacion turbinación bombeo


indicador yahoo:

 - REE.MC -> Red electrica española
 
 - %5EIBEX -> IBEX35

data: https://databank.worldbank.org/home.aspx

Consumer price index (2010 = 100) (FP.CPI.TOTL)

Time required to get electricity (days) (IC.ELC.TIME)

Inflation, consumer prices (annual %) (FP.CPI.TOTL.ZG)

Employment in industry (% of total employment) (modeled ILO estimate) (SL.IND.EMPL.ZS)

## Merging data

Now that the class is defined, we are ready to get all the information and manage to manipulate all the tables in order to get our final dataframe.

### 1. Yahoo! Finance info

If we want to show, somehow, the effect of a crisis, we may add the stock market value of IBEX35 to show the evolution of the country. Moreover, we will add the stock market value for the REE as well. 

In [5]:
# Yahoo! Finance tickers mapped to the column names they get in the final dataframe.
stock_market_dict = dict([
    ('REE.MC', 'Red_Electrica'),
    ('%5EIBEX', 'IBEX35'),
])
# Tickers to download, in the same order as the mapping above.
stock_market_indicators = list(stock_market_dict)

In [6]:
# One quotes dataframe per ticker, in the order of `stock_market_indicators`.
stock_market_list = list(map(data_rec.finance_data, stock_market_indicators))

### 2. ESIOS info

We will create a list in which every element will be a dataframe, so we will end up with a list of dataframes that we will join later.

In [7]:
# ESIOS indicator used as the objective column of the final dataframe.
objective = 1014
# ESIOS indicators requested as additional columns.
indicators_list = [
    10027, 600, 612, 613, 369, 370,
    92, 91, 79, 95, 88, 90, 96, 82, 81, 87,
    71, 72, 77, 78, 74, 86, 93, 94, 84, 85, 89, 73,
]

Then we get the values for the indicators:

In [8]:
# Raw ESIOS payload for every indicator: one network request each, so this cell is slow.
data_list = list(map(data_rec.get_indicator, indicators_list))

KeyboardInterrupt: 

In [None]:
# One dataframe per indicator payload, in the order of `indicators_list`.
dataframes_list = [data_rec.get_values(payload) for payload in data_list]

In [None]:
# Raw ESIOS payload of the objective indicator.
values_objective = data_rec.get_indicator(objective)

In [None]:
# Objective values as a dataframe; every other source is merged onto it later.
df_objective = data_rec.get_values(values_objective)

We need to get the names of the indicators so we can identify them in the dataframe:

In [None]:
# Indicator catalogue from ESIOS; each entry is read below through its 'id' and 'name'.
descriptions = data_rec.get_list_indicators()

In [None]:
# Characters rewritten when an indicator name becomes a column name:
# spaces turn into underscores and accented vowels lose their accent.
_COLUMN_NAME_TABLE = str.maketrans({' ': '_', 'á': 'a', 'é': 'e', 'í': 'i', 'ó': 'o', 'ú': 'u'})


def _column_name(indicator_name):
    """Return `indicator_name` rewritten as a dataframe-friendly column name."""
    return indicator_name.translate(_COLUMN_NAME_TABLE)


# Set lookup instead of scanning `indicators_list` for every catalogue entry.
_wanted_ids = set(indicators_list)

# id -> column name for every requested indicator, in catalogue order.
names = {}
# id -> column name for the objective indicator; keyed by `objective` rather than a
# repeated literal so it follows the value configured above.
objective_name = {}
for description in descriptions:
    if description['id'] in _wanted_ids:
        names[description['id']] = _column_name(description['name'])
    if description['id'] == objective:
        objective_name[description['id']] = _column_name(description['name'])

We get the index of each name in our list of dataframes:

In [None]:
# Position inside `indicators_list` (and therefore `dataframes_list`) of every named
# indicator, skipping the objective id.
index_indicators = [
    indicators_list.index(indicator_id)
    for indicator_id in names
    if indicator_id != 1014
]

In [None]:
# Give the generic 'value' column of each indicator dataframe its indicator name.
list_of_names = list(names.values())
for position, column_name in zip(index_indicators, list_of_names):
    dataframes_list[position].rename(columns={'value': column_name}, inplace=True)

# Same for the objective dataframe.
df_objective.rename(columns={'value': objective_name[1014]}, inplace=True)

Finally, we can merge all the dataframes now that we can identify the columns:

In [None]:
limit = len(dataframes_list) + 1

# Start from the objective and left-join every indicator on the three time columns.
df_esios = df_objective.copy()
merge_keys = ['datetime_utc', 'datetime', 'tz_time']
for indicator_frame in dataframes_list[:limit]:
    # Only the first four columns take part in the join.
    first_four_columns = indicator_frame.iloc[:, 0:4]
    df_esios = df_esios.merge(first_four_columns, how='left', on=merge_keys)
    df_esios = df_esios.drop_duplicates()

We create some additional columns that will be useful for merging the other dataframes:

In [None]:
# Calendar parts of the UTC timestamp, used as join keys by the later merges.
for part in ('day', 'month', 'year', 'hour', 'quarter'):
    df_esios[part] = getattr(df_esios['datetime_utc'].dt, part)
# Re-parse 'datetime' as UTC timestamps.
df_esios['datetime'] = pd.to_datetime(df_esios['datetime'], utc=True)

### 3. WorldBank info

Some info of the WorldBank API has been requested related to the industry and electricity sector. 

In [None]:
# World Bank indicator codes to download.
worldbank_indicators = [
    'FP.CPI.TOTL',
    'IC.ELC.TIME',
    'FP.CPI.TOTL.ZG',
    'SL.IND.EMPL.ZS',
]

In [None]:
# One yearly dataframe per World Bank indicator, in the order of `worldbank_indicators`.
worldbank_list = list(map(data_rec.worldbank_info, worldbank_indicators))

We can write a dictionary with the name of each indicator:

In [None]:
# World Bank indicator code -> column name used in the merged dataframe.
dict_worldbank = dict([
    ('FP.CPI.TOTL', 'Consumer_price_index'),
    ('IC.ELC.TIME', 'Time_required_to_get_electricity_(days)'),
    ('FP.CPI.TOTL.ZG', 'Inflation,consumer_prices_(annual_%)'),
    ('SL.IND.EMPL.ZS', 'Employment_in_industry_(%_of_total_employment)'),
])

After we get the dataframes, we merge them:

In [None]:
limit = len(worldbank_list) + 1

# Attach each yearly World Bank series to the hourly rows of the same year.
df_esios2 = df_esios.copy()
for i, datafr in enumerate(worldbank_list[:limit]):
    yearly_values = datafr.iloc[:, 0:4]
    df_esios2 = df_esios2.merge(yearly_values, how='left', left_on='year', right_on='date')
    # 'date' only served as the join key.
    df_esios2 = df_esios2.drop_duplicates().drop('date', axis=1)
    # Rename straight away so the next series does not collide on 'value'.
    df_esios2.rename(columns={'value': dict_worldbank[worldbank_indicators[i]]}, inplace=True)

### 4. National holidays

It is important to know the national holidays in Spain, so we create a table with these days and include this information in our final dataframe.

In [None]:
# Static (day, month, value) table of national holidays.
holidays = data_rec.national_holidays()

In [None]:
# Flag holidays: rows matching a (day, month) of the holidays table get 1.
df_esios3 = (
    df_esios2
    .merge(holidays, how='left', on=['day', 'month'])
    .rename(columns={'value': 'holidays'})
)

# Non-holiday rows come out of the left join as NaN. Assign the filled column back
# instead of calling fillna(inplace=True) on the selected column: that chained call
# acts on a temporary object and does not update the dataframe under Copy-on-Write.
df_esios3['holidays'] = df_esios3['holidays'].fillna(0)

### 5. GDP

We are adding the Gross Domestic Product in Spain, so we can include some more economic-social data.

In [None]:
# GDP tables stacked by pib_data(), one block per requested year.
gdp = data_rec.pib_data()

In [None]:
# Get the columns to merge
gdp['quarter'],gdp['str'],gdp['year']=zip(*gdp['Fecha'].str.split())
# Replace the values 
gdp['quarter'].replace({'I':'1','II':'2','III':'3','IV':'4'},inplace=True)
# Change the datatype to integer
gdp[['quarter','year']]=gdp[['quarter','year']].astype('int')
# We drop columns that we don't need
gdp.drop('str',inplace=True,axis=1)

In [None]:
# Column names of the GDP table rewritten without spaces or dots.
gdp_column_names = {
    'PIB Trimestral': 'PIB_Trimestral',
    'Var. Trim. PIB (%)': 'Var_Trim_PIB_(%)',
    'Var. anual PIB Trim. (%)': 'Var_anual_PIB_Trim_(%)',
}

# Attach the quarterly GDP figures to every hourly row of the same quarter and year.
df_esios4 = df_esios3.merge(gdp, how='left', on=['quarter', 'year'])
df_esios4 = df_esios4.drop(['Fecha', 'PIB Trimestral.1'], axis=1)
df_esios4 = df_esios4.rename(gdp_column_names, axis=1)

### 6. Merging Finance info

As we said before, the financial data gives problems when requesting them after the esios info, so in order to get everything correct, we are now merging the info that we collected before:

In [None]:
limit = len(stock_market_list) + 1

# Join each ticker's 'Adj Close' series onto the rows whose 'datetime' equals the quote timestamp.
df_final = df_esios4.copy()
for i, datafr in enumerate(stock_market_list[:limit]):
    # Make the quotes index UTC-aware so it is comparable with the 'datetime' column.
    datafr.index = pd.to_datetime(datafr.index, utc=True)
    # Column at position 5, renamed from 'Adj Close' below.
    quotes = datafr.iloc[:, 5].to_frame()
    df_final = df_final.merge(quotes, how='left', right_index=True, left_on='datetime')
    df_final = df_final.drop_duplicates()
    df_final.rename(columns={'Adj Close': stock_market_dict[stock_market_indicators[i]]}, inplace=True)

### 7. Weather data

```
aemet_api='eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJqb2VsLmRlbGFjcnV6ZnVlcnRlc0Bob3RtYWlsLmNvbSIsImp0aSI6IjllYTk2Mzc3LWIxNWItNDAyYS04MmMzLTNjMzVjMzA2ODQ4NCIsImlzcyI6IkFFTUVUIiwiaWF0IjoxNTg4NDMzOTQ5LCJ1c2VySWQiOiI5ZWE5NjM3Ny1iMTViLTQwMmEtODJjMy0zYzM1YzMwNjg0ODQiLCJyb2xlIjoiIn0.rTkcngrv3uJf4RRcJbM14af19pfE5eTT6edG1i-JyFY'
```

``` python
url = "https://opendata.aemet.es/opendata/api/valores/climatologicos/inventarioestaciones/todasestaciones/"

querystring = {"api_key":aemet_api}

headers = {
    'cache-control': "no-cache"
    }

response = requests.request("GET", url, headers=headers, params=querystring)

print(response.text)
```

 ### Exporting to csv

In [None]:
# Persist the merged dataframe (without the index) for the exploration notebooks.
df_final.to_csv('../Exploring_data/TFM_dataframe.csv', index=False)