### Libraries

In [1]:
import pandas as pd#data structures and operations for manipulating numerical tables and time series.
import pandas_datareader.data as data#Up-to-date remote data access for pandas.
import datetime as dt#supplies classes for manipulating dates and times
from datetime import timedelta
import matplotlib.pyplot as plt#plotting library for the Python.
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf 
import seaborn as sns;# visualizacion
import warnings
warnings.filterwarnings('ignore') # We can suppress the warnings
from scipy import stats
import statsmodels.api as sm
from scipy.stats import iqr
import warnings
import plotly.express as px
import plotly.graph_objects as go


In [2]:
# `pd.datetime` was removed in pandas 2.0; the stdlib class (imported as `dt`)
# is the drop-in replacement. The helper is kept in case a later cell uses it.
dateparse = lambda dates: dt.datetime.strptime(dates, '%Y-%m-%d')

# Read the raw table first, then convert the index with an explicit format.
# This avoids the deprecated `date_parser=` argument and the contradictory
# `dayfirst=True` (the file stores year-month-day dates), while still failing
# loudly if a date does not match '%Y-%m-%d'.
EDPR = pd.read_csv('EDPR.csv', index_col='Date')
EDPR.index = pd.to_datetime(EDPR.index, format='%Y-%m-%d')
EDPR.head(2)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,WeekDay,Month,Year
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2016-01-04,7.1351,7.1351,7.1351,7.1351,0.0,6.866566,Monday,1,2016
2016-01-05,7.3006,7.3006,7.3006,7.3006,150.0,7.025838,Tuesday,1,2016


In [3]:
# `pd.datetime` was removed in pandas 2.0; the stdlib class (imported as `dt`)
# is the drop-in replacement. The helper is kept in case a later cell uses it.
dateparse = lambda dates: dt.datetime.strptime(dates, '%Y-%m-%d')

# Read the raw table first, then convert the index with an explicit format.
# This avoids the deprecated `date_parser=` argument and the contradictory
# `dayfirst=True` (the file stores year-month-day dates), while still failing
# loudly if a date does not match '%Y-%m-%d'.
VWS = pd.read_csv('VWS.csv', index_col='Date')
VWS.index = pd.to_datetime(VWS.index, format='%Y-%m-%d')
VWS.head(2)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close,WeekDay,Month,Year
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2016-01-04,483.998474,475.468903,473.5,475.468903,2528.0,464.095337,Monday,1,2016
2016-01-05,476.799988,465.43219,479.899994,475.200012,1738.0,463.832916,Tuesday,1,2016


In [4]:
def plot_corr(timeserie):
    '''
    Draw an annotated heatmap of the pairwise Pearson correlations between
    the numeric columns of a DataFrame.

    The Pearson coefficient measures the strength of the linear relationship
    between two variables and lies between -1 and 1:
    -1 is a total negative linear correlation,
     0 is no linear correlation,
    +1 is a total positive linear correlation.

    Parameters
    ----------
    timeserie : pandas.DataFrame
        Table whose numeric and boolean columns are correlated. Any other
        column (for example a text label) is left out of the heatmap.

    Returns
    -------
    None
        The figure is shown with plt.show().
    '''
    # Since pandas 2.0, DataFrame.corr() raises on text columns instead of
    # silently dropping them, so restrict to numeric/bool columns explicitly.
    numeric_columns = timeserie.select_dtypes(include=['number', 'bool'])

    plt.figure(figsize=(12, 3))
    # vmin/vmax pin the colour scale to the full correlation range so that
    # heatmaps of different tables are visually comparable.
    heatmap = sns.heatmap(numeric_columns.corr(), vmin=-1, vmax=1, annot=True, cmap="YlGnBu")
    heatmap.set_title('Correlation Heatmap', fontdict={'fontsize': 18}, pad=16)
    plt.show()

In [5]:
def _style_right_axis(ax, ylabel, ticks, label_color):
    '''Put the y axis of `ax` on the right, label it and add a light horizontal grid.'''
    ax.yaxis.tick_right()
    ax.tick_params(axis='both', **ticks)
    ax.set_ylabel(ylabel, fontsize=14)
    ax.yaxis.set_label_position("right")
    ax.yaxis.label.set_color(label_color)
    ax.grid(axis='y', color='gainsboro', linestyle='-', linewidth=0.5)
    ax.set_axisbelow(True)  # draw the grid behind the data


def Close_Vol(timeserie, title):
    '''
    Plot the historical closing price (top panel) and traded volume (bottom
    panel) to spot trends and the price levels at which investors are
    particularly active.

    Parameters
    ----------
    timeserie : pandas.DataFrame
        Must provide the columns 'Close' and 'Volume'; its index is used as
        the x axis of both panels.
    title : str
        Title shown above the price panel.

    Returns
    -------
    None

    Notes
    -----
    The volume values are plotted exactly as stored in the 'Volume' column;
    the "in Millions" wording of the axis label is text only, no scaling is
    applied here.
    '''
    grey = '#42535b'
    tick_style = {'size': 14, 'color': grey, 'labelcolor': grey}

    # NOTE: this changes the session-wide default figure size, not just this
    # figure (which sets its own figsize below). It is kept because plots
    # drawn after this call may rely on that default.
    plt.rc('figure', figsize=(15, 10))

    # Two stacked panels, the price panel three times as tall as the volume one.
    fig, (plot_close, plot_vol) = plt.subplots(
        2, 1, gridspec_kw={'height_ratios': [3, 1]}, figsize=(12, 8))
    fig.tight_layout(pad=3)

    date = timeserie.index

    # Panel 1: historical price
    plot_close.plot(date, timeserie['Close'], color='blue', linewidth=2, label='Price')

    # Panel 2: historical volume
    plot_vol.bar(date, timeserie['Volume'], width=15, color='red')

    # Both panels share the same right-hand axis styling.
    _style_right_axis(plot_close, 'Close (in EUR)', tick_style, grey)
    _style_right_axis(plot_vol, 'Volume (in Millions)', tick_style, grey)

    plot_close.set_title(title)


In [6]:
# graphs to show seasonal_decompose
def ETS_Decomposition(timeseries, model='mul', period=30):
    '''
    Plot the breakdown of a series into its trend, seasonality and noise
    (residual) components.

    Parameters
    ----------
    timeseries : pandas.Series or array-like
        Observations to decompose; passed unchanged to
        statsmodels' seasonal_decompose.
    model : str, default 'mul'
        Decomposition type forwarded to seasonal_decompose
        ('mul' = multiplicative, as used originally).
    period : int, default 30
        Number of observations in one seasonal cycle.

    Returns
    -------
    None
        The figure is shown with plt.show().
    '''
    # statsmodels renamed the `freq` keyword to `period`; current releases
    # reject `freq`, so the new name has to be used here.
    decomposition = sm.tsa.seasonal_decompose(timeseries, model=model, period=period)
    fig = decomposition.plot()
    fig.set_size_inches(14, 7)
    plt.show()

In [7]:
### Plot for testing Stationarity with mean and standard deviation
def plot_stationarity(timeseries, title):
    '''
    Visual stationarity check: overlay the series with its rolling mean and
    rolling standard deviation, both computed over 30 observations.

    If the rolling statistics stay close to level alongside the series, the
    series looks stationary; if they drift, it looks non-stationary.

    `title` is used as the legend label of the raw series.
    '''
    # One rolling window object feeds both statistics.
    window = pd.Series(timeseries).rolling(30)
    rolling_mean = window.mean()
    rolling_std = window.std()

    _, ax = plt.subplots(figsize=(16, 4))

    # Draw the raw series first, then the two rolling statistics on top.
    curves = (
        (timeseries, title),
        (rolling_mean, 'rolling mean'),
        (rolling_std, 'rolling std'),
    )
    for values, legend_label in curves:
        ax.plot(values, label=legend_label)
    ax.legend()

In [8]:
def plot_ACF_PACF(timeseries, title):
    '''
    Show a series together with its autocorrelation (ACF) and partial
    autocorrelation (PACF) plots, each for 50 lags.

    The ACF measures the correlation between the series and its own lagged
    values. The PACF measures the correlation at each lag after controlling
    for the correlations at all shorter lags.

    The series fills the top row of the figure; ACF and PACF share the
    bottom row. `title` is the title of the top panel.
    '''
    plt.figure(figsize=(12, 8))
    grid = (2, 2)
    series_ax = plt.subplot2grid(grid, (0, 0), colspan=2)
    acf_ax = plt.subplot2grid(grid, (1, 0))
    pacf_ax = plt.subplot2grid(grid, (1, 1))

    timeseries.plot(ax=series_ax)
    series_ax.set_title(title)

    # The first observation is skipped because the first value of a
    # differenced series is NaN.
    without_first = timeseries[1:]
    for draw, target_ax in ((plot_acf, acf_ax), (plot_pacf, pacf_ax)):
        draw(without_first, lags=50, ax=target_ax)

    plt.tight_layout()

In [9]:
def plot_models(timeserie, model, title):
    '''
    Overlay the observed series (solid blue) and the model output (dashed
    red) on one set of axes; the closer the two lines follow each other, the
    better the model fits.

    The first observation of both inputs is left out of the plot.
    '''
    fig, ax = plt.subplots(1, sharex=True)
    fig.set_size_inches(15, 6)

    # Observed values first, model values on top of them.
    observed = timeserie.iloc[1:]
    fitted = model.iloc[1:]
    observed.plot(ax=ax, color='b', linestyle='-')
    fitted.plot(ax=ax, color='r', linestyle='--')

    ax.set_title(title)
    ax.legend()
    ax.set_xlabel('Index')
    ax.set_ylabel('Closing price')