In [None]:
import json
import os

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import statsmodels.api as sma
import statsmodels.tsa.stattools as sm
from pmdarima.arima import auto_arima
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.seasonal import seasonal_decompose

# Notebook-wide configuration: IPython completer setting and pandas display limit.
%config IPCompleter.greedy=True
# Cap pandas output at 50 rows before it switches to a truncated view.
pd.set_option('display.max_rows', 50)

# Loader for the JSON file that holds the sales data.
def import_data(path="fishVigo.json"):
    """Read the sales records from a JSON file into a DataFrame.

    Parameters
    ----------
    path : str or os.PathLike, default "fishVigo.json"
        Location of the JSON file. The default keeps the original behaviour
        of reading ``fishVigo.json`` from the current working directory.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pandas.read_json`` parses from the file.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist.
    """
    with open(path) as file:
        return pd.read_json(file)

# Load the raw records. import_data() already returns a DataFrame, so it is
# used directly instead of being wrapped in another DataFrame constructor.
df = import_data()
df['date'] = pd.to_datetime(df['date'])

# Index by (date, species) and drop duplicated index entries, keeping the first one.
df = df.set_index(['date', 'name'])
df = df[~df.index.duplicated(keep='first')]

# Convert the quantity text to a number. The visible replacements imply values
# that carry a 'Kg.' suffix, '.' as thousands separator and ',' as decimal mark.
# regex=False makes every replacement a literal match on any pandas version:
# as a regular expression '.' would match every character.
quantity_text = (
    df['quantity']
    .str.replace('Kg.', '', regex=False)
    .str.replace('.', '', regex=False)
    .str.replace(',', '.', regex=False)
    .str.strip()  # drop any whitespace left around the number once the unit is removed
)
df = pd.to_numeric(quantity_text)

# Reshape so that every date has an entry for every species, using 0 where
# a species had no record on that date.
df = df.unstack().fillna(0).stack()
# Preview the first rows (the original referenced df.head without calling it).
df.head()

In [None]:
class StationarityTests:
    """Augmented Dickey-Fuller stationarity check against a significance level.

    After ``ADF_Stationarity_Test`` runs, ``pValue`` holds the test's p-value
    and ``isStationary`` tells whether it fell below ``SignificanceLevel``.
    """

    def __init__(self, significance=.05):
        # Threshold the ADF p-value is compared against.
        self.SignificanceLevel = significance
        # Both stay None until a test has been run.
        self.pValue = None
        self.isStationary = None

    def ADF_Stationarity_Test(self, timeseries, printResults = True):
        """Run the ADF test on ``timeseries`` and store the outcome on the instance.

        Parameters
        ----------
        timeseries : array-like
            Values handed to ``adfuller`` (lag length chosen with ``autolag='AIC'``).
        printResults : bool, default True
            When true, print the statistic, p-value, lags used, number of
            observations and the critical values.
        """
        adf_output = sm.adfuller(timeseries, autolag='AIC')

        self.pValue = adf_output[1]
        # Stationary only when the p-value is strictly below the threshold.
        self.isStationary = bool(self.pValue < self.SignificanceLevel)

        if not printResults:
            return

        labels = ['ADF Test Statistic','P-Value','# Lags Used','# Observations Used']
        report = pd.Series(adf_output[0:4], index=labels)
        # Append one row per critical value reported by the test.
        for level, critical in adf_output[4].items():
            report['Critical Value (%s)'%level] = critical
        print('Augmented Dickey-Fuller Test Results:')
        print(report)

In [None]:
# Percentage that each species contributes to the total quantity.
total = df.sum()
groupName = df.groupby('name').sum()
percentage = groupName.div(total).mul(100)
top = 5

# Keep only the species whose share is above the `top` threshold (in percent).
percentage = percentage[percentage.gt(top)].to_frame(name='quantity')
selectedSpecies = percentage.index.unique().tolist()
percentage = percentage.reset_index()

# Bar chart of the retained shares.
bar = go.Figure(go.Bar(x=percentage['name'], y=percentage['quantity']))
bar = bar.update_layout(
    title='Top '+str(top)+' Cuota',
    yaxis_title="% Porcentaje",
)
bar.show()

In [None]:
# Pull the rows of each selected species out of df and plot its quantities over time.
for species in selectedSpecies:
    # drop_level=False keeps the species level, so reset_index yields both
    # a 'date' and a 'name' column.
    dfTop = (
        df.xs(species, level=1, drop_level=False)
        .to_frame(name='quantity')
        .reset_index()
    )

    fig = go.Figure(
        data=go.Scatter(x=dfTop['date'], y=dfTop['quantity']),
        layout_title_text=species,
    )
    fig.show()

In [None]:
# Run the Augmented Dickey-Fuller test on every selected species and print the verdict.
for species in selectedSpecies:
    df1 = df.xs(species, level=1, drop_level=False)

    sTest = StationarityTests()
    sTest.ADF_Stationarity_Test(df1, printResults=True)
    verdict = " stationary? {0}".format(sTest.isStationary)
    print("Is the time series " + species + verdict)

In [None]:
# Series for the 'GALLO' species only; dropping the species level leaves the date index.
dfPredict = df.xs('GALLO', level=1, drop_level=True)

# Conform to business-day frequency; dates introduced by asfreq are NaN and become 0.
dfPredict = dfPredict.asfreq('B').fillna(0)

# NOTE(review): the seasonal period is 12 on a business-day series, while the
# auto_arima cell further down uses m=5 — confirm that 12 is intended.
model = sma.tsa.statespace.SARIMAX(
    dfPredict,
    order=(0, 1, 1),
    seasonal_order=(0, 1, 1, 12),
)
results = model.fit()
results.plot_diagnostics(figsize=(18, 8))

# Non-dynamic predictions from 2018-01-03 onwards, plotted as their confidence interval.
pred = results.get_prediction(start=pd.to_datetime('2018-01-03'), dynamic=False)
pred_ci = pred.conf_int()
pred_ci.plot()

In [None]:
# Decompose each selected species into the components returned by
# seasonal_decompose (additive model) and save the resulting figure.
# savefig does not create missing folders, so make sure the target exists.
os.makedirs("./plots", exist_ok=True)

for x in selectedSpecies:
    # The original concatenation omitted the dot before the extension.
    y = "./plots/test" + x + ".png"

    dfDecomp = df.xs(x, level=1, drop_level=True)
    # Conform to business-day frequency; dates introduced by asfreq are NaN and become 0.
    dfDecomp = dfDecomp.asfreq('B').fillna(0)

    result = seasonal_decompose(dfDecomp, model='additive')
    fig = result.plot()
    fig.savefig(y, dpi=500)

In [None]:
# Series for the 'RAPE-SAPO' species on business-day frequency; dates
# introduced by asfreq are NaN and become 0.
dfARIMA = df.xs('RAPE-SAPO', level=1, drop_level=True)
dfARIMA = dfARIMA.asfreq('B').fillna(0)

# Stepwise search over seasonal models with period m=5
# (presumably one business week — confirm).
stepwise_model = auto_arima(
    dfARIMA,
    start_p=0, max_p=3,
    start_q=0, max_q=3,
    d=None, D=None,
    start_P=0,
    seasonal=True, m=5,
    stepwise=True,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
)
#print(stepwise_model.aic())
# Fit the selected model on the same series again.
result = stepwise_model.fit(dfARIMA)