In [1]:
import vaex as vx
import dask.dataframe as dd
import pandas as pd
import numpy as np
import os
from datetime import datetime
from dateutil import parser
import matplotlib.pyplot as plt
from matplotlib import dates as mpl_dates
from scipy.signal import savgol_filter
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from statsmodels.tsa.stattools import acf, pacf, arma_order_select_ic
from sklearn.model_selection import ParameterGrid
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from data_struct import dtypes

# univariate lstm example
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten

import warnings
warnings.filterwarnings('ignore')

%matplotlib widget
%load_ext line_profiler

In [2]:
plt.rcParams.update({
    "lines.color": "white",
    "patch.edgecolor": "white",
    "text.color": "black",
    "axes.facecolor": "white",
    "axes.edgecolor": "lightgray",
    "axes.labelcolor": "white",
    "xtick.color": "white",
    "ytick.color": "white",
    "grid.color": "lightgray",
    "figure.facecolor": "black",
    "figure.edgecolor": "black",
    "savefig.facecolor": "black",
    "savefig.edgecolor": "black"})
plt.style.use('dark_background')


In [3]:
path = os.getcwd()

In [4]:
# Imports data
parse_dates = ['Sale Date', 'Sale Day', 'Sale Date Local']

#custom_dates = lambda x: parser.parse(x) or pd.to_datetime
#You can try both to check speed of import
try:
    data = vx.open('%s//Data//consumption_data.csv.hdf5'%path)
except (FileNotFoundError):
    data = vx.from_csv('%s//Data//consumption_data.csv'%path, 
                       dtype=dtypes, parse_dates=parse_dates, date_parser=pd.to_datetime,
            chunk_size=100_000, convert=True)

In [5]:
# Viewing Data
display(data.head())

#,Service Transaction ID,Sale ID,Sale Date,Sale Item ID,Sale Facility Country,Sale Day,Sale Facility,Sale Facility ID,Mph Arma ID,Customer Type,Vdl Drug ID,Vdl Drug Display Name,Unit Selling Price Local,Unit Selling Price Usd,Quantity In Units,Quantity In Packs,Sale Item Selling Price Local,Sale Item Selling Price Usd,Unit Vm I Cost Price Local,Sale Item Vm I Cost Price Local,Sale Item Vm I Cost Price Usd,Sale Item Vm I Margin Local,Sale Item Vm I Margin Usd,Unit Cost Price Local,Unit Cost Price Usd,Sale Item Cost Price Local,Sale Item Cost Price Usd,Sale Item Margin Local,Sale Item Margin Usd,Payment Method Type,Payment Timing Type,Product ID,Product Name,Product Alias ID,Product Alias Name,Patient Facility ID,Patient Facility Name,Patient Risk Tier Type,Corporate Facility Name,Customer Corporate Type,Facility Country,Sale Date Local,Item Category,Item Sub Category,Insurance Pay Or,Product Source,Vat Local (for The It Em),Vat Usd (for The It Em),Is Manual,Receipt ID,How Its Sold,Is Off Formula Ry,Mv Drug Abc → Country,Mv Drug Abc → Class,Mv Drug Abc → Class By Value,Mv Drug Abc → Class By Quantity,Mv Drug Abc → Class By Transaction Count,Mv Drug Abc → Class By Margin
0,91448,53b21d1b-5f91-4498-81cc-c5c78e5296b8,2020-02-22 20:39:03.550000000,10634,Ghana,2020-02-22 00:00:00.000000000,3A HEALTH,5e4a8e5ded73cf0076d21dfc,3670829,guest,165693,LONART DS x1,15.93,2.99195,1,1.0,15.93,2.99195,0,0,0,15.93,2.99195,0,0,0,0,15.93,2.99195,Cash,Time of Sale,d3d12db6-c8ac-4bad-9275-0df1bf542501,LONART DS x1,eeddbfa9-c5c5-4e07-bcc8-73b75a8567b7,LONART DS x1,5e4a8e5ded73cf0076d21dfc,3A HEALTH,No Policy,--,guest,Ghana,2020-02-22 20:39:03.550000000,DRUGS,Anti-Infectives,,formulary,0,0,False,1DFC-0000000001,TABLET,False,Ghana,A,A,B,A,A
1,91466,456beab8-433b-43f1-ba0c-0b9a3c62ca45,2020-02-22 13:07:50.726000000,10647,Ghana,2020-02-22 00:00:00.000000000,Narteva Pharmacy,5e4a8caded73cf0076d21df4,9616396,member,228432,LOFNAC 100MG TAB 10'S x20,1.25,0.234773,1,0.05,1.25,0.234773,0,0,0,1.25,0.234773,0,0,0,0,1.25,0.234773,Cash,Time of Sale,09e94d4e-d77e-4b03-8f18-23e7df323d7e,LOFNAC 100MG TAB 10'S x20,e87b0b20-dcd5-4ba7-84df-b55895d96b4f,LOFNAC 100MG TAB 10'S x20,5e4a8caded73cf0076d21df4,Narteva Pharmacy,No Policy,--,non-corporate member,Ghana,2020-02-22 13:07:50.726000000,DRUGS,Analgesics,,formulary,0,0,False,1DF4-0000000011,TABLET,False,Ghana,A,B,B,A,B
2,91494,8b87d8fa-12d6-43c2-93ad-5fecc1edb1f6,2020-02-22 21:48:41.710000000,10662,Ghana,2020-02-22 00:00:00.000000000,3A HEALTH,5e4a8e5ded73cf0076d21dfc,3670829,guest,165649,FERROUS SULPHATE x28 UK,0.39,0.0732493,2,0.0714286,0.78,0.146499,0,0,0,0.78,0.146499,0,0,0,0,0.78,0.146499,Cash,Time of Sale,86bfb6d9-89b2-48a3-8ccd-db0835ab1c94,FERROUS SULPHATE x28 UK,3aa1c70f-3bf9-4fd2-a5e2-b3a5ea1c084c,FERROUS SULPHATE x28 UK,5e4a8e5ded73cf0076d21dfc,3A HEALTH,No Policy,--,guest,Ghana,2020-02-22 21:48:41.710000000,DRUGS,Supplements,,formulary,0,0,False,1DFC-0000000019,TABLET,False,--,--,--,--,--,--
3,91507,9a27e282-45f6-40ef-8662-29c2f689e07c,2020-02-22 22:05:22.955000000,10667,Ghana,2020-02-22 00:00:00.000000000,3A HEALTH,5e4a8e5ded73cf0076d21dfc,3670829,guest,169837,CITRO-C 100MG x25,1.66,0.311779,2,0.08,3.32,0.623558,0,0,0,3.32,0.623558,0,0,0,0,3.32,0.623558,Cash,Time of Sale,b0337df8-f7fb-4ef4-83d9-8e087933ab4e,CITRO-C 100MG x25,8f5adcea-420b-49fb-a22f-b76b3802cbbf,CITRO-C 100MG x25,5e4a8e5ded73cf0076d21dfc,3A HEALTH,No Policy,--,guest,Ghana,2020-02-22 22:05:22.955000000,DRUGS,Supplements,,formulary,0,0,False,1DFC-0000000022,TABLET,False,Ghana,A,A,B,A,B
4,91567,fd0be28a-8748-46fa-8765-669dbf127076,2020-02-22 15:14:34.437000000,10705,Ghana,2020-02-22 00:00:00.000000000,Narteva Pharmacy,5e4a8caded73cf0076d21df4,2051523,member,234450,MULTIVITAMIN TABS BLISTER 10'S x50,0.22,0.0413201,30,0.6,6.6,1.2396,0,0,0,6.6,1.2396,0,0,0,0,6.6,1.2396,Cash,Time of Sale,0b6cd367-9015-49b4-8cdf-e8b5234d3183,MULTIVITAMIN TABS BLISTER 10'S x50,8eee24aa-399f-4a03-9e6f-beb4633c248f,MULTIVITAMIN TABS BLISTER 10'S x50,5e4a8caded73cf0076d21df4,Narteva Pharmacy,No Policy,--,non-corporate member,Ghana,2020-02-22 15:14:34.437000000,DRUGS,Supplements,,formulary,0,0,False,1DF4-0000000014,TABLET,False,Ghana,A,C,B,A,C
5,91656,25370f27-0eb7-433f-9a5c-ea7594add8d5,2020-02-23 01:38:15.335000000,10762,Ghana,2020-02-23 00:00:00.000000000,3A HEALTH,5e4a8e5ded73cf0076d21dfc,3670829,guest,165597,CYPRODINE CAPS x1,29.59,5.55756,1,1.0,29.59,5.55756,0,0,0,29.59,5.55756,0,0,0,0,29.59,5.55756,Cash,Time of Sale,1f835d60-10bb-421f-9ec4-c5999691d2cc,CYPRODINE CAPS x1,81fbc10f-5e28-486c-aed3-d7aac56f382c,CYPRODINE CAPS x1,5e4a8e5ded73cf0076d21dfc,3A HEALTH,No Policy,--,guest,Ghana,2020-02-23 01:38:15.335000000,DRUGS,Supplements,,formulary,0,0,False,1DFC-0000000056,TABLET,False,Ghana,A,A,C,B,A
6,91676,8b47d255-7c60-4cb8-ab91-2c8d7d30e8fc,2020-02-23 01:53:53.369000000,10777,Ghana,2020-02-23 00:00:00.000000000,3A HEALTH,5e4a8e5ded73cf0076d21dfc,3670829,guest,173007,LOFNAC 100 SUPP x10,0.93,0.174671,10,1.0,9.3,1.74671,0,0,0,9.3,1.74671,0,0,0,0,9.3,1.74671,Cash,Time of Sale,a3d322c1-297a-4c1e-aaa3-1b502e6d55b2,LOFNAC 100 SUPP x10,2f35fb81-be8d-47de-b52c-bee525c52cb7,LOFNAC 100 SUPP x10,5e4a8e5ded73cf0076d21dfc,3A HEALTH,No Policy,--,guest,Ghana,2020-02-23 01:53:53.369000000,DRUGS,Analgesics,,formulary,0,0,False,1DFC-0000000065,SUPPOSITORY,False,Ghana,B,B,B,B,B
7,91743,5e3ddb1e-af11-42c2-a27a-9701e911351f,2020-02-23 03:00:25.923000000,10816,Ghana,2020-02-23 00:00:00.000000000,3A HEALTH,5e4a8e5ded73cf0076d21dfc,3670829,guest,228228,GEBEDOL TAB x6,1.17,0.219748,1,0.166667,1.17,0.219748,0,0,0,1.17,0.219748,0,0,0,0,1.17,0.219748,Cash,Time of Sale,b69cfdc5-9606-4d09-8e69-fc71f0790c06,GEBEDOL TAB x6,f623fb09-f311-4b18-9caf-39ce39f3d6b1,GEBEDOL TAB x6,5e4a8e5ded73cf0076d21dfc,3A HEALTH,No Policy,--,guest,Ghana,2020-02-23 03:00:25.923000000,DRUGS,Analgesics,,formulary,0,0,False,1DFC-0000000075,TABLET,False,Ghana,A,A,B,A,A
8,91745,7b803d2e-216a-4e74-8821-a237d471d2b5,2020-02-23 03:02:34.736000000,10818,Ghana,2020-02-23 00:00:00.000000000,3A HEALTH,5e4a8e5ded73cf0076d21dfc,3670829,guest,147625,AMOXICILLIN 500MG x21,0.53,0.0995439,21,1.0,11.13,2.09042,0,0,0,11.13,2.09042,0,0,0,0,11.13,2.09042,Cash,Time of Sale,0a10d97c-c473-4472-b072-184e7699c1c8,AMOXICILLIN 500MG x21,7640955c-81f7-4f49-abb6-5116df3482b9,AMOXICILLIN 500MG x21,5e4a8e5ded73cf0076d21dfc,3A HEALTH,No Policy,--,guest,Ghana,2020-02-23 03:02:34.736000000,DRUGS,Anti-Infectives,,formulary,0,0,False,1DFC-0000000076,CAPSULE,False,Ghana,B,B,B,B,B
9,91873,548c5ff2-8e51-4504-a409-c29c5bc9d4c7,2020-02-23 05:01:06.210000000,10883,Ghana,2020-02-23 00:00:00.000000000,3A HEALTH,5e4a8e5ded73cf0076d21dfc,7913554,member,203533,DEEP HEAT SPRAY 150ML x1,30.29,5.68903,1,1.0,30.29,5.68903,0,0,0,30.29,5.68903,0,0,0,0,30.29,5.68903,Cash,Time of Sale,efd8cc44-fba6-48f3-b24a-f93f5fb80371,DEEP HEAT SPRAY 150ML x1,8827af06-53c0-48e4-ae49-f469160d8075,DEEP HEAT SPRAY 150ML x1,5e4a8e5ded73cf0076d21dfc,3A HEALTH,No Policy,--,non-corporate member,Ghana,2020-02-23 05:01:06.210000000,DRUGS,Analgesics,,formulary,0,0,False,1DFC-0000000090,SPRAY,False,Ghana,B,B,C,B,B


In [6]:
# Place all Column Names in a List
col_names = data.get_column_names()
# Renaming all columns to fit Vaex standards
for i in col_names:
    data.rename(i,i.replace(" ", "_").replace("-","_"))


In [7]:
data.info()

column,type,unit,description,expression
Service_Transaction_ID,str,,,
Sale_ID,str,,,
Sale_Date,datetime64[ns],,,
Sale_Item_ID,str,,,
Sale_Facility_Country,str,,,
Sale_Day,datetime64[ns],,,
Sale_Facility,str,,,
Sale_Facility_ID,str,,,
Mph_Arma_ID,str,,,
Customer_Type,str,,,

#,Service_Transaction_ID,Sale_ID,Sale_Date,Sale_Item_ID,Sale_Facility_Country,Sale_Day,Sale_Facility,Sale_Facility_ID,Mph_Arma_ID,Customer_Type,Vdl_Drug_ID,Vdl_Drug_Display_Name,Unit_Selling_Price_Local,Unit_Selling_Price_Usd,Quantity_In_Units,Quantity_In_Packs,Sale_Item_Selling_Price_Local,Sale_Item_Selling_Price_Usd,Unit_Vm_I_Cost_Price_Local,Sale_Item_Vm_I_Cost_Price_Local,Sale_Item_Vm_I_Cost_Price_Usd,Sale_Item_Vm_I_Margin_Local,Sale_Item_Vm_I_Margin_Usd,Unit_Cost_Price_Local,Unit_Cost_Price_Usd,Sale_Item_Cost_Price_Local,Sale_Item_Cost_Price_Usd,Sale_Item_Margin_Local,Sale_Item_Margin_Usd,Payment_Method_Type,Payment_Timing_Type,Product_ID,Product_Name,Product_Alias_ID,Product_Alias_Name,Patient_Facility_ID,Patient_Facility_Name,Patient_Risk_Tier_Type,Corporate_Facility_Name,Customer_Corporate_Type,Facility_Country,Sale_Date_Local,Item_Category,Item_Sub_Category,Insurance_Pay_Or,Product_Source,Vat_Local_(for_The_It_Em),Vat_Usd_(for_The_It_Em),Is_Manual,Receipt_ID,How_Its_Sold,Is_Off_Formula_Ry,Mv_Drug_Abc_→_Country,Mv_Drug_Abc_→_Class,Mv_Drug_Abc_→_Class_By_Value,Mv_Drug_Abc_→_Class_By_Quantity,Mv_Drug_Abc_→_Class_By_Transaction_Count,Mv_Drug_Abc_→_Class_By_Margin
0,91448,53b21d1b-5f91-4498-81cc-c5c78e5296b8,2020-02-22 20:39:03.550000000,10634,Ghana,2020-02-22 00:00:00.000000000,3A HEALTH,5e4a8e5ded73cf0076d21dfc,3670829,guest,165693,LONART DS x1,15.93,2.9919522,1.0,1.0,15.93,2.9919522,0.0,0.0,0.0,15.93,2.9919522,0.0,0.0,0.0,0.0,15.93,2.9919522,Cash,Time of Sale,d3d12db6-c8ac-4bad-9275-0df1bf542501,LONART DS x1,eeddbfa9-c5c5-4e07-bcc8-73b75a8567b7,LONART DS x1,5e4a8e5ded73cf0076d21dfc,3A HEALTH,No Policy,--,guest,Ghana,2020-02-22 20:39:03.550000000,DRUGS,Anti-Infectives,,formulary,0.0,0.0,false,1DFC-0000000001,TABLET,false,Ghana,A,A,B,A,A
1,91466,456beab8-433b-43f1-ba0c-0b9a3c62ca45,2020-02-22 13:07:50.726000000,10647,Ghana,2020-02-22 00:00:00.000000000,Narteva Pharmacy,5e4a8caded73cf0076d21df4,9616396,member,228432,LOFNAC 100MG TAB 10'S x20,1.25,0.2347734,1.0,0.05,1.25,0.2347734,0.0,0.0,0.0,1.25,0.2347734,0.0,0.0,0.0,0.0,1.25,0.2347734,Cash,Time of Sale,09e94d4e-d77e-4b03-8f18-23e7df323d7e,LOFNAC 100MG TAB 10'S x20,e87b0b20-dcd5-4ba7-84df-b55895d96b4f,LOFNAC 100MG TAB 10'S x20,5e4a8caded73cf0076d21df4,Narteva Pharmacy,No Policy,--,non-corporate member,Ghana,2020-02-22 13:07:50.726000000,DRUGS,Analgesics,,formulary,0.0,0.0,false,1DF4-0000000011,TABLET,false,Ghana,A,B,B,A,B
2,91494,8b87d8fa-12d6-43c2-93ad-5fecc1edb1f6,2020-02-22 21:48:41.710000000,10662,Ghana,2020-02-22 00:00:00.000000000,3A HEALTH,5e4a8e5ded73cf0076d21dfc,3670829,guest,165649,FERROUS SULPHATE x28 UK,0.39,0.0732493,2.0,0.071428575,0.78,0.1464986,0.0,0.0,0.0,0.78,0.1464986,0.0,0.0,0.0,0.0,0.78,0.1464986,Cash,Time of Sale,86bfb6d9-89b2-48a3-8ccd-db0835ab1c94,FERROUS SULPHATE x28 UK,3aa1c70f-3bf9-4fd2-a5e2-b3a5ea1c084c,FERROUS SULPHATE x28 UK,5e4a8e5ded73cf0076d21dfc,3A HEALTH,No Policy,--,guest,Ghana,2020-02-22 21:48:41.710000000,DRUGS,Supplements,,formulary,0.0,0.0,false,1DFC-0000000019,TABLET,false,--,--,--,--,--,--
3,91507,9a27e282-45f6-40ef-8662-29c2f689e07c,2020-02-22 22:05:22.955000000,10667,Ghana,2020-02-22 00:00:00.000000000,3A HEALTH,5e4a8e5ded73cf0076d21dfc,3670829,guest,169837,CITRO-C 100MG x25,1.66,0.31177908,2.0,0.08,3.32,0.62355816,0.0,0.0,0.0,3.32,0.62355816,0.0,0.0,0.0,0.0,3.32,0.62355816,Cash,Time of Sale,b0337df8-f7fb-4ef4-83d9-8e087933ab4e,CITRO-C 100MG x25,8f5adcea-420b-49fb-a22f-b76b3802cbbf,CITRO-C 100MG x25,5e4a8e5ded73cf0076d21dfc,3A HEALTH,No Policy,--,guest,Ghana,2020-02-22 22:05:22.955000000,DRUGS,Supplements,,formulary,0.0,0.0,false,1DFC-0000000022,TABLET,false,Ghana,A,A,B,A,B
4,91567,fd0be28a-8748-46fa-8765-669dbf127076,2020-02-22 15:14:34.437000000,10705,Ghana,2020-02-22 00:00:00.000000000,Narteva Pharmacy,5e4a8caded73cf0076d21df4,2051523,member,234450,MULTIVITAMIN TABS BLISTER 10'S x50,0.22,0.04132012,30.0,0.6,6.6,1.2396035,0.0,0.0,0.0,6.6,1.2396035,0.0,0.0,0.0,0.0,6.6,1.2396035,Cash,Time of Sale,0b6cd367-9015-49b4-8cdf-e8b5234d3183,MULTIVITAMIN TABS BLISTER 10'S x50,8eee24aa-399f-4a03-9e6f-beb4633c248f,MULTIVITAMIN TABS BLISTER 10'S x50,5e4a8caded73cf0076d21df4,Narteva Pharmacy,No Policy,--,non-corporate member,Ghana,2020-02-22 15:14:34.437000000,DRUGS,Supplements,,formulary,0.0,0.0,false,1DF4-0000000014,TABLET,false,Ghana,A,C,B,A,C
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
720150,50065,--,2019-11-17 13:40:09.826139000,--,Ghana,2019-11-17 00:00:00.000000000,CENTRIXMAS PHARMACY,5d8cd60090ef88007614bc73,6883562,guest,228432,LOFNAC 100MG TAB 10'S x20,1.35,0.25092936,1.0,0.05,1.35,0.25092936,0.0,0.0,0.0,1.35,0.25092936,1.0,0.18587361,1.0,0.18587361,0.35,0.065055765,Cash,Time of Sale,5cdab8a8d03b4f0070b3b68b,LOFNAC 100MG 10'S,--,--,5d8cd60090ef88007614bc73,CENTRIXMAS PHARMACY,No Policy,--,guest,Ghana,2019-11-17 13:40:09.826139000,DRUGS,Analgesics,,formulary,0.0,0.0,false,--,--,--,Ghana,A,B,B,A,B
720151,46791,--,2019-11-09 13:41:26.221462000,--,Ghana,2019-11-09 00:00:00.000000000,Daelyn Pharmacy,5d03c649d03b4f0080f33914,1834120,guest,228432,LOFNAC 100MG TAB 10'S x20,1.35,0.25092936,1.0,0.05,1.35,0.25092936,0.0,0.0,0.0,1.35,0.25092936,1.0,0.18587361,1.0,0.18587361,0.35,0.065055765,Cash,Time of Sale,5cdab8a8d03b4f0070b3b68b,LOFNAC 100MG 10'S,--,--,5d03c649d03b4f0080f33914,Daelyn Pharmacy,No Policy,--,guest,Ghana,2019-11-09 13:41:26.221462000,DRUGS,Analgesics,,formulary,0.0,0.0,false,--,--,--,Ghana,A,B,B,A,B
720152,66256,--,2019-12-23 20:04:53.364947000,--,Ghana,2019-12-23 00:00:00.000000000,Immaculate Pharmacy Ltd,5cd42cc8c8fb56008473b52b,5400865,member,229889,ALLERGY RELIEF TABS (CETIRIZINE 10MG) 10'S x1,16.17,3.0055761,1.0,1.0,16.17,3.0055761,0.0,0.0,0.0,16.17,3.0055761,14.7,2.732342,14.7,2.732342,1.47,0.2732342,Cash,Time of Sale,5ce27a48d03b4f0070b3b72d,ALLERGY RELIEF TABS (CETIRIZINE 10MG) 10'S,--,--,5cd42cc8c8fb56008473b52b,Immaculate Pharmacy Ltd,No Policy,--,non-corporate member,Ghana,2019-12-23 20:04:53.364947000,DRUGS,Anti-Allergy,,formulary,0.0,0.0,false,--,--,--,Ghana,C,C,C,C,C
720153,54506,--,2019-11-27 13:18:51.374416000,--,Ghana,2019-11-27 00:00:00.000000000,Emboch Chemist,5bc5d7149e31280026863a40,1898725,guest,234953,PERMOXYL 250MG CAPS x500,0.16,0.029739777,30.0,0.06,4.8,0.8921933,0.0,0.0,0.0,4.8,0.8921933,0.12,0.022304833,3.6,0.669145,1.2,0.22304833,Cash,Time of Sale,5cfa3df490ef880073b989e9,PERMOXYL 250MG,--,--,5bc5d7149e31280026863a40,Emboch Chemist,No Policy,--,guest,Ghana,2019-11-27 13:18:51.374416000,DRUGS,Anti-Infectives,,formulary,0.0,0.0,false,--,--,--,Ghana,A,B,A,B,B


In [8]:
col_names = data.get_column_names()

# Filtering Data to relevant Columns
df = data.copy()[[col_names[2], col_names[4], col_names[5], col_names[6], 
                col_names[9], col_names[10], col_names[11], col_names[12], col_names[13],
                col_names[14], col_names[15],col_names[41], col_names[43], col_names[44], col_names[50]]]

In [9]:
#Dropping Redundant Facilities
df = df[~df['Sale_Facility'].isin(['Emboch Chemist', 'Free Mart Pharmacy'])] 
df

#,Sale_Date,Sale_Facility_Country,Sale_Day,Sale_Facility,Customer_Type,Vdl_Drug_ID,Vdl_Drug_Display_Name,Unit_Selling_Price_Local,Unit_Selling_Price_Usd,Quantity_In_Units,Quantity_In_Packs,Sale_Date_Local,Item_Sub_Category,Insurance_Pay_Or,How_Its_Sold
0,2020-02-22 20:39:03.550000000,Ghana,2020-02-22 00:00:00.000000000,3A HEALTH,guest,165693,LONART DS x1,15.93,2.9919522,1.0,1.0,2020-02-22 20:39:03.550000000,Anti-Infectives,,TABLET
1,2020-02-22 13:07:50.726000000,Ghana,2020-02-22 00:00:00.000000000,Narteva Pharmacy,member,228432,LOFNAC 100MG TAB 10'S x20,1.25,0.2347734,1.0,0.05,2020-02-22 13:07:50.726000000,Analgesics,,TABLET
2,2020-02-22 21:48:41.710000000,Ghana,2020-02-22 00:00:00.000000000,3A HEALTH,guest,165649,FERROUS SULPHATE x28 UK,0.39,0.0732493,2.0,0.071428575,2020-02-22 21:48:41.710000000,Supplements,,TABLET
3,2020-02-22 22:05:22.955000000,Ghana,2020-02-22 00:00:00.000000000,3A HEALTH,guest,169837,CITRO-C 100MG x25,1.66,0.31177908,2.0,0.08,2020-02-22 22:05:22.955000000,Supplements,,TABLET
4,2020-02-22 15:14:34.437000000,Ghana,2020-02-22 00:00:00.000000000,Narteva Pharmacy,member,234450,MULTIVITAMIN TABS BLISTER 10'S x50,0.22,0.04132012,30.0,0.6,2020-02-22 15:14:34.437000000,Supplements,,TABLET
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
588881,2020-01-07 14:09:58.071529000,Ghana,2020-01-07 00:00:00.000000000,Fresh Spring Chemists Ltd,guest,240472,ENACIN-C 300MG CAPS x100,1.03,0.18739937,20.0,0.2,2020-01-07 14:09:58.071529000,Anti-Infectives,,--
588882,2019-11-17 13:40:09.826139000,Ghana,2019-11-17 00:00:00.000000000,CENTRIXMAS PHARMACY,guest,228432,LOFNAC 100MG TAB 10'S x20,1.35,0.25092936,1.0,0.05,2019-11-17 13:40:09.826139000,Analgesics,,--
588883,2019-11-09 13:41:26.221462000,Ghana,2019-11-09 00:00:00.000000000,Daelyn Pharmacy,guest,228432,LOFNAC 100MG TAB 10'S x20,1.35,0.25092936,1.0,0.05,2019-11-09 13:41:26.221462000,Analgesics,,--
588884,2019-12-23 20:04:53.364947000,Ghana,2019-12-23 00:00:00.000000000,Immaculate Pharmacy Ltd,member,229889,ALLERGY RELIEF TABS (CETIRIZINE 10MG) 10'S x1,16.17,3.0055761,1.0,1.0,2019-12-23 20:04:53.364947000,Anti-Allergy,,--


In [10]:
# Viewing the Relevant Columns
df.get_column_names()
# Sort Time Series
# df = df.sort(by='Sale_Date', ascending=True)

['Sale_Date',
 'Sale_Facility_Country',
 'Sale_Day',
 'Sale_Facility',
 'Customer_Type',
 'Vdl_Drug_ID',
 'Vdl_Drug_Display_Name',
 'Unit_Selling_Price_Local',
 'Unit_Selling_Price_Usd',
 'Quantity_In_Units',
 'Quantity_In_Packs',
 'Sale_Date_Local',
 'Item_Sub_Category',
 'Insurance_Pay_Or',
 'How_Its_Sold']

**ANALYSES PER FACILITY**

In [11]:
#For mock purposes we will go with the facility with the highest number of rows

count = 0
for facility in df['Sale_Facility'].unique():
    if len(df[df['Sale_Facility']==facility]) > count:
        count = len(df[df['Sale_Facility']==facility])
        largest = facility

print(f"{largest} is the largest facility in terms of volume of transactions with  {count} transactions")


Fresh Spring Chemists Ltd is the largest facility in terms of volume of transactions with  53037 transactions


In [12]:
#Using the same idea to get the Item with the highest volume of transactions on
fty_df = df[df['Sale_Facility'] == largest]

count = 0
for item_class in df['Item_Sub_Category'].unique():
    if len(df[df['Item_Sub_Category'] == item_class]) > count:
        count = len(df[df['Item_Sub_Category'] == item_class])
        largest = item_class

print(f"{largest} are sold more often with {count} volume of transactions")


Analgesics are sold more often with 124658 volume of transactions


In [13]:
#Since no data represents medications with no class intuitive conclusions cant be draw with regards to class.
#Lets go with Anti-Infectives instead

con_df = fty_df[fty_df['Item_Sub_Category'] == largest]

In [14]:
#Split the data into the respective Date Frequencies eg. Yearly, Monthly, Weekly, Daily, Hourly
#We are using quantity in Units because you can derive the pack size from it

freq_dict = {}
for i in zip(['W', 'M'], ['weekly', 'monthly']):
    freq_dict[i[1]] = con_df.groupby(by=vx.BinnerTime(con_df['Sale_Date'], resolution=i[0]), agg={'Quantity_In_Units':'sum'})


**PREPROCESSING AND VISUALIZATION**

In [15]:
#Develop in house function for rolling means/std since vaex has no inbuilt function for this

def rolling_window(a, window, find_mean, find_std):
    old_shape = a.shape[0]
    shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
    strides = a.strides + (a.strides[-1],)
    view = np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)
    
    if find_mean:
        final = np.array([view[i,:].mean() for i in range(view.shape[0])])
        
    elif find_std:
        final = np.array([view[i,:].std() for i in range(view.shape[0])])
        
    new_shape = view.shape[0]
    zeros = np.array([0 for x in range(old_shape - new_shape)])
    result = np.concatenate((zeros,final))
    return result

In [43]:
#Build function for plots to showcase the rolling window and SavGol

def plots(x, y, title, window=3, ylabel='Quantity Sold in Units', mean=False, std=False, sav=False):
    date="%b, %d %Y"
    xlabel='Date'
    
    fig = plt.figure()
    plt.plot_date(x, y, linestyle='solid', marker=None, fmt='c', label='Main Time Series Data')
    if sav:
        plt.plot_date(x, savgol_filter(y,window,3),
                      linestyle='solid', marker=None, fmt='y', label='Saviztky Golay Algo with Window Size of %s'%window)
    if mean:
        plt.plot_date(x, rolling_window(y,window=window,find_mean=True,find_std=False),
                      linestyle='solid', marker=None, fmt='r', label='Moving Average with Window Size of %s'%window)
    if std:
        plt.plot_date(x, rolling_window(y,window=window,find_std=True, find_mean=False),
                      linestyle='solid', marker=None, fmt='b', label='Moving Std with Window Size of %s'%window)
    plt.gcf().autofmt_xdate()
    date_format = mpl_dates.DateFormatter(date)
    plt.gca().xaxis.set_major_formatter(date_format)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.legend(loc='best')
    plt.title(title)
    plt.tight_layout()
    plt.show()

In [44]:
windows = [16, 12]

for freq in freq_dict.keys():
    plots(freq_dict[freq]['Sale_Date'].values, freq_dict[freq]['Quantity_In_Units'].values,
          f'{freq} Trend'.upper(),5, mean=True, sav=True)

### **Stationarity Check With ADF Test**

In [18]:
# Adfuller Test to Check for Stationary Datasets
def stat_check(data, ts):
    test = adfuller(data, autolag='AIC')
    results = pd.Series(test[0:4], index=['Test Statistic', 'P-value', '#lags used', 'Number of Observations Used'])
    for key,value in test[4].items():
        results['Critical Value (%s)'%key] = value
    
    print(f'Results of Dickey Fuller Test for {ts} Data \n{results}')


for freq in freq_dict.keys():
    stat_check(freq_dict[freq]['Quantity_In_Units'].values, freq.upper())



Results of Dickey Fuller Test for WEEKLY Data 
Test Statistic                  -3.349615
P-value                          0.012801
#lags used                       2.000000
Number of Observations Used    150.000000
Critical Value (1%)             -3.474715
Critical Value (5%)             -2.881009
Critical Value (10%)            -2.577151
dtype: float64
Results of Dickey Fuller Test for MONTHLY Data 
Test Statistic                 -1.791154
P-value                         0.384862
#lags used                      7.000000
Number of Observations Used    29.000000
Critical Value (1%)            -3.679060
Critical Value (5%)            -2.967882
Critical Value (10%)           -2.623158
dtype: float64


### **1. Seasonal Decomposition to Ensure Stationarity**

In [19]:
# One means of ensuring stationarity is decomposing and taking the residuals
def decompose(data, t_series, period, show_plot=True):
    result = seasonal_decompose(data, period=12)
    if show_plot:
        plots(t_series, result.trend, 'Trend')
        plots(t_series, result.seasonal, 'Seasonality')
        plots(t_series, result.resid, 'Residuals')
    return np.nan_to_num(result.resid, 0)

In [20]:
for freq in freq_dict.keys():
    residuals = decompose(freq_dict[freq]['Quantity_In_Units'].values,
                              freq_dict[freq]['Sale_Date'].values, period=12)
    freq_dict[freq]['residuals'] = residuals


### **2. Log of the Values**

In [21]:
for freq in freq_dict.keys():
    freq_dict[freq]['log'] = freq_dict[freq]['Quantity_In_Units'].apply(lambda x: np.log(x))

for freq in zip(freq_dict.keys(), [24, 7, 4, 12]):
    plots(freq_dict[freq[0]]['Sale_Date'].values, freq_dict[freq[0]]['log'].values,
          f'{freq[0]} Trend'.upper(), freq[1], mean=True, sav=False)

In [22]:
for freq in freq_dict.keys():
    stat_check(freq_dict[freq]['log'].values, freq.upper())

Results of Dickey Fuller Test for WEEKLY Data 
Test Statistic                  -3.938661
P-value                          0.001768
#lags used                       8.000000
Number of Observations Used    144.000000
Critical Value (1%)             -3.476598
Critical Value (5%)             -2.881829
Critical Value (10%)            -2.577589
dtype: float64
Results of Dickey Fuller Test for MONTHLY Data 
Test Statistic                 -2.468085
P-value                         0.123434
#lags used                      0.000000
Number of Observations Used    36.000000
Critical Value (1%)            -3.626652
Critical Value (5%)            -2.945951
Critical Value (10%)           -2.611671
dtype: float64


### 3. **Subtracting Rolling Mean**

In [23]:
for freq in zip(freq_dict.keys(), windows):
    freq_dict[freq[0]]['roll_mean'] = rolling_window(freq_dict[freq[0]]['log'].values, freq[1], find_mean=True, find_std=False)
    freq_dict[freq[0]]['diff_rolling'] = freq_dict[freq[0]]['log'] - freq_dict[freq[0]]['roll_mean']

for col in ['roll_mean', 'diff_rolling']:
    for freq in zip(freq_dict.keys(), windows):
        plots(freq_dict[freq[0]]['Sale_Date'].values, freq_dict[freq[0]][col].values,
              f'{freq[0]} Trend'.upper(), freq[1], mean=True, sav=False)

In [24]:
for freq in freq_dict.keys():
    stat_check(freq_dict[freq]['diff_rolling'].values, freq.upper())

Results of Dickey Fuller Test for WEEKLY Data 
Test Statistic                  -2.495043
P-value                          0.116685
#lags used                       8.000000
Number of Observations Used    144.000000
Critical Value (1%)             -3.476598
Critical Value (5%)             -2.881829
Critical Value (10%)            -2.577589
dtype: float64
Results of Dickey Fuller Test for MONTHLY Data 
Test Statistic                 -1.032874
P-value                         0.741086
#lags used                      1.000000
Number of Observations Used    35.000000
Critical Value (1%)            -3.632743
Critical Value (5%)            -2.948510
Critical Value (10%)           -2.613017
dtype: float64


### **4. Subtracting Exponential Rolling Means**

In [25]:
for freq in zip(freq_dict.keys(), windows):
    temp_df = pd.Series(freq_dict[freq[0]]['log'].values)
    freq_dict[freq[0]]['diff_ewm'] = temp_df.ewm(halflife = freq[1], min_periods=0, adjust=True).mean().values


for freq in zip(freq_dict.keys(), windows):
    plots(freq_dict[freq[0]]['Sale_Date'].values, freq_dict[freq[0]]['diff_ewm'].values,
          f'{freq[0]} Trend'.upper(), freq[1], mean=True, sav=False)

In [26]:
for freq in freq_dict.keys():
    stat_check(freq_dict[freq]['diff_ewm'].values, freq.upper())

Results of Dickey Fuller Test for WEEKLY Data 
Test Statistic                  -3.776439
P-value                          0.003158
#lags used                       9.000000
Number of Observations Used    143.000000
Critical Value (1%)             -3.476927
Critical Value (5%)             -2.881973
Critical Value (10%)            -2.577665
dtype: float64
Results of Dickey Fuller Test for MONTHLY Data 
Test Statistic                 -2.670861
P-value                         0.079218
#lags used                      0.000000
Number of Observations Used    36.000000
Critical Value (1%)            -3.626652
Critical Value (5%)            -2.945951
Critical Value (10%)           -2.611671
dtype: float64


### **5. Differencing of the Dataset**

In [27]:
for freq in freq_dict.keys():
    temp_df = pd.Series(freq_dict[freq]['log'].values)
    freq_dict[freq]['diff_shift'] = np.nan_to_num(np.array(temp_df - temp_df.shift()), 0)

for freq in zip(freq_dict.keys(), windows):
    plots(freq_dict[freq[0]]['Sale_Date'].values, freq_dict[freq[0]]['diff_shift'].values,
          f'{freq[0]} Trend'.upper(), freq[1], mean=True, sav=False)

In [28]:
for freq in freq_dict.keys():
    stat_check(freq_dict[freq]['diff_ewm'].values, freq.upper())

Results of Dickey Fuller Test for WEEKLY Data 
Test Statistic                  -3.776439
P-value                          0.003158
#lags used                       9.000000
Number of Observations Used    143.000000
Critical Value (1%)             -3.476927
Critical Value (5%)             -2.881973
Critical Value (10%)            -2.577665
dtype: float64
Results of Dickey Fuller Test for MONTHLY Data 
Test Statistic                 -2.670861
P-value                         0.079218
#lags used                      0.000000
Number of Observations Used    36.000000
Critical Value (1%)            -3.626652
Critical Value (5%)            -2.945951
Critical Value (10%)           -2.611671
dtype: float64


### **MODELLING WITH ARIMA AND SARIMAX**

In [29]:
# # Differencing
# def diff(dataset, interval, col):
#     result = []
#     for i in range(interval, len(dataset)):
#         offset = i - interval
#         value = dataset[col].values[i] - dataset[col].values[offset]
#         result.append(value)
#     return (result)

# %lprun -f diff diff(dataset=anti_d, interval=1, col="log_data")

In [30]:
# You can determine the p and q lags using the plots however, i choose to use this automated approach and iterate
# through the possible combinations of lags i can get
bic_order = {}
for freq in freq_dict.keys():
    bic = arma_order_select_ic(freq_dict[freq].diff_shift.values)
    bic_order[freq] = bic['bic_min_order']

print(bic_order)

{'weekly': (0, 1), 'monthly': (0, 0)}


In [31]:
# You can plot pacf and the acf curves as well
def plot_acf_pacf(df, ts):
  """
  Plot auto-correlation function (ACF) and partial auto-correlation (PACF) plots
  """
  f, (ax1, ax2) = plt.subplots(1,2, figsize = (10, 5)) 

  #Plot ACF: 

  ax1.plot(lag_acf)
  ax1.axhline(y=0,linestyle='--',color='gray')
  ax1.axhline(y=-1.96/np.sqrt(len(df)),linestyle='--',color='gray')
  ax1.axhline(y=1.96/np.sqrt(len(df)),linestyle='--',color='gray')
  ax1.set_title('Autocorrelation Function for %s' %(ts))

  #Plot PACF:
  ax2.plot(lag_pacf)
  ax2.axhline(y=0,linestyle='--',color='gray')
  ax2.axhline(y=-1.96/np.sqrt(len(df)),linestyle='--',color='gray')
  ax2.axhline(y=1.96/np.sqrt(len(df)),linestyle='--',color='gray')
  ax2.set_title('Partial Autocorrelation Function for %s' %(ts))
  
  plt.tight_layout()
  plt.show()  
  return

In [32]:
# function to run our model
def run_arima_model(df, dates, p, d, q, freq):
    """
    Run ARIMA model
    """
    #assign the date to the index of the dataset
    df = pd.Series(df, index=dates)
    
    # fit ARIMA model on time series
    model = ARIMA(df, order=(p, d, q))  
    results_ = model.fit()  

    # get lengths correct to calculate RSS
    len_results = len(results_.fittedvalues)
    ts_modified = df[-len_results:]

    # calculate root mean square error (RMSE) and residual sum of squares (RSS)
    rss = sum((results_.fittedvalues - ts_modified)**2)
    rmse = np.sqrt(rss / len(df))

    # plot fit
    fig = plt.figure()
    plt.plot(dates, df)
    plt.plot(dates, results_.fittedvalues, color = 'red')
    plt.title('ARIMA model (%i, %i, %i) for RSS: %.4f, RMSE: %.4f %s' %(p, d, q, rss, rmse, freq))

    plt.show()  
    return results_ , rmse

In [33]:
arima_dict = {}
model_list = []

for freq in freq_dict.keys():
    for p in [0,1,2]:
        for d in [0,1]:
            for q in [0,1,2]:
                model_AR = run_arima_model(df=freq_dict[freq].diff_shift.values, dates = freq_dict[freq].Sale_Date.values,
                                           p=p, d=d, q=q, freq=freq)
                if model_list == []:
                    model_list.append(model_AR)
                else:
                    if model_list[0][1] > model_AR[1]:
                        model_list[0] = model_AR
    arima_dict[freq] = model_list[0]
    model_list = []
            

In [34]:
# function to run our model
def run_sarimax_model(df, dates, p, d, q, s, freq):
    """
    Run SARIMAX model
    """
    #assign the date to the index of the dataset
    df = pd.Series(df, index=dates)
    
    # fit SARIMAX model on time series
    model = SARIMAX(df, order=(p, d, q), seasonal_order = (p,d,q,s))  
    results_ = model.fit()  

    # get lengths correct to calculate RSS
    len_results = len(results_.fittedvalues)
    ts_modified = df[-len_results:]

    # calculate root mean square error (RMSE) and residual sum of squares (RSS)
    rss = sum((results_.fittedvalues - ts_modified)**2)
    rmse = np.sqrt(rss / len(df))

    # plot fit
    fig = plt.figure()
    plt.plot(dates, df)
    plt.plot(dates, results_.fittedvalues, color = 'red')
    plt.title('SARIMAX model (%i, %i, %i) for RSS: %.4f, RMSE: %.4f %s' %(p, d, q, rss, rmse, freq))

    plt.show()  
    return results_ , rmse

In [35]:
sarimax_dict = {}
model_list = []

for freq in zip(freq_dict.keys(), windows):
    for p in [0,1,2]:
        for d in [0,1]:
            for q in [0,1,2]:
                model_sa = run_sarimax_model(df=freq_dict[freq[0]].diff_shift.values,
                                             dates = freq_dict[freq[0]].Sale_Date.values, p=p, d=d, q=q,
                                             s=freq[1], freq=freq[0])
                if model_list == []:
                    model_list.append(model_sa)
                else:
                    if model_list[0][1] > model_sa[1]:
                        model_list[0] = model_sa
    sarimax_dict[freq[0]] = model_list[0]
    model_list = []
            

## **Prediction**

In [36]:
df = freq_dict['monthly'].to_pandas_df()
df.set_index('Sale_Date', inplace=True)
model = sarimax_dict['monthly'][0]
pred = pd.Series(model.fittedvalues, copy=True)

In [37]:
def transform(predictions, log):
    pred_cumsum = predictions.cumsum()
    pred_log = log
    pred_log = pred_log.add(pred_cumsum, fill_value=0)
    pred = np.exp(pred_log)
    return pred

In [38]:
from pandas.tseries.offsets import DateOffset
future_dates = [df.index[-1]+ DateOffset(months=x)for x in range(0,24)]
future_datest_df = pd.DataFrame(index=future_dates[1:],columns=df.columns)
future_df=pd.concat([df,future_datest_df])


In [39]:
future_df['forecast'] = transform(model.predict(start = 25, end = 60, dynamic= True), future_df.log)


In [40]:
fut = arima_dict['monthly'][0].predict(start=, end='2023-01')

SyntaxError: invalid syntax (Temp/ipykernel_9580/3756865438.py, line 1)

In [None]:
fut[:len(freq_dict['monthly']['roll_mean'].values)] += freq_dict['monthly']['roll_mean'].values

In [None]:
fut['2021-11':] = fut[:'2021-11'].mean() + fut['2021-11':]

In [None]:
pred = np.exp(fut)

In [None]:
date = pd.date_range(start='2018-10', end='2022-11', freq='M')

In [None]:
def pred_plots(pred_x, pred_y, x, y, pred_label):
    fig = plt.figure()
    plt.plot(x, y, color='r', label=pred_label)
    plt.plot(x, y, color='g', label='Main Time Series')
    plt.legend(loc='best')
    plt.show()

In [None]:
pred_plots(freq_dict['monthly']['Sale_Date'].values, freq_dict['monthly']['arima'].values,
           freq_dict['monthly']['Sale_Date'].values, freq_dict['monthly']['Quantity_In_Units'].values, "Arima_Predictions")

### **LSTM**

In [50]:
# Create two dataframes for  weekly and monthly analysis only to prevent use of the dictionary
weekly = freq_dict['weekly'][['Sale_Date', 'Quantity_In_Units']]
monthly = freq_dict['monthly'][['Sale_Date', 'Quantity_In_Units']]

In [56]:
# Preparing with Monthly Data Only
# univariate data preparation
# split a univariate sequence into samples
def split(arr, n_steps):
	X, y = list(), list()
	for i in range(len(arr)):
		# find the end of this pattern
		end_ix = i + n_steps
		# check if we are beyond the sequence
		if end_ix > len(arr)-1:
			break
		# gather input and output parts of the pattern
		seq_x, seq_y = arr[i:end_ix], arr[end_ix]
		X.append(seq_x)
		y.append(seq_y)
	return np.array(X), np.array(y)

# define input sequence
X, y = split(monthly['Quantity_In_Units'].values, 3)
# summarize the data
for i in range(len(X)):
	print(X[i], y[i])

[65. 50. 22.] 180.0
[ 50.  22. 180.] 325.0
[ 22. 180. 325.] 142.0
[180. 325. 142.] 988.0
[325. 142. 988.] 2680.0
[ 142.  988. 2680.] 1407.0
[ 988. 2680. 1407.] 1765.0
[2680. 1407. 1765.] 2018.0
[1407. 1765. 2018.] 3663.0
[1765. 2018. 3663.] 2608.0
[2018. 3663. 2608.] 2459.0
[3663. 2608. 2459.] 1836.0
[2608. 2459. 1836.] 2110.0
[2459. 1836. 2110.] 1418.0
[1836. 2110. 1418.] 1070.0
[2110. 1418. 1070.] 999.0
[1418. 1070.  999.] 1034.0
[1070.  999. 1034.] 1579.0
[ 999. 1034. 1579.] 1137.0
[1034. 1579. 1137.] 1678.0
[1579. 1137. 1678.] 1380.0
[1137. 1678. 1380.] 1614.0
[1678. 1380. 1614.] 1615.0
[1380. 1614. 1615.] 1148.0
[1614. 1615. 1148.] 1668.0
[1615. 1148. 1668.] 1062.0
[1148. 1668. 1062.] 1429.0
[1668. 1062. 1429.] 809.0
[1062. 1429.  809.] 1408.0
[1429.  809. 1408.] 1566.0
[ 809. 1408. 1566.] 1828.0
[1408. 1566. 1828.] 1889.0
[1566. 1828. 1889.] 1429.0
[1828. 1889. 1429.] 216.0


In [57]:
# reshape from [samples, timesteps] into [samples, timesteps, features]
n_features = 1
X = X.reshape((X.shape[0], X.shape[1], n_features))

#### One Layer LSTM

In [60]:
# define model
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(n_steps, n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
# fit model
model.fit(X, y, epochs=200, verbose=2)


NameError: name 'Sequential' is not defined

In [None]:
# demonstrate prediction
x_input = array([70, 80, 90])
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(yhat)