# In [None]:
import boto3
import pandas as pd
from io import StringIO
import numpy 
import datetime
# plotting libraries (cufflinks and plotly in offline notebook mode)
import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
cf.go_offline()
#statsmodels for prediction
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima_model import ARIMA, ARIMAResults
from pmdarima import auto_arima
from statsmodels.tsa.seasonal import seasonal_decompose
import sys


def get_data_from_s3(bucket_name, object_name,
                     aws_access_key_id=None, aws_secret_access_key=None):
    """Fetch an object from S3 and parse it as a semicolon-separated CSV.

    Args:
        bucket_name: Name of the S3 bucket that holds the object.
        object_name: Key of the object inside the bucket.
        aws_access_key_id: Optional access key forwarded to ``boto3.client``.
        aws_secret_access_key: Optional secret key forwarded to
            ``boto3.client``.

    Returns:
        The ``DataFrame`` built by ``pandas.read_csv`` (``sep=';'``) from the
        UTF-8 decoded object body.
    """
    # Credentials are deliberately not written into the source file. When both
    # arguments are left as None, boto3 looks them up itself (environment
    # variables, shared credentials file, instance role, ...).
    client = boto3.client(
        's3',
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
    )

    response = client.get_object(Bucket=bucket_name, Key=object_name)
    csv_text = response['Body'].read().decode('utf-8')

    return pd.read_csv(StringIO(csv_text), sep=';')

def prepare_daily_data():
    """Load the weather CSV from S3 and reduce it to one row per day.

    The 'czas' column is parsed as timestamps and becomes the index, the
    frame is put on an hourly grid, and the columns listed below are replaced
    by their daily aggregate (sum for 'naslonecznienie', mean for the rest).
    Finally the frame is converted to daily frequency with ``asfreq('D')``;
    columns that are not listed below are carried through that step without
    aggregation.

    Returns:
        The daily-frequency ``DataFrame`` indexed by 'czas'.
    """
    frame = get_data_from_s3('bucket_name', 'filename.csv')
    # Timestamps as index, then a regular hourly grid.
    frame['czas'] = pd.to_datetime(frame['czas'])
    frame = frame.set_index('czas').asfreq('H')

    # (column, daily aggregation) pairs, applied one column at a time.
    daily_aggregations = (
        ('temp_zewn', 'mean'),
        ('wilgotnosc_wzgl', 'mean'),
        ('predkosc_wiatru', 'mean'),
        ('naslonecznienie', 'sum'),
        ('zachmurzenie', 'mean'),
        ('kierunek_wiatru', 'mean'),
        ('dzien_tygodnia', 'mean'),
    )
    for column, how in daily_aggregations:
        # The daily result is written back into the hourly frame, so it is
        # aligned on the index labels it shares with the hourly grid.
        # NOTE(review): this presumably relies on the data starting at
        # midnight so that those labels survive asfreq('D') - confirm.
        frame[column] = frame[column].resample(rule='D').agg(how)

    return frame.asfreq('D')

def prepare_hourly_data():
    """Load the weather CSV from S3 and return it on an hourly grid.

    The 'czas' column is parsed as timestamps and used as the index before
    the frame is converted to hourly frequency with ``asfreq('H')``.

    Returns:
        The hourly-frequency ``DataFrame`` indexed by 'czas'.
    """
    frame = get_data_from_s3('bucket_name', 'filename.csv')
    # Parse the time column so it can serve as a datetime index.
    frame['czas'] = pd.to_datetime(frame['czas'])
    return frame.set_index('czas').asfreq('H')

def get_seasonal_decompose(prepared_data):
    """Run a seasonal decomposition of ``prepared_data`` and plot the result.

    Args:
        prepared_data: Time series passed straight to statsmodels'
            ``seasonal_decompose``.
    """
    # Use one of the documented model names ("additive" / "multiplicative")
    # so the kind of decomposition is explicit to the reader.
    decomposition = seasonal_decompose(prepared_data, model="additive")
    decomposition.plot()

def show_prediction(df1, df2, x1, x2, x3):
    """Fit an ARIMA model on ``df1``, predict over ``df2``'s span, plot both.

    Args:
        df1: Series the model is fitted on.
        df2: Series that follows ``df1``; its length sets how many steps are
            predicted and it is plotted next to the prediction.
        x1, x2, x3: The three components of the ARIMA ``order`` tuple.

    Returns:
        The predicted series, renamed to 'ARIMA Prediction'.
    """
    fitted = ARIMA(df1, order=(x1, x2, x3)).fit()

    # Predict the positions immediately after the training data, one step
    # for every row of df2.
    first_step = len(df1)
    last_step = first_step + len(df2) - 1
    prediction = fitted.predict(first_step, last_step).rename('ARIMA Prediction')

    df2.plot(figsize=(18, 8), legend=True)
    prediction.plot(legend=True)

    return prediction

def mae(y1, y2, axis=0):
    """Return the mean absolute error between two pandas objects.

    Both inputs are converted with ``to_numpy()``, so values are compared by
    position and index labels are ignored.

    Args:
        y1: First Series/DataFrame.
        y2: Second Series/DataFrame.
        axis: Axis passed to the mean; ``None`` averages over all elements.
    """
    residuals = y1.to_numpy() - y2.to_numpy()
    return numpy.abs(residuals).mean(axis=axis)

def mse(y1, y2, axis=0):
    """Return the mean squared error between two pandas objects.

    Both inputs are converted with ``to_numpy()``, so values are compared by
    position and index labels are ignored.

    Args:
        y1: First Series/DataFrame.
        y2: Second Series/DataFrame.
        axis: Axis passed to the mean; ``None`` averages over all elements.
    """
    residuals = y1.to_numpy() - y2.to_numpy()
    squared = residuals ** 2
    return numpy.mean(squared, axis=axis)

def print_errors(df1, df2):
    """Print MAE, MSE and RMSE between two pandas objects.

    Each metric is computed over all elements (``axis=None``) via the
    module's ``mae`` and ``mse`` helpers.

    Args:
        df1: First Series/DataFrame (e.g. the observed values).
        df2: Second Series/DataFrame (e.g. the predicted values).
    """
    print('Mean absolute error: ', mae(df1, df2, None))
    # Compute the MSE once and reuse it for the root of the same value.
    mean_squared = mse(df1, df2, None)
    print('Mean squared error: ', mean_squared)
    # The value printed here is the square root of the MSE, i.e. the RMSE.
    print('Root mean squared error: ', numpy.sqrt(mean_squared))

# Work on daily aggregates by default; swap in the commented line below to
# analyse the hourly rows instead.
weather_df = prepare_daily_data()
#weather_df=prepare_hourly_data()

length = len(weather_df)
print('Length of base: ', length)

col_to_predict = 'temp_zewn'
col_to_decompose = 'naslonecznienie'
# Exogenous data in the form weather_df[['column']]; set to '' when no
# exogenous data should be used.
col_exegenous = weather_df[['naslonecznienie']]
# Seasonal period counted in rows: for daily data 1 = 1 day, for hourly
# data 1 = 1 hour.
period = 30

# Inspect the analysed column.
#get_seasonal_decompose(weather_df[col_to_decompose])

# Let auto_arima search for the best model.
# NOTE(review): the keyword below is spelled 'exegenous'; pmdarima's
# parameter is presumably 'exogenous' - confirm before enabling this line.
#auto_arima(weather_df[col_to_predict], m=period, exegenous=weather_df[['naslonecznienie']],trace=True, seasonal=True).summary()
#sys.stdout = open("test.txt", "w")
#auto_arima(weather_df[col_to_predict], m=period, trace=True).summary()
#sys.stdout.close()
# ARIMA order components, taken from the auto_arima output.
x1, x2, x3 = 4, 0, 1

# The analysed dataset comes in two possible lengths. For 1890 rows the first
# 1512 rows (80 %) are used for training; otherwise the first 36288 rows.
# NOTE(review): 36288 = 1512 * 24, presumably the same split expressed in
# hours - confirm against the hourly dataset.
split_at = 1512 if length == 1890 else 36288
target_series = weather_df[col_to_predict]
train_df = target_series.iloc[:split_at]
test_df = target_series.iloc[split_at:]

print(train_df)
#prediction=show_prediction(train_df,test_df, x1, x2, x3)
model = ARIMA(train_df, order=(x1, x2, x3))
# Fitted results
res = model.fit()

# Predict the positions right after the training data, one per test row.
start = len(train_df)
end = start + len(test_df) - 1
prediction = res.predict(start, end).rename('ARIMA Prediction')
test_df.plot(figsize=(18, 8), legend=True)
prediction.plot(legend=True)

print_errors(test_df, prediction)
