In [None]:
import pandas as pd
import numpy as np
from pandas.tseries.offsets import DateOffset
import random
import time

In [None]:
def make_adjastent_transitions(daily_summary:pd.Series, adjastend_days: int):
    """Build the row-to-row transition table for a daily series.

    Every row of the result is indexed by a date of ``daily_summary`` and
    pairs the value of the previous row (``in``) with the value on that date
    (``out``). It also stores the bounds of a symmetric window of dates
    around the row's date.

    Parameters
    ----------
    daily_summary : pd.Series
        Observations indexed by a ``DatetimeIndex`` (``.dayofyear`` is read
        from the index).
    adjastend_days : int
        Half-width of the window, in days, placed around each date.

    Returns
    -------
    pd.DataFrame
        Columns ``in``, ``out``, ``dayofyear``, ``left`` and ``right``.
        Rows with a missing ``in`` or ``out`` are dropped, which always
        removes the first observation (it has no predecessor).
    """
    # Work on the series that was passed in rather than on a notebook-level
    # variable, so the function behaves the same on a fresh kernel.
    transitions = pd.concat([daily_summary.shift(), daily_summary], axis = 1).dropna()
    transitions.columns = ['in','out']
    transitions['dayofyear'] = transitions.index.dayofyear
    # Window bounds: [date - adjastend_days, date + adjastend_days].
    transitions['left'] = transitions.index + DateOffset(days=-adjastend_days)
    transitions['right'] = transitions.index + DateOffset(days=adjastend_days)
    return transitions


def make_covar_tuples(adjastent_transitions:pd.DataFrame):
    """Pool the observed transitions around every day of the year.

    For each distinct ``dayofyear`` value, the ``left``..``right`` windows of
    all rows having that day of year are expanded to daily dates. Every row
    of the table whose index falls on one of those dates contributes one
    tuple.

    Parameters
    ----------
    adjastent_transitions : pd.DataFrame
        Table with columns ``in``, ``out``, ``dayofyear``, ``left`` and
        ``right``, indexed by date.

    Returns
    -------
    list of tuple
        ``(dayofyear, in, out)`` tuples, grouped by ascending day of year and
        in table order inside each group. A row is emitted at most once per
        day of year even when several windows cover it.
    """
    covar = []
    for day, dailydata in adjastent_transitions.groupby(by='dayofyear'):
        # Read the window bounds by column label; positional access on a
        # label-indexed row (`x[0]`, `x[1]`) is deprecated in pandas.
        windows = [
            pd.date_range(start = left, end = right, freq = 'D')
            for left, right in zip(dailydata['left'], dailydata['right'])
        ]
        # A group is never empty, so windows[0] always exists.
        window_dates = windows[0].append(windows[1:])

        in_window = adjastent_transitions.index.isin(window_dates)
        pooled = adjastent_transitions.loc[in_window, ['in', 'out']]
        covar.extend(
            (day, state_in, state_out)
            for state_in, state_out in zip(pooled['in'], pooled['out'])
        )
    return covar

def markov_forecast(adjastent_transitions, covar_tuples):
    """Sample a 366-day forecast by walking the pooled transitions.

    Starting from the last ``out`` value of ``adjastent_transitions``, each
    forecast day draws (with ``random.choice``) one of the transitions
    recorded for that day of year whose ``in`` state equals the current
    state. The drawn ``out`` state becomes both the forecast for that day
    and the current state for the next day.

    Parameters
    ----------
    adjastent_transitions : pd.DataFrame
        Transition table indexed by date with an ``out`` column; only its
        last row and its maximum index value are used.
    covar_tuples : iterable of tuple
        ``(dayofyear, in, out)`` tuples.

    Returns
    -------
    pd.Series
        Forecast states named ``'Short Summary'``, indexed by ``period``:
        the 366 daily dates following the last date of the table.

    Raises
    ------
    IndexError
        If the current state never occurs as an ``in`` state in
        ``covar_tuples``, so no transition can be drawn.
    """
    # Start from the table that was passed in, not from a notebook global.
    weather_statement = adjastent_transitions['out'].iloc[-1]
    forecast = []

    # 367 periods starting at the last known date, minus that date itself.
    forecast_periods = pd.date_range(
        start = adjastent_transitions.index.max(),
        periods = 367, freq = 'D'
    )[1:]

    # Index the transitions once instead of rescanning the full list for
    # every forecast day. Insertion order is kept, so a seeded
    # `random.choice` draws the same element as a linear filter would.
    by_day_and_state = {}
    by_state = {}
    for day, state_in, state_out in covar_tuples:
        by_day_and_state.setdefault((day, state_in), []).append(state_out)
        by_state.setdefault(state_in, []).append(state_out)

    for period in forecast_periods:
        candidates = by_day_and_state.get((period.dayofyear, weather_statement))
        if not candidates:
            # Nothing recorded for this day of year (e.g. day 366, or a state
            # not seen in this window): use transitions from any day.
            candidates = by_state.get(weather_statement)
        if not candidates:
            raise IndexError(
                'no transition from state %r available for %s'
                % (weather_statement, period)
            )

        # The drawn `out` state is the forecast for `period`; recording the
        # `in` state instead would shift the whole forecast back by one day.
        weather_statement = random.choice(candidates)
        forecast.append({'period':period, 'Short Summary':weather_statement})
    forecast = pd.DataFrame(forecast).set_index('period').squeeze()
    return forecast

In [None]:
# Cross-validation of the window half-width: each candidate value is run
# several times (the forecast is drawn with `random.choice`) and every run
# is scored against `validation_split`.
cv_results = []
for adjastent_days in range(1, 5):
    for iteration in range(5):
        ts = time.time()
        print('adj. days variable:%i, iteration #%i' % (adjastent_days, iteration), end = '')

        # Transition table -> pooled transitions -> sampled forecast.
        adtr = make_adjastent_transitions(train_split, adjastent_days)
        covar = make_covar_tuples(adtr)
        forecast = markov_forecast(adtr, covar)

        # 1 where forecast and fact agree on a date, 0 otherwise; the concat
        # aligns both series on their index.
        prediction_score = pd.concat(
            [validation_split.to_frame('fact'), forecast.to_frame('forecast')],
            axis = 1,
        ).apply(lambda row: int(row['fact'] == row['forecast']), axis = 1)

        cv_results.append(dict(
            adjastent_days = adjastent_days,
            iteration = iteration,
            score = prediction_score.sum() / len(prediction_score),
            time_eval_sec = round(time.time() - ts, 1),
        ))
        print(' in %i sec.' % (round(time.time() - ts, 1)))

In [None]:
# Mean score for each window half-width, averaged over its iterations.
pd.DataFrame(cv_results).groupby('adjastent_days')['score'].mean()