# In [1]:
def get_scoring_measures(x, h, i, comparing_freqs=False):
    '''
    Compute the RMSE and the adjusted R2 of each prediction column
    against the observations stored in the first column.

    Parameters
    ----------

    x : pandas.DataFrame
        Data frame containing the observations and the
        predictions of a time series using any given method.
        The first column must contain the observations; every
        other column is treated as a set of predictions. Only
        the columns predicted exclusively from lags should
        contain 'lag' in their column name. All columns have to
        be of the same length and NaN values are invalid.
        NOTE: the frame is sorted by its index in place.

    h : int
        Number of lags used for the predictions. Used as the
        number of regressors when adjusting R2 for the columns
        whose name contains 'lag'.

    i : int
        Total number of features used to get predictions from
        models that used both lags and external data. Used as
        the number of regressors for every other column.

    comparing_freqs : bool
        If True only the Root Mean Squared Error is computed
        (intended for comparing daily observations against
        grouped 48 obs./day predictions).

    Returns
    -------

    pandas.DataFrame indexed by the labels of the prediction
    columns of ``x``, with an 'RMSE' column and, unless
    ``comparing_freqs`` is True, an 'R2_adjusted' column. When
    ``x`` has no prediction columns an empty frame with those
    columns is returned.

    Notes
    -----

    ``mean_squared_error`` and ``r2_score`` are expected to be
    in scope (presumably imported from ``sklearn.metrics``).
    The adjustment applied is
    ``1 - (1 - R2) * (n - 1) / (n - p - 1)`` with ``n`` the number
    of rows of ``x`` and ``p`` either ``h`` or ``i``; with plain
    Python ints it raises ZeroDivisionError when ``n - p - 1``
    is zero.

    '''

    # Kept for backward compatibility: callers may rely on `x` coming back
    # sorted. The scores themselves do not depend on the row order.
    x.sort_index(inplace=True)

    observed = x[x.columns[0]]
    prediction_cols = list(x.columns[1:])

    # One score per prediction column, collected in plain lists so that a
    # frame without prediction columns simply yields an empty result.
    measures = {
        'RMSE': [
            np.sqrt(mean_squared_error(observed, x[col]))
            for col in prediction_cols
        ]
    }

    if not comparing_freqs:
        n_obs = x.shape[0]
        adjusted = []
        for col in prediction_cols:
            # Lag-only models are penalised by the number of lags, the rest
            # by the total number of features.
            n_features = h if 'lag' in str(col) else i
            r2 = r2_score(observed, x[col])
            adjusted.append(
                1 - (1 - r2) * ((n_obs - 1) / (n_obs - n_features - 1))
            )
        measures['R2_adjusted'] = adjusted

    return pd.DataFrame(measures, index=prediction_cols, dtype=float)