In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import date
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller
from datetime import timedelta
from time import time
import warnings
warnings.filterwarnings('ignore')
from tqdm import tqdm
import statsmodels.api as sm
from prophet import Prophet
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_percentage_error
from sklearn.model_selection import ParameterGrid

In [4]:
csv_file = r'C:\Users\bella\Downloads\Y2Q4 JBG050 Data Challenge 2\DataChallange2\total_barnet.csv'
dataset = pd.read_csv(csv_file)
dataset = dataset.drop(['last_outcome_category', 'location'], axis=1)
mask = dataset['ward_name']!=dataset['ward_name']
dataset = dataset[~mask]

In [5]:
dataset.head()
# print(len(dataset))

Unnamed: 0,crime_id,month,longitude,latitude,lsoa_code,lsoa_name,ward_code,ward_name
0,5feceb66ffc86f38d952786c6d696c79c2dbc239dd4e91...,2010-12,-0.201877,51.655538,E01000248,Barnet 001A,E05013644,High Barnet
1,6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d...,2010-12,-0.207853,51.654317,E01000248,Barnet 001A,E05013644,High Barnet
2,d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f...,2010-12,-0.20251,51.656348,E01000248,Barnet 001A,E05013644,High Barnet
3,4e07408562bedb8b60ce05c1decfe3ad16b72230967de0...,2010-12,-0.206779,51.654768,E01000248,Barnet 001A,E05013644,High Barnet
4,4b227777d4dd1fc61c6f884f48641d02b4d121d3fd328c...,2010-12,-0.209537,51.655223,E01000249,Barnet 001B,E05013644,High Barnet


In [6]:
# sarima function

def prepare_timeseries(timeseries, train_months):
    
    timeseries = pd.Series(timeseries.values,index=[pd.Timestamp(x, freq='M') for x in timeseries.index])
    
    return timeseries

def training(timeseries, train_months):

    train_data = timeseries[:-train_months]
    test_data = timeseries.tail(train_months)
    
    return train_data, test_data

def corona_troubles(timeseries):
    remove_start = date(2019, 11, 1)
    remove_end = date(2020, 7, 1)
    temp_df = pd.DataFrame(timeseries).reset_index()
    
    
    for x in timeseries[remove_start:remove_end].index:
        temp = temp_df[temp_df['index'].dt.month == x.month]
        temp = temp[temp['index'].dt.year != x.year]
        timeseries[x] = int(temp[0].mean())
        
    return timeseries

def create_model(train_data, my_order, my_seasonal_order):
    model = SARIMAX(train_data, order=my_order, seasonal_order=my_seasonal_order)
    model_fit = model.fit()
    return model_fit

def pred_res(model_fit, test_data):
    
    predictions = model_fit.forecast(len(test_data))
    predictions = pd.Series(predictions, index=test_data.index)
    residuals = test_data - predictions
    
    return predictions, residuals

def statistics(residuals, test_data):
    MAPE = round(np.mean(abs(residuals/test_data)),4)
    RMSE = np.sqrt(np.mean(residuals**2))
    return MAPE, RMSE
    
def rolling_pred_res(train_data, test_data, timeseries, my_order, my_seasonal_order):
    rolling_predictions = test_data.copy()
    for train_end in test_data.index:
        train_data = timeseries[:train_end-timedelta(days=1)]
        model = SARIMAX(train_data, order=my_order, seasonal_order=my_seasonal_order)
        model_fit = model.fit()

        pred = model_fit.forecast()
        rolling_predictions[train_end] = pred

    rolling_residuals = test_data - rolling_predictions
    
    return rolling_predictions, rolling_residuals, model_fit
        
        
def rolling_statistics(rolling_residuals, test_data):
    RMAPE = round(np.mean(abs(rolling_residuals/test_data)),4)
    RRMSE = np.sqrt(np.mean(rolling_residuals**2))
    
    return RMAPE, RRMSE

def sarima_model(ward_name, train_months, my_order, my_seasonal_order):
    dataframe_ward = dataset[dataset['ward_name']==ward_name]
    temp_timeseries = pd.DataFrame()
    temp_timeseries = dataframe_ward.groupby("month").count()['latitude']
    temp_timeseries = prepare_timeseries(temp_timeseries, train_months)
    temp_timeseries = corona_troubles(temp_timeseries)
    temp_train, temp_test = training(temp_timeseries, train_months)

    temp_rolling_predictions, temp_rolling_residuals, model_fit = rolling_pred_res(temp_train, temp_test, temp_timeseries, my_order = my_order, my_seasonal_order = my_seasonal_order)
    temp_rmape, temp_rrmse = rolling_statistics(temp_rolling_residuals, temp_test)
    
    return temp_rmape, temp_rrmse, model_fit

# example usage
ward = 'Burnt Oak'
train_months = 6
my_order = (0,1,0)
my_seasonal_order = (2, 0, 1, 6)
# opt_my_order, opt_my_seasonal_order = optimize_sarima(ward, train_months)
sarima_mape, sarima_rmse, model_fit = sarima_model(ward, train_months, my_order, my_seasonal_order)
sarima_mape, sarima_rmse, model_fit.forecast()

(0.4793,
 5.1894877455734045,
 2023-03-01    5.07609
 Freq: MS, dtype: float64)

In [37]:
# prophet model 

def optimize_prophet(dataframe, train_months):
    # Define the hyperparameter grid
    param_grid = {
        'changepoint_prior_scale': [0.01, 0.1, 0.5, 1.0],
        'seasonality_prior_scale': [0.01, 0.1, 0.5, 1.0],
        'seasonality_mode': ['additive', 'multiplicative'],
        'daily_seasonality': [False, True]
        }
    best_params = {}
    best_rmse = float('inf')

    # Perform grid search
    for params in ParameterGrid(param_grid):
        model = Prophet(**params)
        model.fit(dataframe)

        forecast = model.predict(dataframe[-train_months:])
        rmse = mean_squared_error(dataframe[-train_months:]['y'].values, forecast[-train_months:]['yhat'].values, squared=False)
        
        if rmse < best_rmse:
            best_rmse = rmse
            best_params = params
    changepoint_prior_scale = best_params['changepoint_prior_scale']
    daily_seasonality = best_params['daily_seasonality']
    seasonality_mode = best_params['seasonality_mode']
    seasonality_prior_scale = best_params['seasonality_prior_scale']
    # print(changepoint_prior_scale, daily_seasonality, seasonality_mode, seasonality_prior_scale)
    return changepoint_prior_scale, daily_seasonality, seasonality_mode, seasonality_prior_scale

def prophet_model(ward_name, train_months):
    dataframe_ward = dataset[dataset['ward_name']==ward_name]
    dataframe = pd.DataFrame()
    dataframe['crimes'] = dataframe_ward.groupby('month').count()['latitude']
    dataframe['month'] = dataframe_ward.groupby('month')['month'].unique().index
    covid_years = ['2019-11', '2019-12', '2020-01', '2020-02', '2020-03', '2020-04', '2020-05', '2020-06', '2020-07']
    for x in covid_years:
        dataframe = dataframe[dataframe['month'] != x]    
    dataframe['month'] = pd.to_datetime(dataframe['month'], format='%Y-%m-%d')
    dataframe = dataframe.rename(columns={'month': 'ds', 'crimes': 'y'})
    
    changepoint_prior_scale, daily_seasonality, seasonality_mode, seasonality_prior_scale = optimize_prophet(dataframe, train_months)
    
    model = Prophet(interval_width=0.95, changepoint_prior_scale = changepoint_prior_scale, daily_seasonality = daily_seasonality, seasonality_mode = seasonality_mode, seasonality_prior_scale = seasonality_prior_scale)
    model.fit(dataframe)

    train = dataframe.drop(dataframe.index[:-train_months])
    # print(train)

    forecast = model.predict(train)
    # print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']])

    y_true = dataframe['y'][-train_months:].values
    y_pred = forecast['yhat'].values
    # print(y_true, y_pred)

    prophet_r2 = r2_score(y_true, y_pred)
    # print("r^2 Score:", r2)

    prophet_rmse = mean_squared_error(y_true, y_pred, squared=False)
    # print("RMSE:", rmse)

    prophet_mape = mean_absolute_percentage_error(y_true, y_pred)
    # print("mape:", mape)

    return prophet_mape, prophet_rmse, y_pred

# Usage example
ward_name = 'Burnt Oak'
prophet_mape, prophet_rmse, y_pred = prophet_model(ward_name, 6)
prophet_mape, prophet_rmse, y_pred[-1]

16:33:59 - cmdstanpy - INFO - Chain [1] start processing
16:33:59 - cmdstanpy - INFO - Chain [1] done processing
16:33:59 - cmdstanpy - INFO - Chain [1] start processing
16:33:59 - cmdstanpy - INFO - Chain [1] done processing
16:33:59 - cmdstanpy - INFO - Chain [1] start processing
16:33:59 - cmdstanpy - INFO - Chain [1] done processing
16:33:59 - cmdstanpy - INFO - Chain [1] start processing
16:33:59 - cmdstanpy - INFO - Chain [1] done processing
16:33:59 - cmdstanpy - INFO - Chain [1] start processing
16:33:59 - cmdstanpy - INFO - Chain [1] done processing
16:34:00 - cmdstanpy - INFO - Chain [1] start processing
16:34:00 - cmdstanpy - INFO - Chain [1] done processing
16:34:00 - cmdstanpy - INFO - Chain [1] start processing
16:34:00 - cmdstanpy - INFO - Chain [1] done processing
16:34:00 - cmdstanpy - INFO - Chain [1] start processing
16:34:00 - cmdstanpy - INFO - Chain [1] done processing
16:34:00 - cmdstanpy - INFO - Chain [1] start processing
16:34:00 - cmdstanpy - INFO - Chain [1]

(0.2685131018830973, 2.7296960805032584, 13.002970680551433)

In [38]:
import pandas as pd
csv_file = r"C:\Users\bella\Downloads\Y2Q4 JBG050 Data Challenge 2\DataChallange2\population density.csv"
df = pd.read_csv(csv_file)
list = df['name']
list[19] = 'Totteridge & Woodside'
list

0                 Burnt Oak
1               Barnet Vale
2            Brunswick Park
3               Childs Hill
4           Colindale North
5           Colindale South
6               Cricklewood
7               East Barnet
8             East Finchley
9                   Edgware
10              Edgwarebury
11      Finchley Church End
12            Friern Barnet
13            Garden Suburb
14            Golders Green
15                   Hendon
16              West Hendon
17              High Barnet
18                Mill Hill
19    Totteridge & Woodside
20                Underhill
21            West Finchley
22                Whetstone
23                Woodhouse
Name: name, dtype: object

In [39]:
lst = []

for ward in list:
    ward_name = ward
    result = prophet_model(ward_name, 6)
    lst.append((ward_name, result))

16:34:12 - cmdstanpy - INFO - Chain [1] start processing
16:34:12 - cmdstanpy - INFO - Chain [1] done processing
16:34:12 - cmdstanpy - INFO - Chain [1] start processing
16:34:12 - cmdstanpy - INFO - Chain [1] done processing
16:34:12 - cmdstanpy - INFO - Chain [1] start processing
16:34:12 - cmdstanpy - INFO - Chain [1] done processing
16:34:12 - cmdstanpy - INFO - Chain [1] start processing
16:34:12 - cmdstanpy - INFO - Chain [1] done processing
16:34:13 - cmdstanpy - INFO - Chain [1] start processing
16:34:13 - cmdstanpy - INFO - Chain [1] done processing
16:34:13 - cmdstanpy - INFO - Chain [1] start processing
16:34:13 - cmdstanpy - INFO - Chain [1] done processing
16:34:13 - cmdstanpy - INFO - Chain [1] start processing
16:34:13 - cmdstanpy - INFO - Chain [1] done processing
16:34:13 - cmdstanpy - INFO - Chain [1] start processing
16:34:13 - cmdstanpy - INFO - Chain [1] done processing
16:34:13 - cmdstanpy - INFO - Chain [1] start processing
16:34:13 - cmdstanpy - INFO - Chain [1]

In [10]:
lst

[('Burnt Oak',
  (0.2685131018830973,
   2.7296960805032584,
   array([10.48379561, 13.43483183, 11.78328939, 12.77668905, 10.5055808 ,
          13.00297068]))),
 ('Barnet Vale',
  (0.38585281418096457,
   1.905351352249306,
   array([5.4750673 , 5.95757804, 6.18888762, 4.93871559, 4.15171936,
          4.51939551]))),
 ('Brunswick Park',
  (0.41787977035228274,
   4.047014515123431,
   array([8.04097625, 8.90078635, 6.28131596, 7.23778584, 7.73845459,
          7.305506  ]))),
 ('Childs Hill',
  (0.11066340812328142,
   5.1876131925790565,
   array([18.18868981, 27.53116811, 20.42936664, 20.46042808, 22.01201631,
          20.59121883]))),
 ('Colindale North',
  (0.22498753912028832,
   2.1143374114944025,
   array([5.70290475, 6.51672025, 6.65970022, 7.98124456, 5.97326101,
          7.44262609]))),
 ('Colindale South',
  (0.4857541642063585,
   2.9860966471460504,
   array([ 9.73125841,  9.10737379, 10.70449618, 10.0864782 ,  8.54767594,
           9.43014939]))),
 ('Cricklewood',


In [40]:
mape_values = [entry[1][0] for entry in lst]

# Calculate the average MAPE
average_mape = sum(mape_values) / len(mape_values)

print(average_mape)

0.29683827566412707


In [36]:
rmse_values = [entry[1][1] for entry in lst]

# Calculate the average MAPE
average_rmse = sum(rmse_values) / len(rmse_values)

print(average_rmse)

2.7688428162179286


In [20]:
opt_my_order, opt_my_seasonal_order = optimize_sarima('Burnt Oak', 6)
sarima_mape, sarima_rmse, model_fit= sarima_model('Burnt Oak', 6, my_order = opt_my_order, my_seasonal_order = opt_my_seasonal_order)
sarima_mape, sarima_rmse, model_fit

(2, (1, 0, 0), (1, 0, 1, 6)) Does not work


(0.3884,
 4.011038210442629,
 <statsmodels.tsa.statespace.sarimax.SARIMAXResultsWrapper at 0x1d6dd87cbd0>)

In [26]:
lst = []

for ward in list:
    ward_name = ward
    opt_my_order, opt_my_seasonal_order = optimize_sarima(ward_name, 6)
    sarima_mape, sarima_rmse, model_fit= sarima_model(ward_name, 6, my_order = opt_my_order, my_seasonal_order = opt_my_seasonal_order)    
    lst.append((ward_name, result))

(2, (1, 0, 0), (1, 0, 1, 6)) Does not work
(2, (1, 0, 1), (2, 0, 1, 12)) Does not work
(2, (1, 0, 2), (1, 0, 1, 12)) Does not work
(2, (1, 0, 2), (1, 0, 1, 6)) Does not work
(2, (1, 0, 0), (2, 0, 1, 12)) Does not work
(2, (1, 1, 2), (2, 0, 1, 12)) Does not work
(2, (1, 0, 0), (1, 0, 1, 6)) Does not work
(2, (1, 0, 0), (2, 0, 1, 6)) Does not work


In [27]:
lst

[('Burnt Oak',
  (0.19666444487863766,
   1.7327205648249944,
   array([ 7.55302893, 12.12080751,  4.9546788 ,  6.24402455,  5.99984195,
           6.44736473]))),
 ('Barnet Vale',
  (0.19666444487863766,
   1.7327205648249944,
   array([ 7.55302893, 12.12080751,  4.9546788 ,  6.24402455,  5.99984195,
           6.44736473]))),
 ('Brunswick Park',
  (0.19666444487863766,
   1.7327205648249944,
   array([ 7.55302893, 12.12080751,  4.9546788 ,  6.24402455,  5.99984195,
           6.44736473]))),
 ('Childs Hill',
  (0.19666444487863766,
   1.7327205648249944,
   array([ 7.55302893, 12.12080751,  4.9546788 ,  6.24402455,  5.99984195,
           6.44736473]))),
 ('Colindale North',
  (0.19666444487863766,
   1.7327205648249944,
   array([ 7.55302893, 12.12080751,  4.9546788 ,  6.24402455,  5.99984195,
           6.44736473]))),
 ('Colindale South',
  (0.19666444487863766,
   1.7327205648249944,
   array([ 7.55302893, 12.12080751,  4.9546788 ,  6.24402455,  5.99984195,
           6.44736473]

In [28]:
# final combined model

def optimize_sarima(input_data, training_month):
    orders = []
    for x in [0,1]:
        for y in [0,1]:
            for z in [0,1,2]:
                orders.append((x,y,z))
                
    seasonal_orders = []
    for x in [1,2]:
        for y in [0]:
            for z in [0,1]:
                for s in [6, 12]:
                    seasonal_orders.append((x,y,z,s))

    best_order_lst = []
    i=0
    looking_lst = []

    for y in orders:
        for z in seasonal_orders:
            try:
                my_order = y
                my_seasonal_order = z
                new_temp_mape, new_temp_rmse, model_fit = sarima_model(input_data, training_month, my_order=y, my_seasonal_order=z)
                looking_lst.append([(x,y,z), new_temp_mape, new_temp_rmse])
            except:
                print((x,y,z), 'Does not work')

    best_rmse = 2.0
    for item in looking_lst:
        _, _, rmse = item
        if rmse < best_rmse:
            best_rmse = rmse
            best_item = item
    return item[0][1], item[0][2]

def select_model(sarima_mape, sarima_rmse, prophet_mape, prophet_rmse):
    if sarima_rmse < prophet_rmse:
        return "SARIMA"
    else:
        return "Prophet"
    
def combined_model(input_data, training_month):
    # Run Prophet model
    prophet_mape, prophet_rmse, y_pred = prophet_model(input_data, training_month)
    # print(prophet_mape, prophet_rmse, y_pred)

    # Run SARIMA model
    opt_my_order, opt_my_seasonal_order = optimize_sarima(input_data, training_month)
    sarima_mape, sarima_rmse, model_fit= sarima_model(input_data, training_month, my_order = opt_my_order, my_seasonal_order = opt_my_seasonal_order)
    print(sarima_mape, sarima_rmse, model_fit.forecast())
    # sarima_mape, sarima_rmse, model_fit = get_sarima(input_data, training_month)

    # Compare RMSE and select the model
    model_name = select_model(sarima_mape, sarima_rmse, prophet_mape, prophet_rmse)

    if "Prophet" in model_name:
        predictions = y_pred[-1]
        mape = prophet_mape
        rmse = prophet_rmse
    elif "SARIMA" in model_name:
        predictions = model_fit.forecast()
        mape = sarima_mape
        rmse = sarima_rmse
    else:
        return 'Error has occured'

    return f"Model: {model_name}\nPredictions: {predictions}\nMAPE: {mape}\nRMSE: {rmse}"

# Usage example

# input_data = input("Please choose a ward: ")
ward_name = 'Friern Barnet'
result = combined_model(ward_name, 6)
print(result)

00:49:46 - cmdstanpy - INFO - Chain [1] start processing
00:49:46 - cmdstanpy - INFO - Chain [1] done processing
00:49:46 - cmdstanpy - INFO - Chain [1] start processing
00:49:46 - cmdstanpy - INFO - Chain [1] done processing
00:49:46 - cmdstanpy - INFO - Chain [1] start processing
00:49:47 - cmdstanpy - INFO - Chain [1] done processing
00:49:47 - cmdstanpy - INFO - Chain [1] start processing
00:49:47 - cmdstanpy - INFO - Chain [1] done processing
00:49:47 - cmdstanpy - INFO - Chain [1] start processing
00:49:47 - cmdstanpy - INFO - Chain [1] done processing
00:49:47 - cmdstanpy - INFO - Chain [1] start processing
00:49:47 - cmdstanpy - INFO - Chain [1] done processing
00:49:47 - cmdstanpy - INFO - Chain [1] start processing
00:49:47 - cmdstanpy - INFO - Chain [1] done processing
00:49:47 - cmdstanpy - INFO - Chain [1] start processing
00:49:47 - cmdstanpy - INFO - Chain [1] done processing
00:49:47 - cmdstanpy - INFO - Chain [1] start processing
00:49:47 - cmdstanpy - INFO - Chain [1]

0.8904 6.4009858991408635 2023-03-01    8.041343
Freq: MS, dtype: float64
Model: Prophet
Predictions: 9.23465047025972
MAPE: 0.6629528056942567
RMSE: 5.056563990202085


In [None]:
lst = []

for ward in list:
    ward_name = ward
    result = combined_model(ward_name, 6)
    lst.append((ward_name, result))

In [None]:
lst

In [None]:
# MAPE optimization

data = [('Burnt Oak', 'Model: Prophet', 11.00745729269387, 0.32111862552943776, 3.242689288655525),
('Barnet Vale', 'Model: Prophet', 4.315311425687993, 0.440754152535588, 1.9379991553414853),
('Brunswick Park', 'Model: SARIMA', '2023-03-01 9.793158', 0.3045, 3.7426090191126344),
('Childs Hill', 'Model: Prophet', 22.963879111442534, 0.13172407667950572, 5.229993123163133),
('Colindale North', 'Model: Prophet', 6.699985121239115, 0.22142106621346777, 2.35210366627029),
('Colindale South', 'Model: SARIMA', '146 7.112232', 0.4233, 2.63741680759111), 
('Cricklewood', 'Model: Prophet', 12.702694826481089, 0.14439883365693948, 2.0376202076710017),
('East Barnet', 'Model: Prophet', 6.701973950795069, 0.28815081510158086, 2.4885104465104653),
('East Finchley', 'Model: Prophet', 6.405562881931146, 0.4883459130057865, 2.357591289818432),
('Edgware', 'Model: Prophet', 9.813917577332381, 0.24243528819248486, 2.701912130656094),
('Edgwarebury', 'Model: SARIMA', '146 3.793131', 0.5322, 1.4787756625447277),
('Finchley Church End', 'Model: Prophet', 11.987285039167299, 0.37129692171021195, 4.64439077737032),
('Garden Suburb', 'Model: Prophet', 5.131699658606023, 0.1531519987821206, 1.8516191228124075),
('Golders Green', 'Model: Prophet', 8.84796158670176, 0.19560296175601943, 2.4779175200395525),
('Hendon', 'Model: Prophet', 13.966466918785635, 0.18339818375902225, 4.3074349828077985),
('West Hendon', 'Model: Prophet', 11.23116774275139, 0.13937335127494369, 1.63236914928524),
('High Barnet', 'Model: SARIMA', '146 6.618603', 0.1545, 1.854129492484391),
('Mill Hill', 'Model: Prophet', 9.74979568724597, 0.3327873929544118, 4.6531204203534715),
('Totteridge & Woodside', 'Model: Prophet', 11.517060451355071, 0.1526317243471115, 3.213727319191615),
('Underhill', 'Model: Prophet', 2.0499307587438467, 0.475797335648, 1.568363987056775),
('West Finchley','Model: Prophet', 11.924871095782368,0.3269670895042522, 3.793931564675),
('Whetstone',  'Model: Prophet',5.4451478574785135,0.417738856132941, 3.0117294852728436),
('Woodhouse',  'Model: Prophet', 5.6971955116636845, 0.2687636666808728, 2.932173298042915)]

df_result_mape = pd.DataFrame(data, columns=['ward_name', 'model_selected', 'predictions', 'mape', 'rmse'])
mean_mape= df_result_mape['mape'].mean()
mean_mape

# df_result_rmse

In [None]:
#RMSE optimization 

import pandas as pd

data1 = [('Burnt Oak', 'Model: Prophet', 13.002970680551433, 0.2685131018830973, 2.7296960805032584),
        ('Barnet Vale', 'Model: Prophet', 4.51939551238294, 0.38585281418096457, 1.905351352249306),
        ('Brunswick Park', 'Model: SARIMA', 9.793158, 0.3045, 3.7426090191126344),
        ('Childs Hill', 'Model: Prophet', 20.591218833526586, 0.11066340812328142, 5.1876131925790565),
        ('Colindale North', 'Model: Prophet', 7.44262609161261, 0.22498753912028832, 2.1143374114944025),
        ('Colindale South', 'Model: SARIMA', 7.112232, 0.4233, 2.63741680759111),
        ('Cricklewood', 'Model: Prophet', 12.702694826481089, 0.14439883365693948, 2.0376202076710017),
        ('East Barnet', 'Model: Prophet', 6.838802126945793, 0.2834596715491615, 2.452676399074152),
        ('East Finchley', 'Model: Prophet', 6.405562881931146, 0.4883459130057865, 2.357591289818432),
        ('Edgware', 'Model: Prophet', 9.811704140894284, 0.15740693147719445, 2.397939086482312),
        ('Edgwarebury', 'Model: SARIMA', 3.793131, 0.5322, 1.4787756625447277),
        ('Finchley Church End', 'Model: Prophet', 14.02746081981765, 0.31090979413647807, 3.911370358465527),
        ('Friern Barnet', 'Model: Prophet', 9.23465047025972, 0.6629528056942567, 5.056563990202085),
        ('Garden Suburb', 'Model: Prophet', 5.131699658606023, 0.1531519987821206, 1.8516191228124075),
        ('Golders Green', 'Model: Prophet', 12.110565061944406, 0.1530046358383567, 1.407148616731087),
        ('Hendon', 'Model: Prophet', 14.94373671826817, 0.18020478910664453, 4.2417046928196465),
        ('West Hendon', 'Model: Prophet', 11.48637787326732, 0.13607187791881054, 1.5687996362022463),
        ('High Barnet', 'Model: Prophet', 5.2412702658106305, 0.16315103379355939, 1.6978621905127846),
        ('Mill Hill', 'Model: Prophet', 8.663680570756325, 0.2829816576793635, 4.607030756663003),
        ('Totteridge & Woodside', 'Model: Prophet', 11.517060451355071, 0.1526317243471115, 3.213727319191615),
        ('Underhill', 'Model: Prophet', 1.864494970364168, 0.5411371802144144, 1.3839878457287267),
        ('West Finchley', 'Model: Prophet', 13.79653199714946, 0.3066389468395573, 3.1479829038568483),
        ('Whetstone', 'Model: Prophet', 6.492214762074795, 0.4841589030002023, 2.838135100652821),
        ('Woodhouse', 'Model: Prophet', 6.4473647265723875, 0.19666444487863766, 1.7327205648249944)
]

df_result_rmse = pd.DataFrame(data1, columns=['ward_name', 'model_selected', 'predictions', 'mape', 'rmse'])
mean_rmse= df_result_rmse['mape'].mean()
mean_rmse

# df_result_rmse