In [1]:
import calendar
import os
import time
from io import StringIO
from urllib.parse import urljoin, urlparse

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import requests
from bs4 import BeautifulSoup
from sklearn.impute import KNNImputer
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.models import Sequential



In [2]:
# Base URL of the Met Office historic station data page
base_url = "https://www.metoffice.gov.uk/research/climate/maps-and-data/historic-station-data"

In [3]:

# Function to get the list of station data URLs
def get_station_urls(base_url, timeout=30):
    """
    Collect the station data URLs linked from the index page.

    Parameters:
    base_url (str): Address of the page listing the stations.
    timeout (float): Seconds to wait for the server before giving up.

    Returns:
    list[str]: Absolute URL of every anchor whose text is exactly 'View data',
    in page order.

    Raises:
    requests.exceptions.RequestException: If the page cannot be fetched or the
    server answers with a 4xx/5xx status.
    """
    response = requests.get(base_url, timeout=timeout)
    # Fail loudly instead of parsing an error page and returning an empty list.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    # `string=` is the current name of the deprecated `text=` filter;
    # `href=True` skips anchors that carry no link target.
    station_links = soup.find_all('a', string='View data', href=True)
    # Resolve relative links against the page address so every URL is fetchable.
    return [urljoin(base_url, link['href']) for link in station_links]

def download_station_data(url, timeout=30):
    """
    Download one station data file and return its text.

    Parameters:
    url (str): Address of the station data file.
    timeout (float): Seconds to wait for the server before giving up, so a
        stalled connection cannot hang the whole download loop.

    Returns:
    str or None: The response body with surrounding whitespace stripped, or
    None when the request fails (the failure is printed, not raised).
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise HTTPError for bad responses (4xx and 5xx)
        return response.text.strip()
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
        return None
# Function to extract filenames from URLs
def extract_filenames(urls):
    """
    Return the last path component of each URL.

    Parameters:
    urls (iterable of str): URLs to process.

    Returns:
    list[str]: One file name per URL, in input order. Query strings and
    fragments are ignored because only the parsed path is used.
    """
    return [os.path.basename(urlparse(address).path) for address in urls]

def save_text_to_file(text, filename):
    """
    Save the given text to a file.

    Parameters:
    text (str): The text to save. Anything that is not a string (for example
        None from a failed download) is rejected before the file is opened, so
        an existing file is never truncated by a bad call.
    filename (str): The name of the file to save the text to.

    Problems are reported with a printed message rather than raised.
    """
    if not isinstance(text, str):
        print(f"An error occurred while saving the text to file: no text to save for {filename}")
        return

    try:
        # Explicit encoding keeps the written bytes independent of the platform default.
        with open(filename, 'w', encoding='utf-8') as file:
            file.write(text)
        print(f"Text successfully saved to {filename}")
    except Exception as e:
        print(f"An error occurred while saving the text to file: {e}")

In [4]:
# Scrape the station data links from the index page.
station_urls = get_station_urls(base_url)
# Number of station URLs found; used as the bound of the download loop.
stats_urls = len(station_urls)
# File name (last path component) of each URL, in the same order as station_urls.
filenames = extract_filenames(station_urls)

In [5]:
# Download every station file and store it under its own file name.
# station_urls and filenames are parallel lists, so they are walked together.
for station_url, station_filename in zip(station_urls, filenames):
    data = download_station_data(station_url)
    if data is None:
        # download_station_data returns None on failure; do not write a file
        # or report success for a station that was not fetched.
        print("Station data download failed:", station_filename)
        continue
    save_text_to_file(data, station_filename)
    print("Station data downloaded:", station_filename)

Text successfully saved to aberporthdata.txt
Station data downloaded: aberporthdata.txt
Text successfully saved to armaghdata.txt
Station data downloaded: armaghdata.txt
Text successfully saved to ballypatrickdata.txt
Station data downloaded: ballypatrickdata.txt
Text successfully saved to bradforddata.txt
Station data downloaded: bradforddata.txt
Text successfully saved to braemardata.txt
Station data downloaded: braemardata.txt
Text successfully saved to cambornedata.txt
Station data downloaded: cambornedata.txt
Text successfully saved to cambridgedata.txt
Station data downloaded: cambridgedata.txt
Text successfully saved to cardiffdata.txt
Station data downloaded: cardiffdata.txt
Text successfully saved to chivenordata.txt
Station data downloaded: chivenordata.txt
Text successfully saved to cwmystwythdata.txt
Station data downloaded: cwmystwythdata.txt
Text successfully saved to dunstaffnagedata.txt
Station data downloaded: dunstaffnagedata.txt
Text successfully saved to durhamdata.

# Data Loading and Cleaning

In [6]:
# Define the columns to be used
columns = ["yyyy", "mm", "tmax", "tmin", "af_days", "rain_mm", "sun_hours"]

# Station names: each file name with its last 8 characters removed
# (e.g. 'aberporthdata.txt' -> 'aberporth').
names = [x[:-8] for x in filenames]

# Function to clean and parse a line of data
# def clean_line(line):
#     line = line.strip()
#     if "Provisional" in line:
#         line = line.replace("Provisional", "").strip()
#     parts = line.split()
#     if len(parts) == 7:
#         return parts
#     else:
#         return None
def clean_line(line):
    """
    Parse one raw line of a station file into its seven data fields.

    The 'Provisional' tag and the '*' and '#' value markers are removed, then
    the line is split on whitespace.

    A line is accepted only when it starts with a 4-digit year followed by a
    month number in 1..12 and has at least seven fields. This rejects free-text
    lines that merely happen to contain seven words (for example a row of
    column names), and keeps data rows that carry extra trailing notes by
    returning just the first seven fields.

    Parameters:
    line (str): One line of text from a station file.

    Returns:
    list[str] or None: The seven fields as strings (year, month and the five
    measurements), or None when the line is not a data row.
    """
    # Strip annotations that are attached to, or follow, the values.
    for marker in ("Provisional", "*", "#"):
        line = line.replace(marker, "")

    parts = line.split()
    if len(parts) < 7:
        return None

    year, month = parts[0], parts[1]
    # A data row is identified by its leading year and month.
    if not (year.isdigit() and len(year) == 4):
        return None
    if not (month.isdigit() and 1 <= int(month) <= 12):
        return None

    # Anything after the seventh field is a trailing note, not data.
    return parts[:7]

In [7]:
# One empty row list per station name; filled while the files are read.
station_dict = {name: [] for name in names}

In [8]:
for filename in filenames:
    # Station key: the file stem without its last 4 characters
    # (e.g. 'aberporthdata.txt' -> 'aberporthdata' -> 'aberporth').
    stem, _extension = os.path.splitext(filename)
    name = stem[:-4]

    try:
        with open(filename, 'r') as file:
            # Keep only the lines that clean_line recognises as data rows.
            parsed_rows = (clean_line(raw_line) for raw_line in file)
            station_dict[name].extend(row for row in parsed_rows if row)
    except FileNotFoundError:
        print(f"File not found: {filename}")

In [9]:
# Build one numeric DataFrame per station from the parsed rows.
dfs = {}
# Every column except the year/month keys holds a measurement.
measurement_columns = [column for column in columns if column not in ('yyyy', 'mm')]
for name, data in station_dict.items():
    df = pd.DataFrame(data, columns=columns)
    for column in measurement_columns:
        # regex=False makes '*' a literal character on every pandas version.
        df[column] = df[column].str.replace('*', '', regex=False)
    # Anything that is still not a number becomes NaN.
    df = df.apply(pd.to_numeric, errors='coerce')
    # A row without a numeric year and month cannot be placed on the time axis;
    # drop it rather than letting a later imputation step invent a date for it.
    df = df.dropna(subset=['yyyy', 'mm']).reset_index(drop=True)
    dfs[name] = df


In [10]:
# for name, df in dfs.items():
#     print(f"DataFrame: {name}")
#     print(df.isna().sum())
#     print("\n") 
# for name, df in dfs.items():
#     # Calculate the number of missing values for each column
#     missing_values = df.isna().sum()
    
#     # Convert to DataFrame for easier plotting with Plotly
#     missing_values_df = pd.DataFrame(missing_values, columns=['Missing Values']).reset_index()
#     missing_values_df.columns = ['Column', 'Missing Values']
    
#     # Create a pie chart
#     fig = px.pie(missing_values_df, names='Column', values='Missing Values', title=f'Missing Values in {name}')
    
#     # Show the plot
#     fig.show()
# combined_missing_values = {}

# # Calculate the number of missing values for each column in each DataFrame
# for name, df in dfs.items():
#     missing_values = df.isna().sum()
#     for column, count in missing_values.items():
#         if column in combined_missing_values:
#             combined_missing_values[column] += count
#         else:
#             combined_missing_values[column] = count

# # Convert the dictionary to a DataFrame for easier plotting with Plotly
# combined_missing_values_df = pd.DataFrame(list(combined_missing_values.items()), columns=['Column', 'Missing Values'])

# # Create a pie chart
# fig = px.pie(combined_missing_values_df, names='Column', values='Missing Values', title='Total Missing Values by Column Across All DataFrames')

# # Show the plot
# fig.show()

In [11]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import KNNImputer

def impute_missing_values_rf(df, target_column):
    """
    Fill the gaps of one column with random-forest predictions.

    Rows where `target_column` is present train a RandomForestRegressor on all
    other columns; the fitted model then predicts the rows where it is missing.
    The predictions are written into `df` itself, which is also returned.

    The frame is returned untouched (after printing a message) when the column
    has no missing values, has no known values, or when any of the other
    columns contains a missing value in either group of rows.

    Parameters:
    df (pd.DataFrame): Frame to impute; modified in place.
    target_column (str): Name of the column to fill.

    Returns:
    pd.DataFrame: The same `df` object.
    """
    gap_mask = df[target_column].isnull()
    known_rows = df[~gap_mask]
    gap_rows = df[gap_mask]

    # Nothing to learn from, or nothing to fill.
    if known_rows.empty or gap_rows.empty:
        print(f"Insufficient data for imputation in column '{target_column}'")
        return df

    known_features = known_rows.drop(columns=[target_column])
    gap_features = gap_rows.drop(columns=[target_column])

    # The regressor cannot be fitted or applied on incomplete feature rows.
    if known_features.isnull().values.any() or gap_features.isnull().values.any():
        print(f"Missing values detected in features for column '{target_column}'")
        return df

    forest = RandomForestRegressor(n_estimators=100, random_state=42)
    forest.fit(known_features, known_rows[target_column])

    df.loc[gap_mask, target_column] = forest.predict(gap_features)
    return df

def impute_missing_values_knn(df, n_neighbors=1):
    """
    Fill missing values with scikit-learn's KNNImputer.

    Parameters:
    df (pd.DataFrame): Numeric frame to impute; it is not modified.
    n_neighbors (int): Number of neighbours used for each imputed value.

    Returns:
    pd.DataFrame: A new frame with the same columns and index as `df`.
    Columns that contain no observed value at all are returned as all-NaN,
    because there is nothing to impute them from. An empty frame, or one with
    no observed values, is returned as an unchanged copy.
    """
    # KNNImputer rejects input with zero rows.
    if df.empty:
        return df.copy()

    # KNNImputer drops columns that are entirely missing, which would make its
    # output narrower than df; impute only the columns it can actually handle.
    usable_columns = df.columns[df.notna().any(axis=0)]
    if len(usable_columns) == 0:
        return df.copy()

    imputer = KNNImputer(n_neighbors=n_neighbors)
    filled = pd.DataFrame(
        imputer.fit_transform(df[usable_columns]),
        columns=usable_columns,
        index=df.index,
    )

    # Restore the original column order, re-adding the all-NaN columns.
    return filled.reindex(columns=df.columns)


# Impute missing values
# Each station frame is replaced in dfs by its imputed version.
for key in dfs:
    df = dfs[key]
    print(f"Processing {key}")
    
    # First, use KNN to fill missing values in features
    # (applied to the whole frame, so the yyyy and mm columns are included).
    df = impute_missing_values_knn(df)
    
    # List of columns you want to impute
    columns_to_impute = ['tmax', 'tmin', 'af_days', 'rain_mm', 'sun_hours']

    # Second pass: random-forest imputation, only for columns that still
    # contain nulls after the KNN step. In the recorded output of this cell no
    # "Imputing missing values" line appears, i.e. this branch did not run.
    for col in columns_to_impute:
        if df[col].isnull().any():
            print(f"Imputing missing values in {col} of {key}")
            df = impute_missing_values_rf(df, col)
    
    dfs[key] = df
    print(f"Finished processing {key}")

def check_missing_values(dfs):
    """
    Print a missing-value report for every dataset.

    For each entry the total number of missing cells is printed. When that
    total is positive, the per-column counts and the per-column percentages
    (relative to the number of rows) follow.

    Parameters:
    dfs (dict): Maps a dataset name to its DataFrame.

    Returns:
    None
    """
    for key, df in dfs.items():
        print(f"Checking missing values for dataset: {key}")

        null_counts = df.isnull().sum()
        total_missing = null_counts.sum()
        print(f"Total missing values: {total_missing}")

        if total_missing > 0:
            print("Missing values per column:")
            print(null_counts)

            print("Percentage of missing values per column:")
            print((null_counts / len(df)) * 100)

        print("\n")  # Blank lines between datasets

# Call the function
check_missing_values(dfs)

def assign_season(month):
    """
    Map a month number to a season code.

    Returns 1 for winter (12, 1, 2), 2 for spring (3, 4, 5), 3 for summer
    (6, 7, 8) and 4 for autumn (9, 10, 11). Any other value yields None.
    """
    season_months = (
        (1, (12, 1, 2)),    # Winter
        (2, (3, 4, 5)),     # Spring
        (3, (6, 7, 8)),     # Summer
        (4, (9, 10, 11)),   # Autumn
    )
    for season_code, months in season_months:
        if month in months:
            return season_code
    return None
def calculate_days_in_month(row):
    """
    Return the number of days in the month described by a row.

    `row` must support item access with the keys 'yyyy' and 'mm'; both values
    are converted with int() before the calendar lookup.
    """
    _first_weekday, day_count = calendar.monthrange(int(row['yyyy']), int(row['mm']))
    return day_count

# Add derived columns to every station frame (the frames in dfs are modified in place).
for name, df in dfs.items():
    # Calculate temp_range and avg_temp
    # (disabled) df['montly_temp_variation'] = (df['tmax'] - df['tmin'])/ df['tmin']
    df['temp_range'] = df['tmax'] - df['tmin']
    df['avg_temp'] = (df['tmax'] + df['tmin']) / 2
    
    # Cast yyyy and mm to int; astype(int) truncates any fractional part of a float value.
    df["yyyy"] = df["yyyy"].astype(int)
    df["mm"] = df["mm"].astype(int)
    
    # Season code (1-4) from the month; assign_season returns None for values outside 1-12.
    df['season'] = df['mm'].apply(assign_season)
    # Calendar length of each row's month, computed row by row.
    df['days_in_month'] = df.apply(calculate_days_in_month, axis=1)
    # Share of the month's days counted in af_days.
    df['frost_day_props'] = df['af_days'] / df['days_in_month']

    # Update the DataFrame in the dictionary
    dfs[name] = df

def merge_weather_datasets(data_dict, station='station'):
    """
    Combine multiple weather DataFrames into a single DataFrame with an identifier column.

    Parameters:
    - data_dict (dict): Dictionary where keys are station names and values are DataFrames.
      The input frames are left unmodified.
    - station (str): The name of the column to be added to identify the station.

    Returns:
    - pd.DataFrame: All rows of all frames, in dictionary order, with a fresh
      0..n-1 index and the identifier column holding each row's station name.
      An empty dictionary yields an empty frame containing only that column.
    """
    # assign() returns a labelled copy, so the caller's frames do not gain a
    # column as a side effect and the function can be called repeatedly.
    labelled_frames = [
        df.assign(**{station: station_name})
        for station_name, df in data_dict.items()
    ]

    # pd.concat raises on an empty list; return an empty result instead.
    if not labelled_frames:
        return pd.DataFrame(columns=[station])

    return pd.concat(labelled_frames, ignore_index=True)

# Combine the weather datasets with 'station' column
combined_df = merge_weather_datasets(dfs, station='station')

from datetime import datetime

def create_date(year, month):
    """Return the datetime for midnight on the first day of the given month."""
    first_day = datetime(year=year, month=month, day=1)
    return first_day

# Replace the yyyy/mm columns with a monthly period index.
# NOTE: this section mutates combined_df in place and drops yyyy/mm, so it
# cannot be run a second time without rebuilding combined_df first.
combined_df['yyyy'] = combined_df['yyyy'].astype(int)
combined_df['mm'] = combined_df['mm'].astype(int)
# First day of each row's month as a datetime ...
combined_df['date'] = [create_date(year, month) for year, month in zip(combined_df['yyyy'], combined_df['mm'])]
# ... then reduced to a monthly period.
combined_df['date'] = combined_df['date'].dt.to_period('M')
combined_df.set_index('date', inplace=True)
combined_df.drop(columns=['yyyy', 'mm'], inplace=True)
# Order the rows by month; rows of all stations are interleaved at this point.
combined_df.sort_index(inplace=True)
stations = combined_df['station'].unique()

# Split the DataFrame into a dictionary of DataFrames
station_dfs = {station: combined_df[combined_df['station'] == station] for station in stations}

# The station name is now the dictionary key, so the column is removed from each frame.
for station,df in station_dfs.items():
    del df['station']

# Display the DataFrames for each station
for station, df in station_dfs.items():
    print(f"\nData for {station}:\n")
    print(df)


Processing aberporth
Finished processing aberporth
Processing armagh
Finished processing armagh
Processing ballypatrick
Finished processing ballypatrick
Processing bradford
Finished processing bradford
Processing braemar
Finished processing braemar
Processing camborne
Finished processing camborne
Processing cambridge
Finished processing cambridge
Processing cardiff
Finished processing cardiff
Processing chivenor
Finished processing chivenor
Processing cwmystwyth
Finished processing cwmystwyth
Processing dunstaffnage
Finished processing dunstaffnage
Processing durham
Finished processing durham
Processing eastbourne
Finished processing eastbourne
Processing eskdalemuir
Finished processing eskdalemuir
Processing heathrow
Finished processing heathrow
Processing hurn
Finished processing hurn
Processing lerwick
Finished processing lerwick
Processing leuchars
Finished processing leuchars
Processing lowestoft
Finished processing lowestoft
Processing manston
Finished processing manston
Processi

         tmax  tmin  af_days  rain_mm  sun_hours  temp_range  avg_temp  \
date                                                                     
1957-01   9.1   2.5      7.0     73.0       73.8         6.6      5.80   
1957-02   9.6   2.7      8.0    100.5      166.6         6.9      6.15   
1957-03  12.9   5.5      4.0     61.3      175.5         7.4      9.20   
1957-04  14.2   4.4      2.0      5.5      133.5         9.8      9.30   
1957-05  16.1   5.5      1.0     43.7       99.3        10.6     10.80   
...       ...   ...      ...      ...        ...         ...       ...   
2024-02  11.7   5.3      4.0    146.6       48.8         6.4      8.50   
2024-03  12.2   4.6      4.0    113.0       84.5         7.6      8.40   
2024-04  14.1   6.4      1.0     71.2      135.3         7.7     10.25   
2024-05  18.1   8.8      0.0     84.6      191.2         9.3     13.45   
2024-06  20.5   8.8      0.0     11.4      230.0        11.7     14.65   

         season  days_in_month  frost

In [12]:
# 
# df = station_dfs['oxford']

In [13]:
# station_dfs['oxford'][variables].plot(subplots=True)

In [14]:
# scaler = MinMaxScaler(feature_range=(0,1))
# scaled_df = scaler.fit_transform(df[variables])
# scaled_df.shape

In [15]:
# def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
#     n_vars = 1 if type(data) is list else data.shape[1]
#     df = pd.DataFrame(data)
#     cols, names = list(), list()
    
#     for i in range(n_in,0,-1):
#         cols.append(df.shift(i))
#         names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
        
#     for i in range(0, n_out):
#         cols.append(df.shift(-i))
#         if i == 0:
#             names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
#         else:
#             names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
#     agg = pd.concat(cols, axis=1)
#     agg.columns = names
#     if dropnan:
#         agg.dropna(inplace=True)
#     return agg

In [16]:
# reframed = series_to_supervised(scaled_df, 12, 1)

In [17]:
# values = reframed.values
# n_train_hours = int(len(values) * 0.8)
# train = values[:n_train_hours, :]
# test = values[n_train_hours:, :]

# n_obs = 10
# train_X, train_y = train[:, :n_obs], train[:, -n_obs:]
# test_X, test_y = test[:, :n_obs], test[:, -n_obs:]

# train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
# test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))

# train_y = train_y.reshape((train_y.shape[0], train_y.shape[1]))
# test_y = test_y.reshape((test_y.shape[0], test_y.shape[1]))

In [18]:
# def create_model():
#     model = Sequential()
#     model.add(Bidirectional(LSTM(50, return_sequences=True), input_shape=(train_X.shape[1], train_X.shape[2])))
#     model.add(LSTM(10))
#     model.compile(optimizer='adam', loss='mse')
#     return model

In [19]:
# model = create_model()
# model.fit(train_X, train_y, epochs=50, batch_size=24, validation_data=(test_X, test_y), verbose=0, shuffle=False)


In [20]:
# predictions = model.predict(test_X)

In [21]:
# mae = mean_absolute_error(test_y, predictions)
# rmse = np.sqrt(mean_squared_error(test_y, predictions))
# print('Test MAE: %.3f '% mae)
# print('Test RMSE: %.3f ' % rmse)

In [22]:
# test_predictions = model.predict(test_X)

# inv_test_predictions = scaler.inverse_transform(test_predictions)
# inv_test = scaler.inverse_transform(test_y)



# for i in range(inv_test.shape[1]):
#     plt.figure(figsize=(15,30))
#     plt.subplot(inv_test.shape[1], 1, i+1)
#     plt.plot(inv_test[:, i], label='Actual')
#     plt.plot(inv_test_predictions[:, i], label='Predicted')
#     plt.title(variables[i])
#     plt.legend()
    
# plt.tight_layout()
# plt.show()

In [23]:
# import pandas as pd
# import numpy as np
# from sklearn.preprocessing import MinMaxScaler
# from sklearn.metrics import mean_absolute_error, mean_squared_error
# from keras.models import Sequential
# from keras.layers import LSTM, Bidirectional
# import matplotlib.pyplot as plt

# def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
#     n_vars = 1 if type(data) is list else data.shape[1]
#     df = pd.DataFrame(data)
#     cols, names = list(), list()
    
#     for i in range(n_in,0,-1):
#         cols.append(df.shift(i))
#         names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
        
#     for i in range(0, n_out):
#         cols.append(df.shift(-i))
#         if i == 0:
#             names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
#         else:
#             names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
#     agg = pd.concat(cols, axis=1)
#     agg.columns = names
#     if dropnan:
#         agg.dropna(inplace=True)
#     return agg

# def create_model(input_shape):
#     model = Sequential()
#     model.add(Bidirectional(LSTM(50, return_sequences=True), input_shape=input_shape))
#     model.add(LSTM(10))
#     model.compile(optimizer='adam', loss='mse')
#     return model

# def evaluate_and_plot(test_y, predictions, variables, scaler):
#     inv_test_predictions = scaler.inverse_transform(predictions)
#     inv_test = scaler.inverse_transform(test_y)

#     for i in range(inv_test.shape[1]):
#         plt.figure(figsize=(15,30))
#         plt.subplot(inv_test.shape[1], 1, i+1)
#         plt.plot(inv_test[:, i], label='Actual')
#         plt.plot(inv_test_predictions[:, i], label='Predicted')
#         plt.title(variables[i])
#         plt.legend()
        
#     plt.tight_layout()
#     plt.show()


In [24]:
# from sklearn.preprocessing import MinMaxScaler

# # List of variables you want to use
# variables = ['tmax', 'tmin', 'af_days', 'rain_mm', 'sun_hours', 'temp_range', 'avg_temp', 'season', 'days_in_month', 'frost_day_props']

# # Iterate over each dataset in the dictionary
# for station_name, df in station_dfs.items():
#     print(f"Processing dataset for station: {station_name}")

#     # Select and scale data
#     df = df[variables]
#     scaler = MinMaxScaler(feature_range=(0,1))
#     scaled_df = scaler.fit_transform(df)
    
#     # Convert to supervised learning format
#     reframed = series_to_supervised(scaled_df, 12, 1)
#     values = reframed.values

#     # Split into train and test sets
#     n_train_hours = int(len(values) * 0.8)
#     train = values[:n_train_hours, :]
#     test = values[n_train_hours:, :]
    
#     n_obs = 10
#     train_X, train_y = train[:, :n_obs], train[:, -n_obs:]
#     test_X, test_y = test[:, :n_obs], test[:, -n_obs:]

#     train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
#     test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))
#     train_y = train_y.reshape((train_y.shape[0], train_y.shape[1]))
#     test_y = test_y.reshape((test_y.shape[0], test_y.shape[1]))

#     # Create and train the model
#     model = create_model((train_X.shape[1], train_X.shape[2]))
#     model.fit(train_X, train_y, epochs=50, batch_size=24, validation_data=(test_X, test_y), verbose=0, shuffle=False)
    
#     # Make predictions
#     predictions = model.predict(test_X)
    
#     # Evaluate and plot
#     mae = mean_absolute_error(test_y, predictions)
#     rmse = np.sqrt(mean_squared_error(test_y, predictions))
#     print(f"Station: {station_name} - Test MAE: {mae:.3f}, Test RMSE: {rmse:.3f}")

#     # Plot results
#     evaluate_and_plot(test_y, predictions, variables, scaler)


In [33]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from keras.models import Sequential
from keras.layers import LSTM, Bidirectional
import matplotlib.pyplot as plt

# Define functions

def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """
    Turn a time series into a table of lagged inputs and forecast outputs.

    Each output row holds, left to right, the observations at t-n_in ... t-1
    followed by those at t ... t+n_out-1. Columns are named 'var<k>(t-<i>)',
    'var<k>(t)' and 'var<k>(t+<i>)', where k is the 1-based variable number.

    Parameters:
    data (list or 2-D array): The series; a list is treated as one variable,
        otherwise the number of variables is data.shape[1].
    n_in (int): Number of lagged time steps used as input.
    n_out (int): Number of time steps used as output, starting at t.
    dropnan (bool): Drop the rows made incomplete by the shifting.

    Returns:
    pd.DataFrame: The reframed series.
    """
    frame = pd.DataFrame(data)
    n_vars = 1 if type(data) is list else data.shape[1]

    def _suffix(offset):
        # Time label of a column block relative to the current step t.
        if offset < 0:
            return f'(t-{-offset})'
        if offset == 0:
            return '(t)'
        return f'(t+{offset})'

    # Negative offsets are the lagged inputs, non-negative ones the outputs.
    offsets = list(range(-n_in, 0)) + list(range(n_out))
    shifted_blocks = [frame.shift(-offset) for offset in offsets]
    labels = [f'var{j+1}{_suffix(offset)}' for offset in offsets for j in range(n_vars)]

    agg = pd.concat(shifted_blocks, axis=1)
    agg.columns = labels
    if dropnan:
        agg.dropna(inplace=True)
    return agg

def create_model(input_shape, n_outputs=10):
    """
    Build and compile the recurrent forecasting model.

    The network is a bidirectional LSTM (50 units, returning the full
    sequence) followed by an LSTM layer whose units are the model output.
    It is compiled with the Adam optimizer and mean-squared-error loss.

    Parameters:
    input_shape (tuple): (time steps, features) of one input sample.
    n_outputs (int): Number of values predicted per sample. It must equal the
        width of the training targets; the default of 10 matches the ten
        entries of the notebook's `variables` list.

    Returns:
    The compiled Sequential model.
    """
    model = Sequential()
    model.add(Bidirectional(LSTM(50, return_sequences=True), input_shape=input_shape))
    # NOTE(review): the output layer is an LSTM, not a Dense layer, so its
    # default activation applies to the predictions - confirm this is intended.
    model.add(LSTM(n_outputs))
    model.compile(optimizer='adam', loss='mse')
    return model

def predict_future_values(model, last_known_values, n_months, n_obs):
    """
    Forecast several steps ahead by feeding each prediction back as input.

    Parameters:
    model: Object whose predict(batch, verbose=0) returns one row of feature
        values for a batch of shape (1, n_obs, n_features).
    last_known_values (np.ndarray): The most recent n_obs observations, one
        row per time step.
    n_months (int): Number of steps to forecast.
    n_obs (int): Length of the input window.

    Returns:
    np.ndarray: The forecasts, one row per step.
    """
    n_features = last_known_values.shape[1]
    window = last_known_values.reshape((1, n_obs, n_features))
    forecasts = []

    step = 0
    while step < n_months:
        predicted = model.predict(window, verbose=0)
        forecasts.append(predicted[0])
        # Slide the window one step: np.roll moves the oldest row to the end
        # (and returns a new array), then that slot takes the new prediction.
        window = np.roll(window, shift=-1, axis=1)
        window[0, -1, :] = predicted
        step += 1

    return np.array(forecasts)

def evaluate_and_plot(test_y, predictions, variables, scaler):
    """
    Plot actual against predicted values, one panel per variable.

    Both arrays are mapped back to original units with
    scaler.inverse_transform, then drawn in a single figure of vertically
    stacked panels.

    Parameters:
    test_y (array): Scaled true values, one column per variable.
    predictions (array): Scaled predicted values, same shape as test_y.
    variables (sequence of str): Panel titles, one per column.
    scaler: Fitted scaler providing inverse_transform.

    Returns:
    None
    """
    inv_test_predictions = scaler.inverse_transform(predictions)
    inv_test = scaler.inverse_transform(test_y)

    n_panels = inv_test.shape[1]
    if n_panels == 0:
        return  # nothing to draw

    # One figure holding all panels; creating a figure inside the loop would
    # produce a separate, mostly empty figure for every variable.
    fig, axes = plt.subplots(n_panels, 1, figsize=(15, 30), squeeze=False)
    for i, ax in enumerate(axes[:, 0]):
        ax.plot(inv_test[:, i], label='Actual')
        ax.plot(inv_test_predictions[:, i], label='Predicted')
        ax.set_title(variables[i])
        ax.legend()

    fig.tight_layout()
    plt.show()

# Forecasts per station: station name -> DataFrame of the next 12 months.
future_predictions_dict = {}
variables = ['tmax', 'tmin', 'af_days', 'rain_mm', 'sun_hours', 'temp_range', 'avg_temp', 'season', 'days_in_month', 'frost_day_props']
n_obs = 10  # Number of time steps

for station_name, df in station_dfs.items():
    print(f"Processing dataset for station: {station_name}")

    # Preprocess the data
    df = df[variables]

    # The reframed table has len(df) - n_obs rows; at least two are needed so
    # that the 80/20 split leaves one row on each side.
    if len(df) - n_obs < 2:
        print(f"Skipping {station_name}: not enough observations to build train and test sets")
        continue

    # NOTE(review): the scaler is fitted on the whole series, including the
    # rows later used for testing, so the reported test errors are optimistic.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_df = scaler.fit_transform(df)

    # Convert to supervised learning format
    reframed = series_to_supervised(scaled_df, n_obs, 1)
    values = reframed.values

    # Split into train and test sets (chronological, no shuffling)
    n_train_hours = int(len(values) * 0.8)
    train = values[:n_train_hours, :]
    test = values[n_train_hours:, :]

    # Prepare data for LSTM: the first n_obs * n_features columns are the
    # lagged inputs, the last n_features columns are the targets at time t.
    train_X = train[:, :n_obs * len(variables)].reshape((train.shape[0], n_obs, len(variables)))
    test_X = test[:, :n_obs * len(variables)].reshape((test.shape[0], n_obs, len(variables)))
    train_y = train[:, -len(variables):]
    test_y = test[:, -len(variables):]

    # Create and train the model
    model = create_model((train_X.shape[1], train_X.shape[2]))
    model.fit(train_X, train_y, epochs=50, batch_size=24, validation_data=(test_X, test_y), verbose=0, shuffle=False)

    # Evaluate the model (errors are in scaled units)
    predictions = model.predict(test_X)
    mae = mean_absolute_error(test_y, predictions)
    rmse = np.sqrt(mean_squared_error(test_y, predictions))
    print(f"Station: {station_name} - Test MAE: {mae:.3f}, Test RMSE: {rmse:.3f}")

    # Predict the next 12 months
    last_known_values = scaled_df[-n_obs:]  # Last n_obs observations
    future_predictions = predict_future_values(model, last_known_values, 12, n_obs)

    # Ensure the future predictions have the same number of features as the scaler was trained on
    future_predictions = future_predictions.reshape(-1, len(variables))

    # Inverse transform predictions
    future_predictions = scaler.inverse_transform(future_predictions)

    # Label the forecasts with the months that follow the station's last
    # observation, not with the date on which the notebook happens to run.
    # Relies on the monthly period index set when station_dfs was built.
    first_forecast_month = df.index[-1] + 1
    future_dates = pd.period_range(start=first_forecast_month, periods=12).strftime('%Y-%m')
    future_df = pd.DataFrame(future_predictions, index=future_dates, columns=variables)

    future_predictions_dict[station_name] = future_df
    print(future_df.head())


Processing dataset for station: armagh


  super().__init__(**kwargs)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step
Station: armagh - Test MAE: 0.065, Test RMSE: 0.095
              tmax       tmin   af_days    rain_mm   sun_hours  temp_range  \
2024-07  18.762205  10.421249  0.096601  65.605331  128.932663    8.347065   
2024-08  18.847021  10.656327  0.007967  76.243721  121.473000    8.072048   
2024-09  16.593140   8.908561  0.220454  68.603516  113.368156    7.610687   
2024-10  13.037106   6.343275  1.713929  76.849808   89.429153    6.653501   
2024-11   9.582968   3.677126  5.332613  71.395401   65.458809    5.936857   

          avg_temp    season  days_in_month  frost_day_props  
2024-07  14.572140  3.064113      30.930000         0.000399  
2024-08  14.860631  3.075823      30.936062         0.004056  
2024-09  12.666356  3.933950      30.120861         0.002847  
2024-10   9.701289  3.985931      30.869034         0.059174  
2024-11   6.453872  3.993855      30.125641         0.173670  
Processing dataset for stat

  super().__init__(**kwargs)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step
Station: oxford - Test MAE: 0.062, Test RMSE: 0.093
              tmax       tmin   af_days    rain_mm   sun_hours  temp_range  \
2024-07  21.646265  12.189724 -0.200326  52.266247  193.952835    9.525599   
2024-08  21.204874  12.516827 -0.173411  57.038399  171.349289    8.612154   
2024-09  18.295288  10.557183  0.162244  54.304684  144.117523    7.909059   
2024-10  13.341340   7.525580  1.884651  63.733673   99.918442    6.581000   
2024-11   9.031945   4.435500  5.981750  57.532684   70.512756    5.360907   

          avg_temp    season  days_in_month  frost_day_props  
2024-07  16.863598  2.995938      30.880310        -0.006041  
2024-08  16.824238  3.042905      30.954475        -0.004958  
2024-09  14.284159  3.957219      30.171003        -0.000045  
2024-10  10.096427  3.986285      30.864853         0.056486  
2024-11   6.367551  3.970506      30.201777         0.193581  
Processing dataset for stat

  super().__init__(**kwargs)


KeyboardInterrupt: 