In [None]:
import pandas as pd 
# Load the pollutant and the weather measurements and join them on their
# shared "Messzeit" column (default inner join: only keys present in both
# files are kept).
df1 = pd.read_csv("billwerder_schadstoffe_2012-2023.csv")
df2 = pd.read_csv("billwerder_wetter_2012-2023.csv")
df = pd.merge(df1, df2, on="Messzeit")

# Rolling mean of sulphur dioxide over up to 7 rows; this column is kept as a
# model input further down. The series is shifted by one row first so the
# window only covers *earlier* rows: without the shift, the current row's SO2
# value (the prediction target) would be averaged into its own feature and
# leak into the model. The first row has no predecessor and therefore gets NaN.
# NOTE(review): assumes the merged rows are in chronological order - confirm.
rolling_mean = (
    df['Schwefeldioxid (µg/m3)']
    .shift(1)
    .rolling(window=7, min_periods=1)
    .mean()
)
df['Schwefeldioxid Mittelwert'] = rolling_mean

In [None]:
# Positional split of the merged frame: rows 0-2554 for training,
# rows 2555-2919 for validation, and everything from row 2920 on for testing.
df_train, df_val, df_test = (
    df.iloc[:2555],
    df.iloc[2555:2920],
    df.iloc[2920:],
)

In [None]:
# Discard every row that has a missing value in any column, split by split.
df_train, df_val, df_test = (
    split.dropna() for split in (df_train, df_val, df_test)
)

In [None]:
# Columns removed from the model inputs: the raw target, the other pollutant
# readings, the timestamp, and the two 'Unnamed: 0_*' columns (presumably
# index columns saved into the CSV files - verify).
DROP_COLUMNS = [
    'Feinstaub (µg/m3)',
    'Messzeit',
    'Unnamed: 0_x',
    'Stickstoffdioxid (µg/m3)',
    'Stickstoffmonoxid (µg/m3)',
    'Schwefeldioxid (µg/m3)',
    'Unnamed: 0_y',
]
# Column the network is trained to predict.
TARGET_COLUMN = 'Schwefeldioxid (µg/m3)'

x_train = df_train.drop(columns=DROP_COLUMNS)
y_train = df_train[TARGET_COLUMN]

# The validation pair must come from the validation split. It was previously
# built from df_train, so validation loss and early stopping were measured on
# the training data itself.
x_val = df_val.drop(columns=DROP_COLUMNS)
y_val = df_val[TARGET_COLUMN]

x_test = df_test.drop(columns=DROP_COLUMNS)
y_test = df_test[TARGET_COLUMN]

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

def plot_history(metrics, training_history=None):
    """Plot per-epoch training curves as a seaborn line plot.

    Parameters
    ----------
    metrics : str or list of str
        Key(s) of the recorded history to plot, e.g.
        ``'root_mean_squared_error'``.
    training_history : object or mapping, optional
        Either an object exposing a ``history`` mapping of per-epoch values,
        or such a mapping itself. When omitted, the notebook-level variable
        ``history`` is used, so existing ``plot_history(metrics)`` calls keep
        working.

    Notes
    -----
    The y-axis label is fixed to "RMSE"; the x-axis is labelled "epochs".
    """
    # Fall back to the global only when the caller did not pass a history, so
    # the function no longer silently depends on hidden notebook state.
    source = history if training_history is None else training_history
    # Use the `.history` attribute when present, otherwise treat the object
    # as the mapping of metric name -> per-epoch values.
    records = getattr(source, "history", source)
    history_df = pd.DataFrame.from_dict(records)
    sns.lineplot(data=history_df[metrics])
    plt.xlabel("epochs")
    plt.ylabel("RMSE")

In [None]:
from sklearn.model_selection import GridSearchCV
from scikeras.wrappers import KerasRegressor
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization, Dense
from tensorflow.keras.callbacks import EarlyStopping
# Early-stopping callback: watches 'val_loss' with a patience of 10 epochs.
earlystopper = EarlyStopping(monitor='val_loss', patience=10)

def create_nn(neurons=100, n_features=6):
    """Build and compile the feed-forward regression network.

    The model is a ``Sequential`` stack of: batch normalisation on the inputs,
    two ReLU dense layers (``neurons`` and ``neurons // 2`` units) and a
    single-unit output layer. It is compiled with the Adam optimizer, MSE
    loss and root-mean-squared-error as an additional metric.

    Parameters
    ----------
    neurons : int, default 100
        Units in the first hidden layer; the second hidden layer gets half of
        that (at least 1).
    n_features : int, default 6
        Number of input columns. The default matches the previously
        hard-coded input shape ``(6,)``.

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    # Guard against a 0-unit layer when neurons == 1.
    second_layer_units = max(1, neurons // 2)

    model = Sequential()
    model.add(BatchNormalization(input_shape=(n_features,)))
    model.add(Dense(neurons, 'relu'))
    model.add(Dense(second_layer_units, 'relu'))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse', metrics=[keras.metrics.RootMeanSquaredError()])
    return model

In [None]:
# Hyper-parameter grid for the search: only the batch size is varied.
param_grid = dict(batch_size=[8, 16, 32])

In [None]:
# Wrap the network factory in a scikit-learn compatible estimator so it can be
# used with GridSearchCV. `model=` is the current scikeras argument name;
# `build_fn=` is its deprecated alias.
model = KerasRegressor(model=create_nn, verbose=2)

In [None]:
# Grid search over `param_grid` for the wrapped network, using 3-fold
# cross-validation.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=3,
)

In [None]:
# Run the search on the training data; the validation pair and the
# early-stopping callback are passed along as extra fit arguments.
grid_result = grid_search.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    callbacks=[earlystopper],
)

In [None]:
# Report the best parameter combination found by the grid search.
best_params = grid_result.best_params_
print("Best parameters: ", best_params)

In [None]:
# Train the final network as a native Keras model. Up to this point `model`
# is the scikeras KerasRegressor wrapper, whose `fit` returns the wrapper
# itself rather than a Keras History object and which has no `evaluate`
# method - so `history.history` in plot_history and `model.evaluate` further
# down would fail. Building the network directly gives both.
# NOTE: this rebinds `model`; re-run the KerasRegressor cell before repeating
# the grid search.
# NOTE(review): batch_size is hard-coded to 8 rather than read from
# grid_result.best_params_ - confirm it matches the search result.
model = create_nn()
history = model.fit(x_train, y_train,
                    batch_size=8,
                    epochs=200,
                    validation_data=(x_val, y_val),
                    callbacks=[earlystopper],
                    verbose=2)

In [None]:
# Training-set RMSE per epoch.
plot_history(metrics='root_mean_squared_error')

In [None]:
# Model outputs for the training and the test features, used by the
# prediction scatter plots.
y_train_predicted, y_test_predicted = (
    model.predict(features) for features in (x_train, x_test)
)

In [None]:
def plot_predictions(y_pred, y_true, title):
    """Scatter predicted values (x-axis) against true values (y-axis).

    Switches matplotlib to the 'ggplot' style, then draws on the current
    pyplot axes with small, half-transparent markers and the given title.
    """
    # The style is set first so it is in effect when the axes are created.
    plt.style.use('ggplot')
    plt.title(title)
    plt.scatter(x=y_pred, y=y_true, s=10, alpha=0.5)
    plt.ylabel("true")
    plt.xlabel("predicted")
   

# Predicted vs. true values on the training split.
plot_predictions(
    y_pred=y_train_predicted,
    y_true=y_train,
    title='Predictions on the training set',
)

In [None]:
# Predicted vs. true values on the held-out test split.
plot_predictions(
    y_pred=y_test_predicted,
    y_true=y_test,
    title='Predictions on the test set',
)

In [None]:
# Evaluate on the training and test splits; `return_dict=True` yields the
# metrics keyed by name so the RMSE can be looked up directly.
train_metrics, test_metrics = (
    model.evaluate(features, target, return_dict=True)
    for features, target in ((x_train, y_train), (x_test, y_test))
)
print('Train RMSE: {:.2f}, Test RMSE: {:.2f}'.format(
    train_metrics['root_mean_squared_error'],
    test_metrics['root_mean_squared_error'],
))


In [None]:
# Display the test-set feature frame for a visual check of the model inputs.
x_test

In [None]:
# Training vs. validation RMSE per epoch, drawn in one figure.
plot_history(
    metrics=[
        'root_mean_squared_error',
        'val_root_mean_squared_error',
    ]
)
