In [71]:
import pandas as pd

In [72]:
df_wetter = pd.read_csv("../Temp_DWH/wetterdaten_median.csv")
df_else = pd.read_csv("../Temp_DWH/gemerged.csv")
df_else = df_else.loc[df_else['Unternehmensbeschreibung'] == "Campingplätze"]
df_else = df_else[["Open", "Umsatz in millionen", "Datum"]]
df_else = df_else.groupby("Datum").mean()

In [73]:
df = pd.merge(df_else, df_wetter, on="Datum")
df.head(300)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 230 entries, 0 to 229
Data columns (total 17 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Datum                230 non-null    object 
 1   Open                 211 non-null    float64
 2   Umsatz in millionen  230 non-null    float64
 3   FX                   230 non-null    float64
 4   FM                   230 non-null    float64
 5   RSK                  230 non-null    float64
 6   RSKF                 230 non-null    float64
 7   SDK                  230 non-null    float64
 8   SHK_TAG              230 non-null    float64
 9   NM                   230 non-null    float64
 10  VPM                  230 non-null    float64
 11  PM                   230 non-null    float64
 12  TMK                  230 non-null    float64
 13  UPM                  230 non-null    float64
 14  TXK                  230 non-null    float64
 15  TNK                  230 non-null    flo

In [84]:
from pmdarima import auto_arima
ex_var = ["TMK", "PM", "FM", "RSK", "SDK", "SHK_TAG", "NM", "VPM"]
# Automatische ARIMA-Identifikation
auto_model = auto_arima(df["Umsatz in millionen"], 
                      exogenous= ex_var,
                      m=12,               # frequency of series                      
                      seasonal=True,  # TRUE if seasonal series
                      d=None,             # let model determine 'd'
                      test='adf',         # use adftest to find optimal 'd'
                      start_p=0, start_q=0, # minimum p and q
                      max_p=12, max_q=12, # maximum p and q
                      D=None,             # let model determine 'D'
                      trace=True,
                      error_action='ignore',  
                      suppress_warnings=True, 
                      stepwise=True)
print(auto_model.summary())
print(auto_model)

Performing stepwise search to minimize aic
 ARIMA(0,0,0)(1,0,1)[12] intercept   : AIC=2427.944, Time=0.30 sec
 ARIMA(0,0,0)(0,0,0)[12] intercept   : AIC=2426.474, Time=0.01 sec
 ARIMA(1,0,0)(1,0,0)[12] intercept   : AIC=2349.452, Time=0.17 sec
 ARIMA(0,0,1)(0,0,1)[12] intercept   : AIC=2330.578, Time=0.16 sec
 ARIMA(0,0,0)(0,0,0)[12]             : AIC=2806.754, Time=0.01 sec
 ARIMA(0,0,1)(0,0,0)[12] intercept   : AIC=2329.226, Time=0.06 sec
 ARIMA(0,0,1)(1,0,0)[12] intercept   : AIC=2330.539, Time=0.19 sec
 ARIMA(0,0,1)(1,0,1)[12] intercept   : AIC=2332.652, Time=0.26 sec
 ARIMA(1,0,1)(0,0,0)[12] intercept   : AIC=2319.416, Time=0.24 sec
 ARIMA(1,0,1)(1,0,0)[12] intercept   : AIC=2320.950, Time=0.45 sec
 ARIMA(1,0,1)(0,0,1)[12] intercept   : AIC=2320.946, Time=0.29 sec
 ARIMA(1,0,1)(1,0,1)[12] intercept   : AIC=2323.010, Time=0.57 sec
 ARIMA(1,0,0)(0,0,0)[12] intercept   : AIC=2348.700, Time=0.02 sec
 ARIMA(2,0,1)(0,0,0)[12] intercept   : AIC=2293.192, Time=0.26 sec
 ARIMA(2,0,1)(1,0,0

In [89]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Beispielhafte Daten für den DataFrame 'df'
# In der Praxis würden Sie df direkt einlesen, z.B. aus einer CSV-Datei
df_wetter = pd.read_csv("../Temp_DWH/wetterdaten_median.csv")
df_else = pd.read_csv("../Temp_DWH/gemerged.csv")
df_else = df_else.loc[df_else['Unternehmensbeschreibung'] == "Campingplätze"]
df_else = df_else[["Open", "Umsatz in millionen", "Datum"]]
df_else = df_else.groupby("Datum").mean()
df = pd.merge(df_else, df_wetter, on="Datum")
# Datum als Index setzen und in das Datetime-Format konvertieren
df['Datum'] = pd.to_datetime(df['Datum'])
df.set_index('Datum', inplace=True)
exog = ["TMK", "PM", "FM", "RSK", "SDK", "SHK_TAG", "NM", "VPM", "UPM", "TNK", "TXK"]

# Funktionen zur Modellanpassung und Evaluierung
def evalARMA(model, df, forecast_steps=12, mkplot=True):
    # Training: Vorhersagen innerhalb des Trainingsdatensatzes
    eval_df = df[['Umsatz in millionen']].copy()
    eval_df['pred'] = model.fittedvalues
    print("Mean Squared Error (In-Sample):", mean_squared_error(eval_df["Umsatz in millionen"], eval_df.pred))
    
    # Forecasting: Vorhersagen für zukünftige Zeiträume
    forecast_exog = df[exog].iloc[-forecast_steps:]  # Verwenden Sie die letzten bekannten exogenen Variablenwerte
    forecast_index = pd.date_range(start=df.index[-1] + pd.DateOffset(months=1), periods=forecast_steps, freq='M')
    forecast = model.get_forecast(steps=forecast_steps, exog=forecast_exog)
    forecast_mean = forecast.predicted_mean
    forecast_ci = forecast.conf_int()

    # Ergebnisse zusammenfassen
    forecast_df = pd.DataFrame({
        'Umsatz in millionen': [np.nan] * forecast_steps,
        'pred': forecast_mean
    }, index=forecast_index)
    
    eval_df = pd.concat([eval_df, forecast_df])

    # Plot erstellen
    if mkplot:
        fig = make_subplots(specs=[[{"secondary_y": False}]])

        # Historische Daten
        fig.add_trace(
            go.Scatter(
                x=df.index,
                y=df["Umsatz in millionen"],
                mode='lines',
                name='Historische Umsatzdaten'
            ),
            secondary_y=False
        )

        # Trainingsvorhersagen
        fig.add_trace(
            go.Scatter(
                x=eval_df.index[:len(df)],
                y=eval_df['pred'][:len(df)],
                mode='lines',
                name='Trainingsvorhersagen'
            ),
            secondary_y=False
        )

        # Forecasting Vorhersagen
        fig.add_trace(
            go.Scatter(
                x=forecast_index,
                y=forecast_mean,
                mode='lines',
                name='Vorhergesagte Umsätze'
            ),
            secondary_y=False
        )

        # Konfidenzintervalle
        fig.add_trace(
            go.Scatter(
                x=forecast_index,
                y=forecast_ci.iloc[:, 0],
                mode='lines',
                fill=None,
                name='Konfidenzintervall (untere Grenze)',
                line=dict(color='rgba(0,100,80,0.2)')
            ),
            secondary_y=False
        )

        fig.add_trace(
            go.Scatter(
                x=forecast_index,
                y=forecast_ci.iloc[:, 1],
                mode='lines',
                fill='tonexty',
                name='Konfidenzintervall (obere Grenze)',
                line=dict(color='rgba(0,100,80,0.2)')
            ),
            secondary_y=False
        )

        # Layout anpassen
        fig.update_layout(
            title='SARIMAX Vorhersagen für den Umsatz',
            xaxis_title='Datum',
            yaxis_title='Umsatz in Millionen',
            template='plotly'
        )

        # Plot anzeigen
        fig.show()

    return eval_df

# SARIMAX-Modell anpassen
arma = SARIMAX(df[['Umsatz in millionen']], exog=df[exog],
               order=(0,0,0), seasonal_order=(0,0,0,12)).fit()

# Modell evaluieren und Vorhersagen plotten
eval_df = evalARMA(arma, df, forecast_steps=50)





A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.


A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.



Mean Squared Error (In-Sample): 584.0656649467651



'M' is deprecated and will be removed in a future version, please use 'ME' instead.


No supported index is available. Prediction results will be given with an integer index beginning at `start`.


No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.

