In [None]:
import pandas as pd
import statsmodels.formula.api as smf

# Load datasets
df_sales = pd.read_csv('./umsatzdaten_gekuerzt.csv')
print(df_sales)
df_weather = pd.read_csv('wetter.csv')
print(df_weather)
df_kiwo = pd.read_csv('kiwo.csv')
df_holidays = pd.read_csv('Feiertage.csv')

# Merge sales and weather data
df_merged = pd.merge(df_sales, df_weather, on='Datum', how='left')
print(df_merged)

# Merge with Kieler Woche data
df_merged_with_kiwo = pd.merge(df_merged, df_kiwo, on='Datum', how='left')

# Define weather codes that indicate precipitation, snow, or wet conditions
wet_weather_codes = [
    # Rain related codes
    50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
    60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
    # Snow related codes
    70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
    # Shower related codes
    80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
    # Thunder related codes with precipitation
    91, 92, 93, 94, 95, 96, 97, 98, 99
]

# Create dry_weather column (1 for dry, 0 for wet)
df_merged_with_kiwo['dry_weather'] = df_merged_with_kiwo['Wettercode'].apply(lambda x: 0 if x in wet_weather_codes else 1)
print(df_merged_with_kiwo)

# Fill missing values in KielerWoche column with 0
df_merged_with_kiwo['KielerWoche'].fillna(0, inplace=True)

# Prepare holidays data
df_holidays['Feiertag'] = 1

# Merge with Feiertag data
df_final = pd.merge(df_merged_with_kiwo, df_holidays, on='Datum', how='left')
df_final['Feiertag'].fillna(0, inplace=True)

# Print min and max dates
min_date = df_merged_with_kiwo['Datum'].min()
max_date = df_merged_with_kiwo['Datum'].max()
print(f"Min Datum: {min_date}")
print(f"Max Datum: {max_date}")




           Datum  Warengruppe      Umsatz
0     2013-07-01            1  148.828353
1     2013-07-02            1  159.793757
2     2013-07-03            1  111.885594
3     2013-07-04            1  168.864941
4     2013-07-05            1  171.280754
...          ...          ...         ...
9329  2017-12-21            6   87.471228
9330  2017-12-22            6   71.911652
9331  2017-12-23            6   84.062223
9332  2017-12-24            6   60.981969
9333  2017-12-27            6   34.972644

[9334 rows x 3 columns]
           Datum  Bewoelkung  Temperatur  Windgeschwindigkeit  Wettercode
0     2012-01-01         8.0      9.8250                   14        58.0
1     2012-01-02         7.0      7.4375                   12         NaN
2     2012-01-03         8.0      5.5375                   18        63.0
3     2012-01-04         4.0      5.6875                   19        80.0
4     2012-01-05         6.0      5.3000                   23        80.0
...          ...         ..

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_merged_with_kiwo['KielerWoche'].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_final['Feiertag'].fillna(0, inplace=True)


In [25]:
print(df_final)

model = smf.ols(formula='Umsatz ~ Temperatur + dry_weather + Windgeschwindigkeit + KielerWoche + Feiertag', data=df_final).fit()

#Zusätzliche Features: Wochentage, Ferien, Warengruppe. Bins für Wettercodes überarbeiten.

print(model.summary())

           Datum  Warengruppe      Umsatz  Bewoelkung  Temperatur  \
0     2013-07-01            1  148.828353         6.0     17.8375   
1     2013-07-02            1  159.793757         3.0     17.3125   
2     2013-07-03            1  111.885594         7.0     21.0750   
3     2013-07-04            1  168.864941         7.0     18.8500   
4     2013-07-05            1  171.280754         5.0     19.9750   
...          ...          ...         ...         ...         ...   
9329  2017-12-21            6   87.471228         7.0      6.2375   
9330  2017-12-22            6   71.911652         7.0      5.7625   
9331  2017-12-23            6   84.062223         7.0      7.8000   
9332  2017-12-24            6   60.981969         7.0      8.1125   
9333  2017-12-27            6   34.972644         7.0      4.6125   

      Windgeschwindigkeit  Wettercode  KielerWoche  dry_weather  Feiertag  \
0                    15.0        20.0          0.0            1       0.0   
1                