In [None]:
import numpy as np
import pandas as pd

# Read the processed dataset from the dataset-characteristics folder.
wetter_umsatzdaten_kiwo = pd.read_csv(
    "../1_DatasetCharacteristics/processed_data.csv"
)

# Columns used as model inputs. The one-hot encoded Warengruppe_* columns are
# left out on purpose: a separate model is fitted per Warengruppe below, so
# the group indicator carries no information inside a single model.
feature_columns = [
    'Bewoelkung',
    'Temperatur',
    'Windgeschwindigkeit',
    'Wettercode',
    'KiWo',
    'Is_Weekend',
    'Temperature_Category',
    'Windgeschwindigkeit_Beaufort',
]

# Date boundaries of the training window (both ends are used inclusively).
training_start_date = '2013-07-01'
training_end_date = '2017-07-31'

# Date boundaries of the validation window (both ends are used inclusively).
validation_start_date = '2017-08-01'
validation_end_date = '2018-07-31'

# Cost and gradient-descent helpers for L2-regularized linear regression (unchanged)
def compute_cost_reg(X, y, w, b, lambda_):
    """Return the L2-regularized squared-error cost of a linear model.

    The value is ``(1 / (2m)) * sum((X·w + b - y)^2)`` plus the penalty
    ``(lambda_ / (2m)) * sum(w^2)``, where ``m`` is the number of rows of
    ``X``. The intercept ``b`` is not penalized.

    Args:
        X: Feature matrix; its first dimension is the sample count.
        y: Target values, one per sample.
        w: Weight vector, one entry per feature.
        b: Intercept.
        lambda_: Regularization strength.

    Returns:
        The scalar cost (data term plus penalty term).
    """
    n_samples = X.shape[0]
    residuals = np.dot(X, w) + b - y
    data_term = (1 / (2 * n_samples)) * np.sum(residuals ** 2)
    penalty_term = (lambda_ / (2 * n_samples)) * np.sum(w ** 2)
    return data_term + penalty_term

def gradient_descent_reg(X, y, w_in, b_in, alpha, num_iters, lambda_):
    """Fit linear-regression parameters by batch gradient descent with an L2 penalty.

    Each iteration computes the gradient of the regularized squared-error
    cost over all samples and takes one step of size ``alpha``. The penalty
    term applies to the weights only; the intercept is not regularized.

    Args:
        X: Feature matrix with one row per sample.
        y: Target values, one per sample.
        w_in: Initial weights, one per feature. The array is not modified.
        b_in: Initial intercept.
        alpha: Learning rate.
        num_iters: Number of gradient steps to take.
        lambda_: Regularization strength.

    Returns:
        A tuple ``(w, b)`` with the weights (a new float64 array) and the
        intercept after ``num_iters`` steps.
    """
    # Work on a private float64 copy: updating ``w_in`` in place would
    # overwrite the caller's initial weights and fails outright when the
    # caller passes an integer-typed array.
    w = np.array(w_in, dtype=np.float64)
    b = b_in
    m = len(X)
    for _ in range(num_iters):
        # The residual feeds both gradients, so compute it once per step.
        error = np.dot(X, w) + b - y
        dj_dw = (1 / m) * np.dot(error, X) + (lambda_ / m) * w
        dj_db = (1 / m) * np.sum(error)
        # Both gradients are evaluated before either parameter changes.
        w -= alpha * dj_dw
        b = b - alpha * dj_db
    return w, b

# Train separate models for each Warengruppe.
#
# For every product group (Warengruppe_1 .. Warengruppe_6) this loop:
#   1. selects the rows of that group and splits them by date into a
#      training and a validation set,
#   2. standardizes the features with the training-set mean and std,
#   3. fits one ridge model per candidate lambda and keeps the one with the
#      highest validation R^2,
#   4. stores the chosen parameters in ``models`` and prints a summary.
models = {}

# Hyperparameters shared by every Warengruppe model.
lambda_values = [0.01, 0.1, 1, 10]
learning_rate = 0.01
num_iterations = 1000


def _prepare_features(frame, columns):
    """Return ``frame[columns]`` as a float64 matrix; non-numeric or missing cells become 0."""
    numeric = frame[columns].apply(pd.to_numeric, errors='coerce').fillna(0)
    return numeric.to_numpy(dtype=np.float64)


for i in range(1, 7):
    # Filter data for specific Warengruppe. The explicit comparison with True
    # keeps rows with a missing flag out of the selection.
    warengruppe_data = wetter_umsatzdaten_kiwo[wetter_umsatzdaten_kiwo[f'Warengruppe_{i}'] == True]

    # Split data by date; ``between`` includes both boundaries.
    training_data = warengruppe_data[
        warengruppe_data['Datum'].between(training_start_date, training_end_date)
    ]
    validation_data = warengruppe_data[
        warengruppe_data['Datum'].between(validation_start_date, validation_end_date)
    ]

    # Without rows in either split there is nothing to fit or to score;
    # continuing would divide by zero inside the training routine.
    if training_data.empty or validation_data.empty:
        print(f"\nWarengruppe {i}: no rows in the training or validation date range, skipping.")
        continue

    # Prepare features and target
    X_train = _prepare_features(training_data, feature_columns)
    y_train = training_data['Umsatz'].to_numpy()

    X_val = _prepare_features(validation_data, feature_columns)
    y_val = validation_data['Umsatz'].to_numpy()

    # Normalize features with training-set statistics only, so no
    # information from the validation set leaks into the model.
    X_mean = np.mean(X_train, axis=0)
    X_std = np.std(X_train, axis=0)
    X_std[X_std == 0] = 1  # constant columns: avoid division by zero
    X_train = (X_train - X_mean) / X_std
    X_val = (X_val - X_mean) / X_std

    # Hyperparameter tuning: keep the weights of the best candidate so the
    # winning model does not have to be trained a second time (training is
    # deterministic from a zero start, so a retrain would reproduce them).
    best_r_squared = -np.inf
    best_lambda = None
    w_final, b_final = None, None

    for lambda_ in lambda_values:
        # Each candidate gets its own start array so the stored best weights
        # can never be aliased by a later training run.
        w_init = np.zeros(X_train.shape[1])
        b_init = 0
        w_candidate, b_candidate = gradient_descent_reg(
            X_train, y_train, w_init, b_init, learning_rate, num_iterations, lambda_
        )
        y_val_pred = np.dot(X_val, w_candidate) + b_candidate
        r_squared = 1 - (np.sum((y_val - y_val_pred) ** 2) / np.sum((y_val - np.mean(y_val)) ** 2))

        if r_squared > best_r_squared:
            best_r_squared = r_squared
            best_lambda = lambda_
            w_final, b_final = w_candidate, b_candidate

    # No candidate produced a comparable R^2 (e.g. the validation target is
    # constant, or training diverged to NaN); there is no model to report.
    if best_lambda is None:
        print(f"\nWarengruppe {i}: no valid validation R^2 for any lambda, skipping.")
        continue

    # Store model for this Warengruppe
    models[i] = {
        'w_final': w_final,
        'b_final': b_final,
        'X_mean': X_mean,
        'X_std': X_std,
        'best_lambda': best_lambda,
        'best_r_squared': best_r_squared,
        'feature_columns': feature_columns
    }

    # Validate the model for this Warengruppe
    y_val_pred = np.dot(X_val, w_final) + b_final
    validation_cost = compute_cost_reg(X_val, y_val, w_final, b_final, best_lambda)
    mse = np.mean((y_val - y_val_pred) ** 2)

    print(f"\nWarengruppe {i} Model:")
    print(f"Best Lambda: {best_lambda}, Best R^2: {best_r_squared:.4f}")
    print(f"Validation cost: {validation_cost:.4e}")
    print(f"Mean Squared Error (MSE) on validation set: {mse:.4e}")

    # Output model parameters for each Warengruppe
    print("\nModel Parameters:")
    for feature, coefficient in zip(feature_columns, w_final):
        print(f"Feature: {feature}, Coefficient: {coefficient:.4f}")
    print(f"Intercept (b): {b_final:.4f}")

# Optional: Prediction function
def predict_sales(features, warengruppe):
    """Predict sales for one Warengruppe from raw (unnormalized) features.

    Args:
        features: Feature values in the same column order as the model's
            training features; a single vector or a 2-D batch of rows.
        warengruppe: Key of the trained model in ``models``.

    Returns:
        The linear-model prediction(s) for ``features``.

    Raises:
        KeyError: If no model is stored under ``warengruppe``.
    """
    params = models[warengruppe]
    # Apply the same standardization that was used during training.
    centered = features - params['X_mean']
    scaled = centered / params['X_std']
    return np.dot(scaled, params['w_final']) + params['b_final']