In [None]:
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import LinearRegression, Lasso, LassoCV

from utils.data import load_raw_data, load_and_clean_data, get_train_test_split
from utils.evaluation import evaluate_model, compare_models, add_result, reset_results
from utils.plotting import plot_predicted_vs_actual, plot_correlation_heatmap

# Notebook-wide plotting default: render every figure at 100 dpi.
plt.rc("figure", dpi=100)

# KI1-Projekt 308: California Housing — Übersicht

**Thema:** Neuronales Netz für den kalifornischen Hauspreis-Datensatz  
**Gruppennummer:** 308  
**Deadline:** 15.04.2026

## Projektstruktur

| Notebook | Inhalt | Verantwortlich |
|----------|--------|---------------|
| `01_EDA.ipynb` | Explorative Datenanalyse | Alle |
| `02_Baseline_Lineare_Regression.ipynb` | Referenzmodell | Alle |
| `03_LASSO_Ridge.ipynb` | Regularisierte Regression, Feature-Selektion | P1 |
| `04_Decision_Tree.ipynb` | Decision Tree + Pruning | P2 |
| `05_Ensemble.ipynb` | Random Forest, Gradient Boosting | P3 |
| `06_kNN_Regression.ipynb` | k-Nearest Neighbors Regression | P4 |
| `07_Neural_Network.ipynb` | NN (Kernaufgabe) + Vergleich mit LR | P5 |

## Gemeinsame Module (`utils/`)
- `data.py` — Datenladen, Cleaning, Train/Test-Split (fester `random_state=42`)
- `plotting.py` — Einheitliche Visualisierungen
- `evaluation.py` — R², MAE, RMSE für alle Modelle

In [None]:
# Quick check: load the cleaned data set and print a short overview.
df = load_and_clean_data()

n_rows, n_cols = df.shape
# The last column is reported as the target, all preceding ones as features.
column_names = list(df.columns)

print(f"Bereinigter Datensatz: {n_rows} Zeilen, {n_cols} Spalten")
print(f"\nFeatures: {column_names[:-1]}")
print(f"Zielvariable: {column_names[-1]}")

# Keep this as the final expression so the summary table is the cell output.
df.describe()

In [None]:
# Baseline model: plain linear regression, used as the reference for all
# other models.
# NOTE(review): reset_results() presumably clears what add_result() collects,
# so re-running this cell does not duplicate the baseline row — confirm in
# utils.evaluation.
reset_results()

X_train, X_test, y_train, y_test, feature_names = get_train_test_split(df)

# fit() returns the estimator itself, so `lr` is the trained model.
lr = LinearRegression().fit(X_train, y_train)

result_lr = evaluate_model(
    lr,
    X_train,
    X_test,
    y_train,
    y_test,
    "Lineare Regression (Baseline)",
)
add_result(result_lr)

# Comparison table (will be extended as further models are added)
compare_models()