# Regression Analysis

Author: Dr. Stephan Hausberg, Winter semester 2024

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from mpl_toolkits.mplot3d import Axes3D

# 1. simple linear regression model
def simple_linear_regression():
    """Fit and plot an ordinary-least-squares line on synthetic 1-D data.

    Draws 100 predictor values uniformly from [0, 10), builds the response
    as ``3 * X`` plus Gaussian noise (standard deviation 3), fits an OLS
    model that includes an intercept, prints the statsmodels summary and
    shows a scatter plot of the sample with the fitted line on top.

    Returns:
        None. The results are printed and displayed, not returned.
    """
    # Fixed seed so that every run produces the same sample.
    np.random.seed(42)
    n_obs = 100
    X = 10 * np.random.rand(n_obs)  # independent variable
    noise = np.random.normal(0, 3, size=n_obs)
    y = 3 * X + noise  # dependent variable with noise

    # Prepend a column of ones so the model estimates an intercept term.
    design = sm.add_constant(X)

    # Ordinary least squares fit.
    results = sm.OLS(y, design).fit()
    print("Simple linear model:")
    print(results.summary())

    # Fitted values for the observed predictors, used to draw the line.
    fitted = results.predict(design)

    # Scatter of the raw sample with the regression line on top.
    plt.figure(figsize=(8, 6))
    plt.scatter(X, y, label="data points", alpha=0.7)
    plt.plot(X, fitted, color="red", label="regression graph")
    plt.title("Simple linear regression")
    plt.xlabel("independent variable (X)")
    plt.ylabel("dependent variable (y)")
    plt.legend()
    plt.show()

# 2. multiple regression model
def multiple_linear_regression():
    """Fit and plot an OLS regression plane on synthetic two-predictor data.

    Draws 100 values for ``X1`` uniformly from [0, 10) and 100 values for
    ``X2`` uniformly from [0, 5), builds the response as
    ``2 * X1 + 3 * X2`` plus Gaussian noise (standard deviation 2), fits an
    OLS model with an intercept, prints the statsmodels summary and shows a
    3-D scatter plot of the sample together with the fitted plane and a
    legend identifying both.

    Returns:
        None. The results are printed and displayed, not returned.
    """
    # Generate data (fixed seed for reproducibility).
    np.random.seed(42)
    X1 = np.random.rand(100) * 10
    X2 = np.random.rand(100) * 5
    y = 2 * X1 + 3 * X2 + np.random.normal(0, 2, size=100)  # dependent variable

    # Combine the independent variables into one design matrix.
    X = np.column_stack((X1, X2))
    X_const = sm.add_constant(X)  # add intercept column

    # Fit the model.
    model = sm.OLS(y, X_const).fit()
    print("Multiples Lineares Modell:")
    print(model.summary())

    # Visualisation.
    fig = plt.figure(figsize=(10, 7))
    ax = fig.add_subplot(111, projection='3d')
    points = ax.scatter(X1, X2, y, label="Datenpunkte", alpha=0.7)

    # Evaluate the fitted plane on a regular 20 x 20 grid spanning the data.
    X1_grid, X2_grid = np.meshgrid(np.linspace(X1.min(), X1.max(), 20),
                                   np.linspace(X2.min(), X2.max(), 20))
    # Unpack instead of indexing by integer position: the order is
    # (intercept, X1 coefficient, X2 coefficient), matching X_const's columns.
    intercept, coef_x1, coef_x2 = model.params
    y_pred = intercept + coef_x1 * X1_grid + coef_x2 * X2_grid
    ax.plot_surface(X1_grid, X2_grid, y_pred, alpha=0.5, color='red')

    # The labels were previously set but never displayed because no legend
    # was drawn. A 3-D surface is not a dependable legend handle on every
    # matplotlib version, so a flat proxy patch with the same colour and
    # transparency stands in for the regression plane.
    surface_proxy = plt.Rectangle((0, 0), 1, 1, facecolor='red', alpha=0.5)
    ax.legend([points, surface_proxy], ["Datenpunkte", "Regression area"])

    ax.set_title("multiple linear regression")
    ax.set_xlabel("independent variable 1 (X1)")
    ax.set_ylabel("independent variable 2 (X2)")
    ax.set_zlabel("dependent variable (y)")
    plt.show()

# main program
if __name__ == "__main__":
    # Run the two OLS demonstrations in order: simple first, then multiple.
    for run_example in (simple_linear_regression, multiple_linear_regression):
        run_example()

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import seaborn as sns

# 3. logistic regression
def logistic_regression_example():
    """Train, evaluate and plot a logistic regression on synthetic 2-D data.

    Generates a two-class data set with 200 samples and two informative
    features, splits it 70/30 into training and test data, fits a
    ``LogisticRegression`` with default settings on the training part,
    prints the classification report and accuracy for the test part, and
    shows the predicted class regions with all samples drawn on top.

    Returns:
        None. The results are printed and displayed, not returned.
    """
    # Seed the global NumPy RNG; the explicit random_state arguments below
    # additionally pin the data generation and the train/test split.
    np.random.seed(42)
    X, y = make_classification(
        n_samples=200,  # number of data points
        n_features=2,   # number of features
        n_informative=2,
        n_redundant=0,
        n_clusters_per_class=1,
        class_sep=1.5,  # separation between the classes
        random_state=42
    )

    # Split into training and test data.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Fit the logistic regression model on the training data only.
    model = LogisticRegression()
    model.fit(X_train, y_train)

    # Predict on the held-out test data and report the quality.
    y_pred = model.predict(X_test)
    print("Logistische Regression - Evaluierung:")
    print(classification_report(y_test, y_pred))
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")

    # Visualisation: classify every point of a dense grid that extends one
    # unit beyond the data in each direction to obtain the decision regions.
    plt.figure(figsize=(8, 6))
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                         np.arange(y_min, y_max, 0.1))

    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    # The "Set1" palette draws class 0 in red and class 1 in blue, whereas
    # "coolwarm" maps low values to blue and high values to red. Using the
    # reversed colormap makes each decision region share the colour of the
    # class it predicts, so misclassified points stand out instead of every
    # point appearing to lie in the wrong region.
    plt.contourf(xx, yy, Z, alpha=0.8, cmap="coolwarm_r")
    sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=y, style=y, palette="Set1", edgecolor="k")
    plt.title("Logistic regression")
    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")
    plt.show()

# main program
if __name__ == "__main__":

    # Example 3: run the logistic regression demonstration.
    logistic_regression_example()
