In [None]:
# Enable interactive plot
#%matplotlib notebook
%matplotlib inline


import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import numpy as np
from sklearn import datasets, linear_model
import matplotlib as mpl
from scipy.stats import linregress

import warnings

# Suppress all warnings (the filter below is not limited to DeprecationWarning)
warnings.filterwarnings("ignore")

import plotly.io as pio
pio.renderers.default = "notebook"

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_percentage_error, r2_score
import os

# Name of the target column used by the modelling cells below.
y_column_name = "Total"

# Forward slashes are used in the path: the former "Daten\E..." literal relied
# on the invalid escape sequence "\E" and only resolved on Windows.
data = pd.read_excel(
    "Daten/Endenergieverbrauch_nach_Energieträger.xlsx",
    sheet_name="Tabelle1",
    usecols=[
        "Jahr",
        "Brennstoffe",
        "Treibstoffe",
        "Elektrizität",
        "Gas",
        "Kohle",
        "Holz und Holzkohle",
        "übrige Energieträger",
    ],
)

data = data.drop_duplicates()

# "Jahr" is still a regular column at this point, so it has to be excluded
# explicitly; otherwise the year would be added into the row total.
data["Total"] = data.drop(columns="Jahr").sum(axis=1)


def train_test_split_time_series(data, test_size):
    """
    Cut an ordered sequence into a leading training part and a trailing test part.

    Nothing is shuffled: the first rows go to the training part and the
    remaining rows go to the test part, so the original order is kept.

    Parameters:
    data (sliceable sequence): Time series data. Anything that supports len()
        and slicing with [:k] / [k:] works, e.g. a list or a numpy array.
    test_size (float): Fraction of the samples assigned to the test part
        (0 < test_size < 1).

    Returns:
    tuple: (train_data, test_data), both obtained by slicing ``data``.

    Raises:
    ValueError: If test_size is not strictly between 0 and 1.
    """
    fraction_is_valid = 0 < test_size < 1
    if not fraction_is_valid:
        raise ValueError("test_size should be a float between 0 and 1")

    # The training length is truncated towards zero, so any remainder row
    # ends up in the test part.
    n_train = int(len(data) * (1 - test_size))
    return data[:n_train], data[n_train:]


# Chronological split: the trailing 20 % of the rows become the test set, and
# the trailing 20 % of what remains become the verification set.
train, test = train_test_split_time_series(data, 0.2)
train, verification = train_test_split_time_series(train, 0.2)

# Targets: the column named by y_column_name for each partition.
y_train, y_verification, y_test = (
    part[y_column_name] for part in (train, verification, test)
)

# Features: the year as a single-column 2-D array. The scaler is fitted on the
# training years only and then reused unchanged for the other two partitions.
scaler = StandardScaler()
x_train = pd.DataFrame(
    scaler.fit_transform(train[["Jahr"]].to_numpy()), columns=["Jahr"]
)
x_verification = pd.DataFrame(
    scaler.transform(verification[["Jahr"]].to_numpy()), columns=["Jahr"]
)
x_test = pd.DataFrame(
    scaler.transform(test[["Jahr"]].to_numpy()), columns=["Jahr"]
)

# Plain array view of the scaled training feature.
x_columns = x_train.values

# From here on the full data set is indexed by year.
data.set_index("Jahr", inplace=True)

In [None]:
import os

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score, mean_absolute_percentage_error


class PolynomialLinearRegression:
    """Pick a polynomial degree for a least-squares fit using a hold-out set.

    For every degree in ``degrees`` a ``PolynomialFeatures`` expansion and a
    ``LinearRegression`` are fitted on the training data and scored with R2 on
    the verification data. For each degree a metrics file and a plot are
    written to ``./PolynomialLinearRegression<degree>/``.

    Attributes:
        x_train, y_train: Training features and targets.
        x_verification, y_verification: Hold-out features and targets.
        degrees: Iterable of polynomial degrees to try.
        model: Unfitted ``LinearRegression`` until ``train_and_evaluate`` has
            run; afterwards the model fitted for the best degree.
        best_degree: Degree with the highest verification R2, or ``None``.
        best_score: Verification R2 of ``best_degree``, or ``None`` before
            training.
        best_poly: The fitted ``PolynomialFeatures`` belonging to ``model``.
    """

    def __init__(self, x_train, y_train, x_verification, y_verification, degrees):
        self.x_train = x_train
        self.y_train = y_train
        self.x_verification = x_verification
        self.y_verification = y_verification
        self.degrees = degrees
        self.model = LinearRegression()
        # Filled in by train_and_evaluate(); predict() relies on them.
        self.best_degree = None
        self.best_score = None
        self.best_poly = None

    def train_and_evaluate(self):
        """Fit one model per degree, write its artefacts and keep the best one.

        Returns:
            tuple: ``(best_degree, best_score)``. If no degree produced a
            comparable score (e.g. ``degrees`` is empty) this is
            ``(None, float("-inf"))``.
        """
        best_degree = None
        # R2 on a hold-out set has no lower bound, so start from -inf rather
        # than -1; otherwise poorly extrapolating fits would never be selected.
        best_score = float("-inf")
        best_poly = None
        best_model = None

        for degree in self.degrees:
            poly = PolynomialFeatures(degree=degree, include_bias=True)

            x_train_poly = poly.fit_transform(self.x_train)
            x_verification_poly = poly.transform(self.x_verification)

            # A fresh estimator per degree, so the best fit is not overwritten
            # by the fits for later degrees.
            model = LinearRegression()
            model.fit(x_train_poly, self.y_train)
            y_pred = model.predict(x_verification_poly)

            r2 = r2_score(self.y_verification, y_pred)

            if r2 > best_score:
                best_score = r2
                best_degree = degree
                best_poly = poly
                best_model = model

            directory = f"./PolynomialLinearRegression{degree}"
            self.clean_output(directory)
            self.evaluate(directory, y_pred)
            self.plot_results(
                directory, degree, x_train_poly, x_verification_poly, y_pred
            )

        if best_model is not None:
            self.model = best_model
        self.best_degree = best_degree
        self.best_score = best_score
        self.best_poly = best_poly

        return best_degree, best_score

    def evaluate(self, folder, y_pred):
        """Write R2 and MAPE of ``y_pred`` against the verification targets.

        The metrics are written to ``<folder>/metric.txt``; the folder must
        already exist.
        """
        with open(f"{folder}/metric.txt", "w") as f:
            f.write(f"R2: {r2_score(self.y_verification, y_pred)} \n")
            f.write(
                f"MAPE: {mean_absolute_percentage_error(self.y_verification, y_pred)}"
            )

    def clean_output(self, folder):
        """Ensure ``folder`` exists and delete the regular files directly in it.

        Sub-directories are left untouched. A file that cannot be deleted is
        reported on stdout and skipped (best effort).
        """
        os.makedirs(folder, exist_ok=True)
        for filename in os.listdir(folder):
            file_path = os.path.join(folder, filename)
            try:
                if os.path.isfile(file_path):
                    os.remove(file_path)
            except OSError as e:
                print(f"Error deleting file: {file_path} - {e}")

    def predict(self, x_new):
        """Predict targets for ``x_new`` with the best model found in training.

        Parameters:
            x_new: Feature data in the same layout as ``x_train``.

        Returns:
            The predictions of the best fitted model.

        Raises:
            RuntimeError: If ``train_and_evaluate`` has not selected a model.
        """
        best_poly = getattr(self, "best_poly", None)
        if best_poly is None:
            raise RuntimeError(
                "No trained model available; call train_and_evaluate() first."
            )
        # Reuse the transformer that was fitted together with the best model.
        x_new_poly = best_poly.transform(x_new)
        return self.model.predict(x_new_poly)

    def plot_results(self, folder, degree, x_train_poly, x_verification_poly, y_pred):
        """Save a scatter plot of the data with the predicted curve.

        The figure is written to ``<folder>/plot.png`` and then closed.
        ``x_train_poly`` is accepted for interface compatibility but not used.
        """
        fig, ax = plt.subplots(figsize=(12, 6))

        # Training data in blue, verification data in black.
        ax.scatter(self.x_train, self.y_train, color="blue", label="Train Data")
        ax.scatter(
            self.x_verification,
            self.y_verification,
            color="black",
            label="Test/Verification Data",
        )

        # Column 0 of the expansion is the bias term (include_bias=True), so
        # column 1 holds the first input feature; sort by it for a clean line.
        x_axis = x_verification_poly[:, 1]
        sorted_indices = np.argsort(x_axis)
        ax.plot(
            x_axis[sorted_indices],
            np.asarray(y_pred)[sorted_indices],
            color="red",
            label="Predicted Data",
        )

        # Annotate the figure with both metrics.
        r2 = r2_score(self.y_verification, y_pred)
        mape = mean_absolute_percentage_error(self.y_verification, y_pred)
        ax.text(
            0.7,
            0.25,
            f"R2 Score: {r2:.2f}\nMAPE: {mape:.2f}",
            fontsize=12,
            transform=ax.transAxes,
        )

        ax.set_title(f"Polynomial Linear Regression (Degree {degree})")
        ax.set_xlabel("X")
        ax.set_ylabel("Y")
        ax.legend(loc="best")
        fig.savefig(f"{folder}/plot.png")
        plt.close(fig)


# Example usage of the class: try polynomial degrees 1 through 5 and keep the
# degree with the highest R2 on the verification split, together with its score.
degrees_to_try = range(1, 6)
poly_reg = PolynomialLinearRegression(
    x_train=x_train,
    y_train=y_train,
    x_verification=x_verification,
    y_verification=y_verification,
    degrees=degrees_to_try,
)
best_degree, best_score = poly_reg.train_and_evaluate()