# Model Template

In [18]:
# import necessary packages

import os

import pandas as pd
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
# TODO: replace the model with your own
from sklearn.svm import SVR

# Recommended by sklearn as alternatives to SVR because they scale better to larger datasets
from sklearn.svm import LinearSVR
from sklearn.linear_model import SGDRegressor

In [19]:
# Load the preprocessed training and test sets from disk.

train_df, test_df = map(
    pd.read_csv,
    ('data/preprocessed_data/train.csv', 'data/preprocessed_data/test.csv'),
)

In [20]:
# Drop the start date for regression (it is already represented as month, day, pm).
# errors='ignore' keeps this cell re-runnable: train_df/test_df are overwritten
# in place, so on a second run the column is already gone and a plain drop
# would raise a KeyError.
train_df = train_df.drop(columns='start_date', errors='ignore')
test_df = test_df.drop(columns='start_date', errors='ignore')

# Drop every row that contains at least one null value.
train_df = train_df.dropna()
test_df = test_df.dropna()

In [21]:
# Separate features and target ('count') for training.
X_train, y_train = train_df.drop(columns=['count']), train_df['count']

# Cut the test set into two equally sized halves: one for validation during
# hyperparameter tuning, one held out for the final evaluation.
X_val, X_test, y_val, y_test = train_test_split(
    test_df.drop(columns=['count']),
    test_df['count'],
    random_state=7,
    shuffle=True,
    test_size=0.5,
)

In [22]:
# Show summary statistics (count, mean, std, min, quartiles, max) of y_train.
y_train.describe()

count    402899.000000
mean         23.192028
std          26.375188
min           1.000000
25%           7.000000
50%          15.000000
75%          29.000000
max         547.000000
Name: count, dtype: float64

In [23]:
# hyperparameter tuning
#
# The tuning results are cached in a CSV file: if the file already exists it is
# loaded as-is, otherwise an optuna study is run and its results are saved.

# TODO: replace the file name (the hyperparameter tuning results are stored in this file)
tuning_results_path = 'data/hyperparameter_tuning/SVR.csv'

try:
    hyperparameters_df = pd.read_csv(tuning_results_path)

except FileNotFoundError:

    # Create the output directory up front: a missing directory also ends up in
    # this branch, and failing on to_csv only after all trials have been trained
    # would throw away the whole tuning run.
    os.makedirs(os.path.dirname(tuning_results_path), exist_ok=True)

    # One record (hyperparameters + evaluation metrics) per finished trial.
    # The records are collected in a list and converted to a DataFrame once,
    # because DataFrame.append was removed in pandas 2.0.
    trial_records = []

    # this function is used by optuna to tune the hyperparameters
    def objective(trial):
        """Train an SVR with the hyperparameters sampled for `trial` and score it.

        Args:
            trial: the optuna trial used to sample the hyperparameters.

        Returns:
            The RMSE of the predictions on the validation split (X_val, y_val);
            this is the value optuna minimizes.
        """
        # TODO: replace the hyperparameters with those of your model
        # - integers: trial.suggest_int(name, low, high)
        # - floats: trial.suggest_float(name, low, high)
        # - categorical: trial.suggest_categorical(name, choices)
        # (https://optuna.readthedocs.io/en/v2.0.0/reference/generated/optuna.trial.Trial.html)
        # define hyperparameters
        kernel = trial.suggest_categorical('kernel', ['poly', 'rbf'])
        gamma = trial.suggest_categorical('gamma', ['scale', 'auto'])
        c_regularization = trial.suggest_float('C', 1, 100)

        # TODO: replace with your model
        # setup and train model
        SVR_reg = SVR(
            kernel=kernel,
            gamma=gamma,
            C=c_regularization
        )
        SVR_reg.fit(X_train, y_train)

        # make predictions
        y_val_pred = SVR_reg.predict(X_val)

        # evaluate predictions
        r_squared = r2_score(y_val, y_val_pred)
        rmse = mean_squared_error(y_val, y_val_pred) ** 0.5

        # TODO: replace with your hyperparameters
        # remember the results of this trial
        trial_records.append(
            {'kernel': kernel,
             'gamma': gamma,
             'C': c_regularization,
             'r_squared': r_squared,
             'rmse': rmse}
        )

        # return rmse -> optuna will optimize rmse
        return rmse


    # rmse is an error measure, so the study has to minimize it
    study = optuna.create_study(direction='minimize')
    # start optimization
    study.optimize(objective, n_trials=25)

    # df containing hyperparameters and evaluation metrics of each run;
    # column dtypes are inferred from the recorded Python values
    hyperparameters_df = pd.DataFrame(trial_records)

    # sort hyperparameter tuning results (best run first) and save file
    hyperparameters_df = hyperparameters_df.sort_values('rmse', ascending=True)
    hyperparameters_df = hyperparameters_df.reset_index(drop=True)
    hyperparameters_df.to_csv(tuning_results_path, index=False)

In [24]:
hyperparameters_df.head()

Unnamed: 0,kernel,gamma,C,r_squared,rmse


In [25]:
# final model evaluation

# TODO: replace with your model and hyperparameters
# Build and train the model using the most successful hyperparameters, i.e. the
# tuning run with the smallest rmse. The row is selected by sorting here instead
# of relying on the row order of hyperparameters_df, which may come from a
# cached CSV file.
if hyperparameters_df.empty:
    # no tuning results available -> fall back to the default SVR settings
    SVR_reg = SVR()
else:
    best_run = hyperparameters_df.sort_values('rmse', ascending=True).iloc[0]
    SVR_reg = SVR(
        kernel=best_run['kernel'],
        gamma=best_run['gamma'],
        C=float(best_run['C'])
    )
SVR_reg.fit(X_train, y_train)

# make predictions on the held-out test split (not used during tuning)
y_test_pred = SVR_reg.predict(X_test)

# evaluate predictions
r_squared = r2_score(y_test, y_test_pred)
rmse = mean_squared_error(y_test, y_test_pred) ** 0.5

print(f'R^2:\t{r_squared}')
print(f'RMSE:\t{rmse}')