In [7]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV

import pandas as pd, numpy as np, tensorflow as tf

In [2]:
def mean_absolute_percentage_error(y_true, y_pred):
    return tf.reduce_mean(tf.abs((y_true-y_pred) / y_true)) * 100.0

df = pd.read_feather('E:/Skóli/HÍ/Vélaverkfræði Master HÍ/Lokaverkefni/Data/merged-full-25ms-24hr-28-2-24.feather')
df = df[df.f < df.fg]
df['gust_factor'] = df.fg / df.f
df = df.dropna()
df = df.drop(['f', 'fg', 'fsdev', 'd', 'dsdev', 'longitude', 'latitude', 'X', 'Y', 'time', 'stod'], axis = 1)# + [f'Landscape_{i}' for i in range(70)], axis = 1)

y = df.gust_factor
X = df.drop(['gust_factor'], axis = 1)

# Changing the type of X,y so as to work with Tensorflow
X, y = X.values.astype(np.float32), y.values.astype(np.float32)

scaler = StandardScaler()

# Assuming 'X' is your feature matrix and 'y' is your target variable
# Replace 'X' and 'y' with your actual data

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)



In [10]:
param_grid = {
    'kernel': ['linear', 'rbf', 'poly'],
    'C': [0.1, 1, 10],
    'epsilon' : [0.1, 0.2, 0.5],
}

model = SVR()

grid_search = GridSearchCV(model, param_grid, cv = 5, scoring = 'neg_mean_squared_error', verbose = 2)
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
best_estimator = grid_search.best_estimator_

best_params

Fitting 5 folds for each of 27 candidates, totalling 135 fits
[CV] END ..................C=0.1, epsilon=0.1, kernel=linear; total time=   1.9s
[CV] END ..................C=0.1, epsilon=0.1, kernel=linear; total time=   2.1s
[CV] END ..................C=0.1, epsilon=0.1, kernel=linear; total time=   1.8s
[CV] END ..................C=0.1, epsilon=0.1, kernel=linear; total time=   1.9s
[CV] END ..................C=0.1, epsilon=0.1, kernel=linear; total time=   1.9s
[CV] END .....................C=0.1, epsilon=0.1, kernel=rbf; total time=   1.4s
[CV] END .....................C=0.1, epsilon=0.1, kernel=rbf; total time=   1.4s
[CV] END .....................C=0.1, epsilon=0.1, kernel=rbf; total time=   1.4s
[CV] END .....................C=0.1, epsilon=0.1, kernel=rbf; total time=   1.5s
[CV] END .....................C=0.1, epsilon=0.1, kernel=rbf; total time=   1.4s
[CV] END ....................C=0.1, epsilon=0.1, kernel=poly; total time=   1.3s
[CV] END ....................C=0.1, epsilon=0.1

{'C': 1, 'epsilon': 0.2, 'kernel': 'rbf'}

In [12]:
model = SVR(C=1, epsilon = 0.2, kernel = 'rbf')
model.fit(X_train, y_train)

In [5]:
# Before gridsearch
y_pred = model.predict(X_test)

mape = mean_absolute_percentage_error(y_test, y_pred)

print(f"MAPE: {mape}%")

MAPE: 11.4608998514216%


In [13]:
# After gridsearch
y_pred = model.predict(X_test)

mape = mean_absolute_percentage_error(y_test, y_pred)

print(f"MAPE: {mape}%")

MAPE: 11.4608998514216%


In [16]:
from joblib import dump

dump(model, "./saved_models/svr.pkl")

['./saved_models/svr.pkl']