In [1]:
import pandas as pd
import numpy as np  


import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error
from sklearn.svm import SVR
import sklearn

In [2]:
from sklearn.model_selection import train_test_split
import optuna

In [3]:
sol_phys_train = pd.read_csv('../data/interim/solar_phys_train.csv')
sol_phys_train = sol_phys_train.drop('Unnamed: 0', axis=1)
sol_phys_train.head()

Unnamed: 0,Radiation,Temperature,Pressure,Humidity,WindDirection(Degrees),Speed
0,634.99,289.26,4060.99,41,14.96,3.02
1,1.27,275.93,4034.32,70,207.43,2.51
2,1.21,281.48,4064.99,33,168.2,2.51
3,1.67,285.37,4060.99,101,152.6,1.51
4,839.78,289.82,4062.32,36,291.95,3.52


In [4]:
sol_phys_test = pd.read_csv('../data/interim/solar_phys_test.csv')
sol_phys_test = sol_phys_test.drop('Unnamed: 0', axis=1)
sol_phys_test.head()

Unnamed: 0,id,Temperature,Pressure,Humidity,WindDirection(Degrees),Speed
0,0,288.15,4062.32,44,312.67,1.51
1,1,288.15,4063.65,83,38.01,3.02
2,2,281.48,4051.66,78,213.62,2.51
3,3,280.37,4052.99,98,176.63,2.01
4,4,280.37,4052.99,34,175.89,3.02


In [5]:
def objective(trial):
# Invoke suggest methods of a Trial object to generate hyperparameters.

    svr_c = trial.suggest_loguniform('svr_c', 1e-10, 1e10)
    epsilon = trial.suggest_loguniform('epsilon', 1e-1, 1e1)
        

    X = sol_phys_train[[c for c in sol_phys_train if c != 'Radiation']]
    y = sol_phys_train[['Radiation']]
    X_train, X_val, y_train, y_val = sklearn.model_selection.train_test_split(X, y,random_state=0)
    
    
    model = SVR(C=svr_c, epsilon=epsilon, gamma='auto')
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_val)
    error = sklearn.metrics.mean_squared_error(y_val, y_pred)
    
    
    return error # An objective value linked with the Trial object.

In [6]:
study = optuna.create_study()
study.optimize(objective, n_trials=100)
study.best_params


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

[I 2020-05-03 23:53:02,426] Finished trial#0 with value: 60167.30022419548 with parameters: {'svr_c': 216780.11657183303, 'epsilon': 2.0806480359980952}. Best is trial#0 with value: 60167.30022419548.

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

[I 2020-05-03 23:53:21,177] Finished trial#1 with value: 140487.24468951512 with parameters: {'svr_c': 3.1219896441313255e-08, 'epsilon': 5.047092374049707}. Best is trial#0 with value: 60167.30022419548.

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().

[I 2020-05-03 23:57:56,481] Finished trial#2 with value: 58142.99139521143 with parameters: {'svr_c': 1362.804899786038, 'epsilon': 2.005242402918289}. Best is trial#2 with value: 58142.991

KeyboardInterrupt: 

In [None]:
'svr_c': 699.4904770372533, 'epsilon': 1.5723760456136062

In [8]:
X = sol_phys_train[[c for c in sol_phys_train if c != 'Radiation']]
y = sol_phys_train[['Radiation']]
X_train, X_val, y_train, y_val = sklearn.model_selection.train_test_split(X, y,random_state=0)

In [9]:
svr_c = 699.4904770372533
epsilon = 1.5723760456136062

model = SVR(kernel='rbf', C=svr_c, epsilon=epsilon, gamma='auto')
model.fit(X_train, y_train)



A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().



NameError: name 'X_valid' is not defined

In [10]:

predict = model.predict(X_val)
np.sqrt(mean_squared_error(predict, y_val))

239.87818239194658

In [14]:
predic_testf = model.predict(sol_phys_test.drop('id',axis=1))

In [54]:
submission1 = pd.DataFrame()

In [55]:
submission1['id'] = sol_phys_test['id']

In [56]:
submission1['Radiation'] = predic_testf

In [58]:
#submission1.set_index('id', inplace=True)
submission1.to_csv('../data/processed/submission1.csv')

In [59]:
submission1.describe()

Unnamed: 0,id,Radiation
count,8172.0,8172.0
mean,4085.5,193.237404
std,2359.197533,160.349376
min,0.0,-119.23856
25%,2042.75,61.507192
50%,4085.5,176.080493
75%,6128.25,278.912315
max,8171.0,1040.773669


In [60]:
sol_phys_train.describe()

Unnamed: 0,Radiation,Temperature,Pressure,Humidity,WindDirection(Degrees),Speed
count,24514.0,24514.0,24514.0,24514.0,24514.0,24514.0
mean,208.062687,283.763923,4056.016801,75.002284,143.129305,2.793646
std,316.133238,3.452542,7.299362,25.999994,82.939584,1.55628
min,1.13,274.26,4024.99,11.0,0.09,0.0
25%,1.23,280.93,4052.99,55.0,81.8725,1.51
50%,2.72,283.15,4056.99,85.0,147.585,2.51
75%,359.9275,285.93,4060.99,97.0,179.2175,3.52
max,1601.26,294.82,4074.32,103.0,359.95,17.6
