In [2]:
import numpy as np
import pandas as pd

# SVR Linear (dados originais)

In [35]:
df = pd.read_csv('./melting-point/train_wz_ws.csv')

In [16]:
melting_point={}
melting_point["data"] = df.drop(['Tm'], axis=1).values
melting_point["target"] = df["Tm"].values

In [19]:
from sklearn.svm import LinearSVR
from sklearn.model_selection import cross_val_score

lsvr = LinearSVR(max_iter=1000)

svm_scores = cross_val_score(lsvr, melting_point["data"], melting_point["target"], cv=10, scoring='neg_mean_absolute_error')
print(svm_scores)
print(svm_scores.mean())
print(svm_scores.std())

[-48.56622617 -43.08783318 -44.00966523 -41.9578608  -43.17784645
 -42.81246195 -47.77304928 -41.75554169 -41.52655535 -39.02037495]
-43.36874150549619
2.72290826119782


In [26]:
from sklearn.model_selection import GridSearchCV
import multiprocessing

parameters = {
    'C': [ 0.1, 0.2, 0.4, 0.5, 1.0, 1.5, 1.8, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0],
    'loss': ['epsilon_insensitive', 'squared_epsilon_insensitive']
}
clf = GridSearchCV(lsvr, parameters, n_jobs=multiprocessing.cpu_count(), cv=10, scoring='neg_mean_absolute_error')

In [27]:
clf.fit(melting_point['data'], melting_point['target'])

0,1,2
,estimator,LinearSVR()
,param_grid,"{'C': [0.1, 0.2, ...], 'loss': ['epsilon_insensitive', 'squared_epsilon_insensitive']}"
,scoring,'neg_mean_absolute_error'
,n_jobs,16
,refit,True
,cv,10
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,epsilon,0.0
,tol,0.0001
,C,1.0
,loss,'squared_epsilon_insensitive'
,fit_intercept,True
,intercept_scaling,1.0
,dual,'auto'
,verbose,0
,random_state,
,max_iter,1000


In [28]:
lsvr_scores = cross_val_score(clf.best_estimator_, melting_point["data"], melting_point["target"], cv=10, scoring='neg_mean_absolute_error')
print(lsvr_scores)
print(lsvr_scores.mean())
print(lsvr_scores.std())

[-38.07162036 -36.31998602 -37.45368913 -34.642331   -38.17301367
 -38.15640703 -38.51415595 -37.13967755 -38.17730021 -34.18003618]
-37.08282170996903
1.472082872972242


## NuSVR Linear

In [23]:
from sklearn.svm import NuSVR

param_grid = [ 
    {  
        'nu': np.arange(0.05, 1.0, 0.05) 
    } 
] 
 
gs = GridSearchCV(estimator=NuSVR(kernel='linear'), param_grid=param_grid, scoring='neg_mean_absolute_error', cv=10, n_jobs=multiprocessing.cpu_count()) 

In [24]:
gs.fit(melting_point['data'], melting_point['target'])

0,1,2
,estimator,NuSVR(kernel='linear')
,param_grid,"[{'nu': array([0.05, ..., 0.9 , 0.95])}]"
,scoring,'neg_mean_absolute_error'
,n_jobs,16
,refit,True
,cv,10
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,nu,np.float64(0.9500000000000001)
,C,1.0
,kernel,'linear'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,tol,0.001
,cache_size,200
,verbose,False


In [25]:
lsvr_scores = cross_val_score(gs.best_estimator_, melting_point["data"], melting_point["target"], cv=10, scoring='neg_mean_absolute_error')
print(lsvr_scores)
print(lsvr_scores.mean())
print(lsvr_scores.std())

[-47.96795095 -43.01898246 -43.82945149 -42.74726149 -42.37824352
 -42.73237869 -47.24782709 -42.73365662 -42.58791994 -38.24019413]
-43.348386638439216
2.561596551061093


# SVR Não-Linear (dados originais)

In [39]:
from sklearn.svm import SVR

param_grid =  {  
    'kernel': ['rbf', 'poly', 'sigmoid'],
    'C': [ 0.1, 0.2, 0.4, 0.5, 1.0, 1.5, 1.8, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0]
} 

gs = GridSearchCV(SVR(max_iter=1000), param_grid, n_jobs=multiprocessing.cpu_count(), cv=10, scoring='neg_mean_absolute_error')

In [40]:
gs.fit(melting_point['data'], melting_point['target'])



0,1,2
,estimator,SVR(max_iter=1000)
,param_grid,"{'C': [0.1, 0.2, ...], 'kernel': ['rbf', 'poly', ...]}"
,scoring,'neg_mean_absolute_error'
,n_jobs,16
,refit,True
,cv,10
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,tol,0.001
,C,5.0
,epsilon,0.1
,shrinking,True
,cache_size,200
,verbose,False


In [41]:
svr_scores = cross_val_score(gs.best_estimator_, melting_point["data"], melting_point["target"], cv=10, scoring='neg_mean_absolute_error')
print(svr_scores)
print(svr_scores.mean())
print(svr_scores.std())



[-50.89464368 -46.2399396  -48.17215448 -49.94145886 -49.97759298
 -50.04842577 -52.35679046 -45.91497547 -47.93539124 -42.80136578]
-48.428273831448955
2.6815542898604376




# SVR Linear (dados escalonados)

In [3]:
df = pd.read_csv('./melting-point/train_wz_ws.csv')

In [4]:
melting_point={}
melting_point["data"] = df.drop(['Tm'], axis=1).values
melting_point["target"] = df["Tm"].values

In [5]:
from sklearn.svm import LinearSVR
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline

lsvr = LinearSVR(max_iter=1000)

svm_pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', lsvr)
])
svm_scores = cross_val_score(svm_pipe, melting_point["data"], melting_point["target"], cv=10, scoring='neg_mean_absolute_error')
print(svm_scores)
print(svm_scores.mean())
print(svm_scores.std())

[-39.22497229 -34.29048806 -36.87164568 -33.02462239 -37.06444579
 -36.95592243 -38.27163374 -36.00979041 -36.50158014 -33.27846564]
-36.14935665661532
1.9385051319664737


In [6]:
from sklearn.model_selection import GridSearchCV
import multiprocessing

parameters = {
    'svm__C': [ 0.1, 0.2, 0.4, 0.5, 1.0, 1.5, 1.8, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0],
    'svm__loss': ['epsilon_insensitive', 'squared_epsilon_insensitive']
}
clf = GridSearchCV(svm_pipe, parameters, n_jobs=multiprocessing.cpu_count(), cv=10, scoring='neg_mean_absolute_error')

In [7]:
clf.fit(melting_point['data'], melting_point['target'])



0,1,2
,estimator,Pipeline(step...LinearSVR())])
,param_grid,"{'svm__C': [0.1, 0.2, ...], 'svm__loss': ['epsilon_insensitive', 'squared_epsilon_insensitive']}"
,scoring,'neg_mean_absolute_error'
,n_jobs,16
,refit,True
,cv,10
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,epsilon,0.0
,tol,0.0001
,C,2.0
,loss,'epsilon_insensitive'
,fit_intercept,True
,intercept_scaling,1.0
,dual,'auto'
,verbose,0
,random_state,
,max_iter,1000


In [8]:
lsvr_scores = cross_val_score(clf.best_estimator_, melting_point["data"], melting_point["target"], cv=10, scoring='neg_mean_absolute_error')
print(lsvr_scores)
print(lsvr_scores.mean())
print(lsvr_scores.std())



[-39.56351753 -34.42066284 -36.86175993 -32.8862417  -36.46169982
 -36.51467599 -38.18954817 -36.18775389 -36.88066885 -32.70794184]
-36.06744705575615
2.0617043516162834




## NuSVR Linear

In [63]:
from sklearn.svm import NuSVR

param_grid = [ 
    {  
        'svm__nu': np.arange(0.05, 1.0, 0.05) 
    } 
]

svm_pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', NuSVR(kernel='linear'))
])
 
gs = GridSearchCV(svm_pipe, param_grid=param_grid, scoring='neg_mean_absolute_error', cv=10, n_jobs=multiprocessing.cpu_count()) 

In [64]:
gs.fit(melting_point['data'], melting_point['target'])

0,1,2
,estimator,Pipeline(step...l='linear'))])
,param_grid,"[{'svm__nu': array([0.05, ..., 0.9 , 0.95])}]"
,scoring,'neg_mean_absolute_error'
,n_jobs,16
,refit,True
,cv,10
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,nu,np.float64(0.6000000000000001)
,C,1.0
,kernel,'linear'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,tol,0.001
,cache_size,200
,verbose,False


In [65]:
lsvr_scores = cross_val_score(gs.best_estimator_, melting_point["data"], melting_point["target"], cv=10, scoring='neg_mean_absolute_error')
print(lsvr_scores)
print(lsvr_scores.mean())
print(lsvr_scores.std())

[-38.54129273 -34.16940868 -35.48589362 -33.02738264 -36.38633482
 -35.86249436 -38.43007554 -35.83704428 -36.34673289 -32.15556002]
-35.624221958335546
1.9620863339048376


# SVR Não-Linear (dados escalonados)

In [68]:
from sklearn.svm import SVR

param_grid =  {  
    'svm__kernel': ['rbf', 'poly', 'sigmoid'],
    'svm__C': [ 0.1, 0.2, 0.4, 0.5, 1.0, 1.5, 1.8, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0],
    'svm__degree': [2,3]
} 

svm_pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', SVR(max_iter=1000))
])

gs = GridSearchCV(svm_pipe, param_grid, n_jobs=multiprocessing.cpu_count(), cv=10, scoring='neg_mean_absolute_error')

In [69]:
gs.fit(melting_point['data'], melting_point['target'])



0,1,2
,estimator,Pipeline(step..._iter=1000))])
,param_grid,"{'svm__C': [0.1, 0.2, ...], 'svm__degree': [2, 3], 'svm__kernel': ['rbf', 'poly', ...]}"
,scoring,'neg_mean_absolute_error'
,n_jobs,16
,refit,True
,cv,10
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,kernel,'rbf'
,degree,2
,gamma,'scale'
,coef0,0.0
,tol,0.001
,C,5.0
,epsilon,0.1
,shrinking,True
,cache_size,200
,verbose,False


In [70]:
svr_scores = cross_val_score(gs.best_estimator_, melting_point["data"], melting_point["target"], cv=10, scoring='neg_mean_absolute_error')
print(svr_scores)
print(svr_scores.mean())
print(svr_scores.std())



[-57.33222384 -51.50761334 -55.59625469 -53.93591289 -54.89416071
 -53.58651273 -55.27794379 -52.02655981 -53.34341806 -52.41957819]
-53.99201780373488
1.7115531121322778




# SVR Linear (PCA)

In [82]:
df = pd.read_csv('./melting-point/train_wz_ws.csv')

In [83]:
melting_point={}
melting_point["data"] = df.drop(['Tm'], axis=1).values
melting_point["target"] = df["Tm"].values

In [107]:
from sklearn.svm import LinearSVR
from sklearn.decomposition import PCA
from sklearn.model_selection import cross_val_score

lsvr = LinearSVR(max_iter=1000)

svm_pipe = Pipeline([
    ('pca', PCA(n_components=150)),
    ('svm', lsvr)
])
svm_scores = cross_val_score(svm_pipe, melting_point['data'], melting_point["target"], cv=10, scoring='neg_mean_absolute_error')
print(svm_scores)
print(svm_scores.mean())
print(svm_scores.std())

[-48.96544191 -43.16504755 -44.84655845 -43.02967831 -43.58047576
 -44.1211778  -48.85875203 -42.80007978 -42.91345765 -38.7810889 ]
-44.10617581263354
2.8381998375761484


In [110]:
from sklearn.model_selection import GridSearchCV
import multiprocessing

parameters = {
    'svm__C': [ 0.1, 0.2, 0.4, 0.5, 1.0, 1.5, 1.8, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0],
    'svm__loss': ['epsilon_insensitive', 'squared_epsilon_insensitive']
}
clf = GridSearchCV(svm_pipe, parameters, n_jobs=multiprocessing.cpu_count(), cv=10, scoring='neg_mean_absolute_error')

In [111]:
clf.fit(melting_point['data'], melting_point['target'])

0,1,2
,estimator,Pipeline(step...LinearSVR())])
,param_grid,"{'svm__C': [0.1, 0.2, ...], 'svm__loss': ['epsilon_insensitive', 'squared_epsilon_insensitive']}"
,scoring,'neg_mean_absolute_error'
,n_jobs,16
,refit,True
,cv,10
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,n_components,150
,copy,True
,whiten,False
,svd_solver,'auto'
,tol,0.0
,iterated_power,'auto'
,n_oversamples,10
,power_iteration_normalizer,'auto'
,random_state,

0,1,2
,epsilon,0.0
,tol,0.0001
,C,5.0
,loss,'epsilon_insensitive'
,fit_intercept,True
,intercept_scaling,1.0
,dual,'auto'
,verbose,0
,random_state,
,max_iter,1000


In [112]:
lsvr_scores = cross_val_score(clf.best_estimator_, melting_point["data"], melting_point["target"], cv=10, scoring='neg_mean_absolute_error')
print(lsvr_scores)
print(lsvr_scores.mean())
print(lsvr_scores.std())

[-44.26853616 -39.60915792 -40.3938713  -38.23270477 -40.94752728
 -38.97163854 -44.91980987 -40.3003102  -40.65869605 -35.69750973]
-40.39997618161972
2.554352553697622


# SVR Não-Linear (PCA)