In [2]:
import numpy as np
import pandas as pd

# SVR Linear (dados originais)

In [3]:
df = pd.read_csv('./melting-point/train_wz_ws.csv')

In [4]:
melting_point={}
melting_point["data"] = df.drop(['Tm'], axis=1).values
melting_point["target"] = df["Tm"].values

In [5]:
from sklearn.svm import LinearSVR
from sklearn.model_selection import cross_val_score

lsvr = LinearSVR(max_iter=1000)

svm_scores = cross_val_score(lsvr, melting_point["data"], melting_point["target"], cv=10, scoring='neg_mean_absolute_error')
print(svm_scores)
print(svm_scores.mean())
print(svm_scores.std())

[-48.64041541 -43.25964342 -43.85396457 -41.77823904 -43.29369319
 -42.81590023 -47.51970608 -41.9765008  -41.6462916  -39.16151757]
-43.394587191344044
2.6591085232237606


In [6]:
from sklearn.model_selection import GridSearchCV
import multiprocessing

parameters = {
    'C': [ 0.1, 0.2, 0.4, 0.5, 1.0, 1.5, 1.8, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0],
    'loss': ['epsilon_insensitive', 'squared_epsilon_insensitive']
}
clf = GridSearchCV(lsvr, parameters, n_jobs=multiprocessing.cpu_count(), cv=10, scoring='neg_mean_absolute_error')

In [7]:
clf.fit(melting_point['data'], melting_point['target'])

0,1,2
,estimator,LinearSVR()
,param_grid,"{'C': [0.1, 0.2, ...], 'loss': ['epsilon_insensitive', 'squared_epsilon_insensitive']}"
,scoring,'neg_mean_absolute_error'
,n_jobs,16
,refit,True
,cv,10
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,epsilon,0.0
,tol,0.0001
,C,1.0
,loss,'squared_epsilon_insensitive'
,fit_intercept,True
,intercept_scaling,1.0
,dual,'auto'
,verbose,0
,random_state,
,max_iter,1000


In [8]:
lsvr_scores = cross_val_score(clf.best_estimator_, melting_point["data"], melting_point["target"], cv=10, scoring='neg_mean_absolute_error')
print(lsvr_scores)
print(lsvr_scores.mean())
print(lsvr_scores.std())

[-37.85560513 -36.47314839 -37.40611646 -34.72217999 -38.22590499
 -38.2376763  -38.34934845 -37.26332945 -38.32286432 -34.27800495]
-37.11341784106711
1.425425817394587


## NuSVR Linear

In [9]:
from sklearn.svm import NuSVR

param_grid = [ 
    {  
        'nu': np.arange(0.05, 1.0, 0.05) 
    } 
] 
 
gs = GridSearchCV(estimator=NuSVR(kernel='linear'), param_grid=param_grid, scoring='neg_mean_absolute_error', cv=10, n_jobs=multiprocessing.cpu_count()) 

In [10]:
gs.fit(melting_point['data'], melting_point['target'])

0,1,2
,estimator,NuSVR(kernel='linear')
,param_grid,"[{'nu': array([0.05, ..., 0.9 , 0.95])}]"
,scoring,'neg_mean_absolute_error'
,n_jobs,16
,refit,True
,cv,10
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,nu,np.float64(0.9500000000000001)
,C,1.0
,kernel,'linear'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,tol,0.001
,cache_size,200
,verbose,False


In [11]:
lsvr_scores = cross_val_score(gs.best_estimator_, melting_point["data"], melting_point["target"], cv=10, scoring='neg_mean_absolute_error')
print(lsvr_scores)
print(lsvr_scores.mean())
print(lsvr_scores.std())

[-48.03750727 -43.07193288 -43.79739241 -42.57738726 -42.45347191
 -42.73159017 -47.02292583 -42.9154131  -42.75627961 -38.34620502]
-43.37101054752479
2.5119441292608498


# SVR Não-Linear (dados originais)

In [12]:
from sklearn.svm import SVR

param_grid =  {  
    'kernel': ['rbf', 'poly', 'sigmoid'],
    'C': [ 0.1, 0.2, 0.4, 0.5, 1.0, 1.5, 1.8, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0]
} 

gs = GridSearchCV(SVR(max_iter=1000), param_grid, n_jobs=multiprocessing.cpu_count(), cv=10, scoring='neg_mean_absolute_error')

In [13]:
gs.fit(melting_point['data'], melting_point['target'])



0,1,2
,estimator,SVR(max_iter=1000)
,param_grid,"{'C': [0.1, 0.2, ...], 'kernel': ['rbf', 'poly', ...]}"
,scoring,'neg_mean_absolute_error'
,n_jobs,16
,refit,True
,cv,10
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,tol,0.001
,C,5.0
,epsilon,0.1
,shrinking,True
,cache_size,200
,verbose,False


In [14]:
svr_scores = cross_val_score(gs.best_estimator_, melting_point["data"], melting_point["target"], cv=10, scoring='neg_mean_absolute_error')
print(svr_scores)
print(svr_scores.mean())
print(svr_scores.std())



[-51.0185633  -46.32309429 -48.01425236 -49.5881467  -50.77771317
 -50.3115404  -52.58274077 -46.00719179 -48.11902379 -42.90666729]
-48.56489338421752
2.743046054808106




# SVR Linear (dados escalonados)

In [18]:
df = pd.read_csv('./melting-point/train_wz_ws.csv')

In [19]:
melting_point={}
melting_point["data"] = df.drop(['Tm'], axis=1).values
melting_point["target"] = df["Tm"].values

In [20]:
from sklearn.svm import LinearSVR
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline

lsvr = LinearSVR(max_iter=1000)

svm_pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', lsvr)
])
svm_scores = cross_val_score(svm_pipe, melting_point["data"], melting_point["target"], cv=10, scoring='neg_mean_absolute_error')
print(svm_scores)
print(svm_scores.mean())
print(svm_scores.std())

[-39.35840919 -34.47855291 -36.90738417 -32.94164745 -37.0566346
 -37.05910329 -38.19266484 -36.14594291 -36.62333729 -33.38215848]
-36.21458351265581
1.939562258152646


In [21]:
from sklearn.model_selection import GridSearchCV
import multiprocessing

parameters = {
    'svm__C': [ 0.1, 0.2, 0.4, 0.5, 1.0, 1.5, 1.8, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0],
    'svm__loss': ['epsilon_insensitive', 'squared_epsilon_insensitive']
}
clf = GridSearchCV(svm_pipe, parameters, n_jobs=multiprocessing.cpu_count(), cv=10, scoring='neg_mean_absolute_error')

In [22]:
clf.fit(melting_point['data'], melting_point['target'])



0,1,2
,estimator,Pipeline(step...LinearSVR())])
,param_grid,"{'svm__C': [0.1, 0.2, ...], 'svm__loss': ['epsilon_insensitive', 'squared_epsilon_insensitive']}"
,scoring,'neg_mean_absolute_error'
,n_jobs,16
,refit,True
,cv,10
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,epsilon,0.0
,tol,0.0001
,C,2.0
,loss,'epsilon_insensitive'
,fit_intercept,True
,intercept_scaling,1.0
,dual,'auto'
,verbose,0
,random_state,
,max_iter,1000


In [23]:
lsvr_scores = cross_val_score(clf.best_estimator_, melting_point["data"], melting_point["target"], cv=10, scoring='neg_mean_absolute_error')
print(lsvr_scores)
print(lsvr_scores.mean())
print(lsvr_scores.std())



[-39.66709564 -34.41116977 -36.81610978 -32.82996026 -36.47453806
 -36.85518918 -38.07720296 -36.38701339 -37.02539935 -32.78705135]
-36.13307297362487
2.08100082027675




## NuSVR Linear

In [24]:
from sklearn.svm import NuSVR

param_grid = [ 
    {  
        'svm__nu': np.arange(0.05, 1.0, 0.05) 
    } 
]

svm_pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', NuSVR(kernel='linear'))
])
 
gs = GridSearchCV(svm_pipe, param_grid=param_grid, scoring='neg_mean_absolute_error', cv=10, n_jobs=multiprocessing.cpu_count()) 

In [25]:
gs.fit(melting_point['data'], melting_point['target'])

0,1,2
,estimator,Pipeline(step...l='linear'))])
,param_grid,"[{'svm__nu': array([0.05, ..., 0.9 , 0.95])}]"
,scoring,'neg_mean_absolute_error'
,n_jobs,16
,refit,True
,cv,10
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,nu,np.float64(0.6000000000000001)
,C,1.0
,kernel,'linear'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,tol,0.001
,cache_size,200
,verbose,False


In [26]:
lsvr_scores = cross_val_score(gs.best_estimator_, melting_point["data"], melting_point["target"], cv=10, scoring='neg_mean_absolute_error')
print(lsvr_scores)
print(lsvr_scores.mean())
print(lsvr_scores.std())

[-38.56392274 -34.29378441 -35.57267659 -33.00558793 -36.38724131
 -35.99463231 -38.23844596 -36.01326731 -36.45318351 -32.22672105]
-35.674946310827956
1.9282425091261377


# SVR Não-Linear (dados escalonados)

In [27]:
from sklearn.svm import SVR

param_grid =  {  
    'svm__kernel': ['rbf', 'poly', 'sigmoid'],
    'svm__C': [ 0.1, 0.2, 0.4, 0.5, 1.0, 1.5, 1.8, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0],
    'svm__degree': [2,3]
} 

svm_pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', SVR(max_iter=1000))
])

gs = GridSearchCV(svm_pipe, param_grid, n_jobs=multiprocessing.cpu_count(), cv=10, scoring='neg_mean_absolute_error')

In [28]:
gs.fit(melting_point['data'], melting_point['target'])



0,1,2
,estimator,Pipeline(step..._iter=1000))])
,param_grid,"{'svm__C': [0.1, 0.2, ...], 'svm__degree': [2, 3], 'svm__kernel': ['rbf', 'poly', ...]}"
,scoring,'neg_mean_absolute_error'
,n_jobs,16
,refit,True
,cv,10
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,kernel,'rbf'
,degree,2
,gamma,'scale'
,coef0,0.0
,tol,0.001
,C,5.0
,epsilon,0.1
,shrinking,True
,cache_size,200
,verbose,False


In [29]:
svr_scores = cross_val_score(gs.best_estimator_, melting_point["data"], melting_point["target"], cv=10, scoring='neg_mean_absolute_error')
print(svr_scores)
print(svr_scores.mean())
print(svr_scores.std())



[-57.84458361 -51.50530263 -55.671725   -53.86946909 -54.68956505
 -54.03207125 -55.34643768 -52.04221445 -53.58646605 -52.71834186]
-54.13061766692478
1.7771166620418526




# SVR Linear (PCA)

In [82]:
df = pd.read_csv('./melting-point/train_wz_ws.csv')

In [83]:
melting_point={}
melting_point["data"] = df.drop(['Tm'], axis=1).values
melting_point["target"] = df["Tm"].values

In [107]:
from sklearn.svm import LinearSVR
from sklearn.decomposition import PCA
from sklearn.model_selection import cross_val_score

lsvr = LinearSVR(max_iter=1000)

svm_pipe = Pipeline([
    ('pca', PCA(n_components=150)),
    ('svm', lsvr)
])
svm_scores = cross_val_score(svm_pipe, melting_point['data'], melting_point["target"], cv=10, scoring='neg_mean_absolute_error')
print(svm_scores)
print(svm_scores.mean())
print(svm_scores.std())

[-48.96544191 -43.16504755 -44.84655845 -43.02967831 -43.58047576
 -44.1211778  -48.85875203 -42.80007978 -42.91345765 -38.7810889 ]
-44.10617581263354
2.8381998375761484


In [110]:
from sklearn.model_selection import GridSearchCV
import multiprocessing

parameters = {
    'svm__C': [ 0.1, 0.2, 0.4, 0.5, 1.0, 1.5, 1.8, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0],
    'svm__loss': ['epsilon_insensitive', 'squared_epsilon_insensitive']
}
clf = GridSearchCV(svm_pipe, parameters, n_jobs=multiprocessing.cpu_count(), cv=10, scoring='neg_mean_absolute_error')

In [111]:
clf.fit(melting_point['data'], melting_point['target'])

0,1,2
,estimator,Pipeline(step...LinearSVR())])
,param_grid,"{'svm__C': [0.1, 0.2, ...], 'svm__loss': ['epsilon_insensitive', 'squared_epsilon_insensitive']}"
,scoring,'neg_mean_absolute_error'
,n_jobs,16
,refit,True
,cv,10
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,n_components,150
,copy,True
,whiten,False
,svd_solver,'auto'
,tol,0.0
,iterated_power,'auto'
,n_oversamples,10
,power_iteration_normalizer,'auto'
,random_state,

0,1,2
,epsilon,0.0
,tol,0.0001
,C,5.0
,loss,'epsilon_insensitive'
,fit_intercept,True
,intercept_scaling,1.0
,dual,'auto'
,verbose,0
,random_state,
,max_iter,1000


In [112]:
lsvr_scores = cross_val_score(clf.best_estimator_, melting_point["data"], melting_point["target"], cv=10, scoring='neg_mean_absolute_error')
print(lsvr_scores)
print(lsvr_scores.mean())
print(lsvr_scores.std())

[-44.26853616 -39.60915792 -40.3938713  -38.23270477 -40.94752728
 -38.97163854 -44.91980987 -40.3003102  -40.65869605 -35.69750973]
-40.39997618161972
2.554352553697622


# SVR Não-Linear (PCA)