# S2RSLDB: a comprehensive manually curated, internet-accessible database of the sigma-2 receptor selective ligands

### Imports

In [2]:
from smdt import datasets
from smdt import data_processing
from smdt import molecular_descriptors
from smdt import regression
import pandas as pd

### Data

#### Reading Data

In [3]:
# Load the sigma-2 receptor ligand dataset through smdt's dataset loader
# (source publication cited in the reference below).
Sigma2 = datasets.load_Sigma2ReceptorLigands()

Reference: 
G. Nastasi, C. Miceli, V. Pittala, M.N. Modica, O. Prezzavento, G. Romeo, A. Rescifina, A. Marrazzo, E. Amata. "S2RSLDB: a comprehensive manually curated, internet-accessible database of the sigma-2 receptor selective ligands." J. Cheminform., 9 (2017), p. 3


#### Calculating Descriptors

In [4]:
# Compute the full smdt descriptor set for the loaded ligands.
# The result is indexed by column name below and carries a 'Target' column.
# NOTE(review): presumably one row per ligand (progress output reports 651 rows) — confirm.
data = molecular_descriptors.getAllDescriptors(Sigma2)


Calculating Molecular Descriptors...
Row 651 out of 651
Calculating Molecular Descriptors Completed.


#### Data Cleaning

In [5]:
# Feature matrix: everything in the descriptor table except the 'Target' column.
X = data.drop('Target', axis=1)
# Regression target: the 'Target' column on its own.
y = data['Target']

In [6]:
# Replace missing descriptor values with the per-column mean.
#
# `sklearn.preprocessing.Imputer` was deprecated in scikit-learn 0.20 and removed
# in 0.22, so importing it fails on current installs. `SimpleImputer` is the
# drop-in replacement; it always imputes column-wise (the old `axis=0`).
try:
    from sklearn.impute import SimpleImputer
    # missing_values defaults to np.nan, matching the old missing_values='NaN'.
    a = SimpleImputer(strategy='mean')
except ImportError:  # scikit-learn < 0.20 has no sklearn.impute module
    from sklearn.preprocessing import Imputer
    a = Imputer(missing_values='NaN', strategy='mean', axis=0)
# fit_transform returns a NumPy array, so X is no longer a DataFrame after this cell.
X = a.fit_transform(X)

#### Train Test Split

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# partition identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Models

#### Random Forest

In [8]:
# Fit smdt's random-forest regression helper on the train/test partition.
# The notebook imports `regression` from smdt (imports cell); `regression_models`
# is never defined, so the original call raised NameError on a fresh kernel.
# NOTE(review): the trailing 30 is an smdt-specific argument — confirm its meaning in the smdt docs.
model1 = regression.fit_RandomForestRegressor(X_train, X_test, y_train, y_test, 30)

Training data GridSearchCV best r2 score: 0.44047
Testing Data Regression r2 score: 0.41052


#### Lasso

In [9]:
# Fit smdt's Lasso regression helper on the train/test partition.
# The notebook imports `regression` from smdt (imports cell); `regression_models`
# is never defined, so the original call raised NameError on a fresh kernel.
model2 = regression.fit_Lasso(X_train, X_test, y_train, y_test)

Training data GridSearchCV best r2 score: 0.62112
Testing Data Classification r2 score: 0.27691


#### Ridge

In [10]:
# Fit smdt's Ridge regression helper on the train/test partition.
# The notebook imports `regression` from smdt (imports cell); `regression_models`
# is never defined, so the original call raised NameError on a fresh kernel.
model3 = regression.fit_Ridge(X_train, X_test, y_train, y_test)

Training data GridSearchCV best r2 score: 0.77636
Testing Data Classification r2 score: -0.10154


#### ElasticNet

In [11]:
# Fit smdt's ElasticNet regression helper on the train/test partition.
# The notebook imports `regression` from smdt (imports cell); `regression_models`
# is never defined, so the original call raised NameError on a fresh kernel.
model4 = regression.fit_ElasticNet(X_train, X_test, y_train, y_test)

Training data GridSearchCV best r2 score: 0.63787
Testing Data Classification r2 score: 0.27838


#### Linear SVR

In [13]:
# Fit smdt's linear SVR helper on the train/test partition.
# The notebook imports `regression` from smdt (imports cell); `regression_models`
# is never defined, so the original call raised NameError on a fresh kernel.
# NOTE(review): the trailing 100 is an smdt-specific argument — confirm its meaning in the smdt docs.
model5 = regression.fit_LinearSVR(X_train, X_test, y_train, y_test, 100)

Training data GridSearchCV best r2 score: 0.22782
Testing Data Classification r2 score: 0.27844
