# Li et al. Blood-Brain-Barrier Penetration Classification

### Imports

In [1]:
from smdt import datasets
from smdt import molecular_descriptors
from smdt import classification

### Data

#### Data Loading

In [2]:
# Load the blood-brain-barrier dataset bundled with smdt.
# NOTE(review): the return type is not visible here — presumably a pandas
# DataFrame with one molecule per row plus a label column; confirm against
# smdt.datasets.load_LiBloodBarrier.
LiBloodBarrier = datasets.load_LiBloodBarrier()

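Reference:
Hu Li, Chun Wei Yap, Choong Yong Ung, Ying Xue, Zhi Wei Cao and Yu Zong Chen, "Effect of Selection of Molecular Descriptors on the Prediction of Blood-Brain Barrier Penetrating and Nonpenetrating Agents by Statistical Learning Methods", J. Chem. Inf. Model. 2005, 45 (5), 1376-1384.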


#### Descriptor Calculation

In [3]:
# Compute the smdt molecular descriptors for every row of the loaded dataset.
# The result is indexed by column name later in the notebook (a 'Target'
# column is read from it), so it behaves like a DataFrame.
# NOTE(review): this walks all rows and prints progress; if it is slow,
# consider caching the result to disk so Restart & Run All stays practical.
data = molecular_descriptors.getAllDescriptors(LiBloodBarrier)


Calculating Molecular Descriptors...
Row 387 out of 387
Calculating Molecular Descriptors Completed.


#### Data Cleaning

In [4]:
# Separate the feature matrix from the class label.
# X: every descriptor column; y: the 'Target' column only.
X = data.drop('Target', axis=1)
y = data['Target']

In [5]:
# Replace missing descriptor values with the per-column mean.
#
# `sklearn.preprocessing.Imputer` was deprecated in scikit-learn 0.20 and
# removed in 0.22, so prefer its replacement `sklearn.impute.SimpleImputer`
# and only fall back to the legacy class on old installations. Both are
# configured identically: NaN marks a missing value, column-wise mean fills it.
try:
    from sklearn.impute import SimpleImputer  # scikit-learn >= 0.20
    a = SimpleImputer(strategy='mean')  # missing_values defaults to NaN
except ImportError:
    from sklearn.preprocessing import Imputer  # scikit-learn < 0.20
    a = Imputer(missing_values='NaN', strategy='mean', axis=0)

# fit_transform returns a NumPy array, so X is no longer a DataFrame and the
# column names are lost from this point on.
# NOTE(review): the imputer is fit on the full matrix before the train/test
# split, so test-set statistics leak into the column means. Fitting on the
# training portion only (e.g. inside a Pipeline) would avoid this.
X = a.fit_transform(X)

#### Train Test Split

In [6]:
from sklearn.model_selection import train_test_split

# Stratified hold-out split with a fixed seed so the partition is reproducible.
# No test_size is passed, so scikit-learn's default split fraction applies.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

### Models

#### Gaussian Naive Bayes

In [25]:
# Fit and evaluate a Gaussian Naive Bayes classifier via the smdt helper,
# which prints cross-validation accuracy, test accuracy and a classification
# report.
# NOTE(review): the meaning of the trailing 29 is not visible here —
# presumably a tuning parameter (e.g. number of selected features); confirm
# against smdt.classification.fit_GaussianNB and consider a named constant.
model1 = classification.fit_GaussianNB(X_train, X_test, y_train, y_test, 29)

Training data GridSearchCV accuracy: 0.79655
Testing Data Classification accuracy: 0.79381

Classification Report:
             precision    recall  f1-score   support

          n       0.75      0.56      0.64        32
          p       0.81      0.91      0.86        65

avg / total       0.79      0.79      0.79        97



#### K Nearest Neighbors

In [22]:
# Fit and evaluate a k-nearest-neighbours classifier via the smdt helper,
# which prints cross-validation accuracy, test accuracy and a classification
# report.
# NOTE(review): the meaning of the trailing 44 is not visible here —
# presumably a tuning parameter (e.g. number of selected features); confirm
# against smdt.classification.fit_KNearestNeighbors and consider a named
# constant.
model2 = classification.fit_KNearestNeighbors(X_train, X_test, y_train, y_test, 44)

Training data GridSearchCV accuracy: 0.81724
Testing Data Classification accuracy: 0.81443

Classification Report:
             precision    recall  f1-score   support

          n       0.79      0.59      0.68        32
          p       0.82      0.92      0.87        65

avg / total       0.81      0.81      0.81        97



#### Linear SVC

In [29]:
# Fit and evaluate a linear support-vector classifier via the smdt helper,
# which prints cross-validation accuracy, test accuracy and a classification
# report.
# NOTE(review): the meaning of the trailing 25 is not visible here —
# presumably a tuning parameter (e.g. number of selected features); confirm
# against smdt.classification.fit_LinearSVC and consider a named constant.
model3 = classification.fit_LinearSVC(X_train, X_test, y_train, y_test, 25)

Training data GridSearchCV accuracy: 0.78966
Testing Data Classification accuracy: 0.75258

Classification Report:
             precision    recall  f1-score   support

          n       0.65      0.53      0.59        32
          p       0.79      0.86      0.82        65

avg / total       0.74      0.75      0.75        97



#### Random Forest Classifier

In [33]:
# Fit and evaluate a random-forest classifier via the smdt helper, which
# prints cross-validation accuracy, test accuracy and a classification report.
# NOTE(review): the meaning of the trailing 31 is not visible here —
# presumably a tuning parameter (e.g. number of selected features); confirm
# against smdt.classification.fit_RandomForestClassifier. Random forests are
# stochastic; whether the helper fixes a random_state is also not visible.
model4 = classification.fit_RandomForestClassifier(X_train, X_test, y_train, y_test, 31)

Training data GridSearchCV accuracy: 0.82069
Testing Data Classification accuracy: 0.79381

Classification Report:
             precision    recall  f1-score   support

          n       0.73      0.59      0.66        32
          p       0.82      0.89      0.85        65

avg / total       0.79      0.79      0.79        97



#### SGD Classifier

In [37]:
# Fit and evaluate a stochastic-gradient-descent linear classifier via the
# smdt helper, which prints cross-validation accuracy, test accuracy and a
# classification report.
# NOTE(review): the meaning of the trailing 31 is not visible here —
# presumably a tuning parameter (e.g. number of selected features); confirm
# against smdt.classification.fit_SGDClassifier. SGD is stochastic; whether
# the helper fixes a random_state is also not visible, so the reported scores
# may not reproduce exactly.
model5 = classification.fit_SGDClassifier(X_train, X_test, y_train, y_test, 31)

Training data GridSearchCV accuracy: 0.76552
Testing Data Classification accuracy: 0.82474

Classification Report:
             precision    recall  f1-score   support

          n       0.89      0.53      0.67        32
          p       0.81      0.97      0.88        65

avg / total       0.84      0.82      0.81        97

