# Blood-Brain-Barrier Penetration Classification (Li et al. 2005 dataset)

### Imports

In [1]:
from smdt import datasets
from smdt import molecular_descriptors
from smdt import classification_models

### Data

In [2]:
# Load the blood-brain-barrier dataset through smdt's dataset loader.
# The "Li" prefix matches the first author of the source paper cited below this cell.
LiBloodBarrier = datasets.load_LiBloodBarrier()

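Reference: Hu Li, Chun Wei Yap, Choong Yong Ung, Ying Xue, Zhi Wei Cao and Yu Zong Chen, "Effect of Selection of Molecular Descriptors on the Prediction of Blood-Brain Barrier Penetrating and Nonpenetrating Agents by Statistical Learning Methods", J. Chem. Inf. Model. 2005, 45(5), 1376-1384.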


### Descriptors

In [3]:
# Compute molecular descriptors for the loaded compounds (progress is printed row by row).
# The result is consumed in the next cell as a table that contains a 'Target' column.
data = molecular_descriptors.getAllDescriptors(LiBloodBarrier)


Calculating Molecular Descriptors...
Row 387 out of 387
Calculating Molecular Descriptors Completed.


In [4]:
# Separate the descriptor table into model inputs and labels:
# X keeps every column except 'Target'; y is the 'Target' column itself.
X = data.drop(['Target'], axis=1)
y = data['Target']

In [5]:
# Replace missing descriptor values with the mean of their column.
#
# `sklearn.preprocessing.Imputer` was deprecated in scikit-learn 0.20 and removed
# in 0.22, so importing it fails on any current install. `SimpleImputer` is its
# replacement; it always imputes column-wise, which matches the old `axis=0`.
# The fallback keeps the notebook runnable on pre-0.20 environments.
import numpy as np

try:
    from sklearn.impute import SimpleImputer
    a = SimpleImputer(missing_values=np.nan, strategy='mean')
except ImportError:  # scikit-learn < 0.20
    from sklearn.preprocessing import Imputer
    a = Imputer(missing_values='NaN', strategy='mean', axis=0)

# NOTE(review): the fit_* helpers below report a separate "Testing Data" score
# (support 97 of the 387 rows), so they appear to split X internally. Fitting the
# imputer on all rows here would then let test-set statistics influence training.
# Consider imputing on the training portion only -- TODO confirm against smdt.
X = a.fit_transform(X)

### Models

#### Gaussian Naive Bayes

In [6]:
# Fit the Gaussian Naive Bayes model through smdt's helper; it prints the
# grid-search and held-out test scores shown below.
# NOTE(review): the meaning of the third positional argument (20) is not visible
# in this notebook -- TODO confirm against smdt.classification_models.
model1 = classification_models.fit_GaussianNB(X, y, 20)

Training data GridSearchCV accuracy: 0.82414
Testing Data Classification accuracy: 0.81443

Classification Report:
             precision    recall  f1-score   support

          n       0.73      0.69      0.71        32
          p       0.85      0.88      0.86        65

avg / total       0.81      0.81      0.81        97



#### K Nearest Neighbors

In [7]:
# Fit the K-Nearest-Neighbors model through smdt's helper; it prints the
# grid-search and held-out test scores shown below.
# NOTE(review): the meaning of the third positional argument (20) is not visible
# in this notebook -- TODO confirm against smdt.classification_models.
model2 = classification_models.fit_KNearestNeighbors(X, y, 20)

Training data GridSearchCV accuracy: 0.83448
Testing Data Classification accuracy: 0.84536

Classification Report:
             precision    recall  f1-score   support

          n       0.76      0.78      0.77        32
          p       0.89      0.88      0.88        65

avg / total       0.85      0.85      0.85        97



#### Linear SVC

In [8]:
# Fit the linear support-vector classifier through smdt's helper; it prints the
# grid-search and held-out test scores shown below.
# NOTE(review): the meaning of the third positional argument (20) is not visible
# in this notebook -- TODO confirm against smdt.classification_models.
model3 = classification_models.fit_LinearSVC(X, y, 20)

Training data GridSearchCV accuracy: 0.80690
Testing Data Classification accuracy: 0.73196

Classification Report:
             precision    recall  f1-score   support

          n       0.64      0.44      0.52        32
          p       0.76      0.88      0.81        65

avg / total       0.72      0.73      0.72        97



#### Random Forest Classifier

In [9]:
# Fit the random-forest model through smdt's helper; it prints the
# grid-search and held-out test scores shown below.
# NOTE(review): the meaning of the third positional argument (20) is not visible
# in this notebook -- TODO confirm against smdt.classification_models.
model4 = classification_models.fit_RandomForestClassifier(X, y, 20)

Training data GridSearchCV accuracy: 0.82069
Testing Data Classification accuracy: 0.80412

Classification Report:
             precision    recall  f1-score   support

          n       0.70      0.72      0.71        32
          p       0.86      0.85      0.85        65

avg / total       0.81      0.80      0.80        97



#### SGD Classifier

In [10]:
# Fit the SGD classifier through smdt's helper; it prints the
# grid-search and held-out test scores shown below.
# NOTE(review): this call passes 3 as the third positional argument where the
# other model cells pass 20; the argument's meaning is not visible in this
# notebook -- TODO confirm the difference is intentional.
model5 = classification_models.fit_SGDClassifier(X, y, 3)

Training data GridSearchCV accuracy: 0.70345
Testing Data Classification accuracy: 0.67010

Classification Report:
             precision    recall  f1-score   support

          n       0.50      0.59      0.54        32
          p       0.78      0.71      0.74        65

avg / total       0.69      0.67      0.68        97

