# Scikit-Learn

## Create a Classification Dataset

In [1]:
# Generate a synthetic two-class problem: 1000 samples, 10 features
# (2 informative, 2 redundant, 0 repeated).
# The result is kept as a single object; later cells index it as
# dataset[0] (features) and dataset[1] (labels).
from sklearn import datasets

dataset = datasets.make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=2,
    n_redundant=2,
    n_repeated=0,
    n_classes=2,
)

## Split the Dataset Using 10-fold Cross Validation 

In [2]:
# Split the samples into 10 shuffled folds.
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20, so importing it fails on current installs. Its replacement,
# `sklearn.model_selection.KFold`, takes `n_splits` (not the sample count plus
# `n_folds`) and yields index arrays from `.split(X)` instead of being iterated
# directly.
from sklearn import model_selection

kf = model_selection.KFold(n_splits=10, shuffle=True)

# NOTE: every iteration rebinds the same four names, so once the loop finishes
# only the split of the LAST fold is left. The cells below therefore train and
# evaluate on that single fold, not on all ten.
for train_index, test_index in kf.split(dataset[0]):
    X_train, y_train = dataset[0][train_index], dataset[1][train_index]
    X_test, y_test = dataset[0][test_index], dataset[1][test_index]



In [3]:
# Dump the train/test arrays left over from the fold loop, one caption per array.
for caption, values in (
    ("X_train:\n", X_train),
    ("y_train:\n", y_train),
    ("X_test:\n", X_test),
    ("y_test:\n", y_test),
):
    print(caption, values)

X_train:
 [[ 0.22776821  0.70343122 -0.10452921 ... -1.28334848  1.56520986
  -0.27108105]
 [ 0.19913023  0.05398692  0.88551098 ... -0.03375581 -0.83875998
   0.15804097]
 [ 1.09574229 -0.44544795 -1.2489829  ...  0.71739307  0.42019894
   1.10680005]
 ...
 [ 0.32156246  1.17884116  1.24911917 ... -2.04749873  1.09458085
   0.04165762]
 [-0.40109276  0.44451766  1.80462288 ... -0.67544843 -1.01840023
  -0.15304814]
 [ 1.1943236  -0.90673725 -0.44600199 ...  1.61218634 -1.39436964
  -0.62031602]]
y_train:
 [1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1
 1 1 1 1 1 1 0 0 0 1 1 0 0 1 0 0 1 0 0 1 1 0 0 1 0 0 0 0 1 0 1 1 1 1 1 0 1
 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 0 1 0 0 1 0 0 0 0 1 1 0 1 1 1 1 1 0 1
 1 1 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 1 0 1 0 1 1 0 1 1 0 1 0 1 1 1 1 1
 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 1 1 0 1 1 0 0 0 1 1 0 0 0 1 0 0 0 0 1 1 0 0
 1 1 1 0 1 1 0 0 0 1 1 0 0 0 1 1 0 0 0 0 1 1 1 0 1 1 1 0 0 0 0 1 1 1 0 0 1
 1 1 0 0 1 1 0 0 0 0 1 0 0 1 0 1 1 1 0

## Train the Algorithms 

In [4]:
from sklearn import metrics 

### Gaussian NB

In [5]:
# Gaussian Naive Bayes with default settings:
# fit on the training fold, then label the held-out fold.
from sklearn.naive_bayes import GaussianNB

clf = GaussianNB().fit(X_train, y_train)  # fit() returns the fitted estimator
pred = clf.predict(X_test)

In [6]:
# Show the predicted labels followed by the true labels of the held-out fold.
for caption, labels in (("Predict:\n", pred), ("y_test:\n", y_test)):
    print(caption, labels)

Predict:
 [0 1 0 1 1 0 1 0 0 0 0 1 0 1 1 0 0 0 1 0 0 1 0 1 0 0 0 0 1 1 0 1 0 1 0 0 0
 0 1 0 0 1 1 0 1 0 1 0 1 0 1 1 0 1 1 1 0 1 0 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1
 1 0 1 0 1 0 1 0 1 0 0 0 1 1 0 1 0 1 0 0 1 1 0 1 0 1]
y_test:
 [1 1 0 0 1 0 1 0 1 0 0 0 0 1 1 1 1 0 1 0 1 0 0 1 0 0 1 0 0 1 0 0 0 1 0 0 0
 1 1 0 0 1 1 1 1 0 1 0 1 0 1 0 0 1 1 1 1 1 1 0 1 0 1 0 1 1 1 1 1 1 0 0 1 0
 1 1 1 1 1 0 1 0 1 0 0 0 1 1 1 1 0 1 0 0 1 1 0 1 0 1]


In [7]:
# Score the held-out fold with accuracy, F1 and ROC AUC.
# NOTE: `pred` comes from clf.predict(), so the AUC below is computed from
# hard class labels rather than from continuous scores or probabilities.
acc = metrics.accuracy_score(y_true=y_test, y_pred=pred)
print("Accuracy:\n", acc)

f1 = metrics.f1_score(y_true=y_test, y_pred=pred)
print("F1 Score:\n", f1)

auc = metrics.roc_auc_score(y_true=y_test, y_score=pred)
print("AUC ROC:\n", auc)

Accuracy:
 0.75
F1 Score:
 0.766355140186916
AUC ROC:
 0.7505050505050506


### SVM

#### $C=1e-2$

In [8]:
# RBF-kernel SVM with C = 0.01 and gamma = 0.1:
# fit on the training fold, then label the held-out fold.
from sklearn.svm import SVC

clf = SVC(kernel="rbf", gamma=0.1, C=0.01).fit(X_train, y_train)
pred = clf.predict(X_test)

In [9]:
# Show the predicted labels followed by the true labels of the held-out fold.
for caption, labels in (("Predict:\n", pred), ("y_test:\n", y_test)):
    print(caption, labels)

Predict:
 [0 1 0 1 1 0 1 0 0 0 0 1 0 1 1 0 0 0 1 0 0 1 0 1 0 0 0 0 1 1 0 1 0 1 0 0 0
 0 1 0 0 1 1 0 1 0 1 0 1 0 1 1 0 1 1 1 0 1 0 0 0 1 1 1 1 1 1 1 1 1 0 0 1 1
 1 0 1 0 1 0 1 0 1 0 0 0 1 1 0 1 0 1 0 0 1 0 0 1 0 1]
y_test:
 [1 1 0 0 1 0 1 0 1 0 0 0 0 1 1 1 1 0 1 0 1 0 0 1 0 0 1 0 0 1 0 0 0 1 0 0 0
 1 1 0 0 1 1 1 1 0 1 0 1 0 1 0 0 1 1 1 1 1 1 0 1 0 1 0 1 1 1 1 1 1 0 0 1 0
 1 1 1 1 1 0 1 0 1 0 0 0 1 1 1 1 0 1 0 0 1 1 0 1 0 1]


In [10]:
# Score the held-out fold with accuracy, F1 and ROC AUC.
# NOTE: `pred` comes from clf.predict(), so the AUC below is computed from
# hard class labels rather than from continuous scores or probabilities.
acc = metrics.accuracy_score(y_true=y_test, y_pred=pred)
print("Accuracy:\n", acc)

f1 = metrics.f1_score(y_true=y_test, y_pred=pred)
print("F1 Score:\n", f1)

auc = metrics.roc_auc_score(y_true=y_test, y_score=pred)
print("AUC ROC:\n", auc)

Accuracy:
 0.76
F1 Score:
 0.7692307692307693
AUC ROC:
 0.7636363636363637


#### $C=1e-1$

In [11]:
# RBF-kernel SVM with C = 0.1 and gamma = 0.1:
# fit on the training fold, then label the held-out fold.
from sklearn.svm import SVC

clf = SVC(kernel="rbf", gamma=0.1, C=0.1).fit(X_train, y_train)
pred = clf.predict(X_test)

In [12]:
# Show the predicted labels followed by the true labels of the held-out fold.
for caption, labels in (("Predict:\n", pred), ("y_test:\n", y_test)):
    print(caption, labels)

Predict:
 [0 1 0 1 1 0 1 0 0 0 0 1 0 1 1 0 0 0 0 0 0 1 0 1 1 0 1 0 0 1 0 0 0 1 0 0 0
 0 1 0 0 1 1 1 1 0 1 0 1 0 1 1 0 1 1 1 0 1 0 0 0 1 1 1 1 1 1 1 1 1 0 0 1 0
 1 0 1 0 1 0 1 0 1 0 0 0 1 1 0 1 0 1 0 0 1 0 0 1 0 1]
y_test:
 [1 1 0 0 1 0 1 0 1 0 0 0 0 1 1 1 1 0 1 0 1 0 0 1 0 0 1 0 0 1 0 0 0 1 0 0 0
 1 1 0 0 1 1 1 1 0 1 0 1 0 1 0 0 1 1 1 1 1 1 0 1 0 1 0 1 1 1 1 1 1 0 0 1 0
 1 1 1 1 1 0 1 0 1 0 0 0 1 1 1 1 0 1 0 0 1 1 0 1 0 1]


In [13]:
# Score the held-out fold with accuracy, F1 and ROC AUC.
# NOTE: `pred` comes from clf.predict(), so the AUC below is computed from
# hard class labels rather than from continuous scores or probabilities.
acc = metrics.accuracy_score(y_true=y_test, y_pred=pred)
print("Accuracy:\n", acc)

f1 = metrics.f1_score(y_true=y_test, y_pred=pred)
print("F1 Score:\n", f1)

auc = metrics.roc_auc_score(y_true=y_test, y_score=pred)
print("AUC ROC:\n", auc)

Accuracy:
 0.79
F1 Score:
 0.7961165048543689
AUC ROC:
 0.7949494949494951


#### $C=1e0$

In [14]:
# RBF-kernel SVM with C = 1.0 and gamma = 0.1:
# fit on the training fold, then label the held-out fold.
from sklearn.svm import SVC

clf = SVC(kernel="rbf", gamma=0.1, C=1.0).fit(X_train, y_train)
pred = clf.predict(X_test)

In [15]:
# Show the predicted labels followed by the true labels of the held-out fold.
for caption, labels in (("Predict:\n", pred), ("y_test:\n", y_test)):
    print(caption, labels)

Predict:
 [0 1 0 1 1 0 1 0 0 0 0 1 0 1 1 0 0 0 0 0 1 1 0 1 1 0 1 0 0 1 0 0 1 1 0 0 0
 0 1 0 0 1 1 1 1 0 1 0 1 0 1 1 0 1 1 1 0 1 0 0 1 1 1 0 1 1 1 1 1 1 0 0 1 0
 1 1 1 0 1 0 1 0 1 0 0 0 1 1 0 1 0 1 0 0 1 0 0 1 0 1]
y_test:
 [1 1 0 0 1 0 1 0 1 0 0 0 0 1 1 1 1 0 1 0 1 0 0 1 0 0 1 0 0 1 0 0 0 1 0 0 0
 1 1 0 0 1 1 1 1 0 1 0 1 0 1 0 0 1 1 1 1 1 1 0 1 0 1 0 1 1 1 1 1 1 0 0 1 0
 1 1 1 1 1 0 1 0 1 0 0 0 1 1 1 1 0 1 0 0 1 1 0 1 0 1]


In [16]:
# Score the held-out fold with accuracy, F1 and ROC AUC.
# NOTE: `pred` comes from clf.predict(), so the AUC below is computed from
# hard class labels rather than from continuous scores or probabilities.
acc = metrics.accuracy_score(y_true=y_test, y_pred=pred)
print("Accuracy:\n", acc)

f1 = metrics.f1_score(y_true=y_test, y_pred=pred)
print("F1 Score:\n", f1)

auc = metrics.roc_auc_score(y_true=y_test, y_score=pred)
print("AUC ROC:\n", auc)

Accuracy:
 0.82
F1 Score:
 0.8301886792452831
AUC ROC:
 0.8222222222222222


#### $C=1e1$

In [17]:
# RBF-kernel SVM with C = 10.0 and gamma = 0.1:
# fit on the training fold, then label the held-out fold.
from sklearn.svm import SVC

clf = SVC(kernel="rbf", gamma=0.1, C=10.0).fit(X_train, y_train)
pred = clf.predict(X_test)

In [18]:
# Show the predicted labels followed by the true labels of the held-out fold.
for caption, labels in (("Predict:\n", pred), ("y_test:\n", y_test)):
    print(caption, labels)

Predict:
 [0 0 0 1 1 0 1 0 0 0 0 0 0 1 1 0 1 0 1 0 1 1 0 1 1 0 1 0 0 1 0 0 0 1 0 0 0
 0 1 0 0 1 1 1 1 0 1 0 1 1 1 1 0 0 1 1 0 1 0 0 1 1 1 0 1 1 1 1 1 1 1 0 1 1
 1 0 1 0 1 0 1 0 1 0 0 0 1 1 1 1 0 1 0 0 1 0 0 1 0 1]
y_test:
 [1 1 0 0 1 0 1 0 1 0 0 0 0 1 1 1 1 0 1 0 1 0 0 1 0 0 1 0 0 1 0 0 0 1 0 0 0
 1 1 0 0 1 1 1 1 0 1 0 1 0 1 0 0 1 1 1 1 1 1 0 1 0 1 0 1 1 1 1 1 1 0 0 1 0
 1 1 1 1 1 0 1 0 1 0 0 0 1 1 1 1 0 1 0 0 1 1 0 1 0 1]


In [19]:
# Score the held-out fold with accuracy, F1 and ROC AUC.
# NOTE: `pred` comes from clf.predict(), so the AUC below is computed from
# hard class labels rather than from continuous scores or probabilities.
acc = metrics.accuracy_score(y_true=y_test, y_pred=pred)
print("Accuracy:\n", acc)

f1 = metrics.f1_score(y_true=y_test, y_pred=pred)
print("F1 Score:\n", f1)

auc = metrics.roc_auc_score(y_true=y_test, y_score=pred)
print("AUC ROC:\n", auc)

Accuracy:
 0.81
F1 Score:
 0.8224299065420562
AUC ROC:
 0.8111111111111111


#### $C=1e2$

In [20]:
# RBF-kernel SVM with C = 100.0 and gamma = 0.1:
# fit on the training fold, then label the held-out fold.
from sklearn.svm import SVC

clf = SVC(kernel="rbf", gamma=0.1, C=100.0).fit(X_train, y_train)
pred = clf.predict(X_test)

In [21]:
# Show the predicted labels followed by the true labels of the held-out fold.
for caption, labels in (("Predict:\n", pred), ("y_test:\n", y_test)):
    print(caption, labels)

Predict:
 [1 0 0 1 1 0 1 0 0 0 0 0 0 1 1 0 1 0 1 0 1 1 0 1 0 0 1 0 0 1 0 0 0 1 0 0 0
 0 1 0 0 1 1 0 1 0 1 0 1 1 1 0 0 1 1 1 0 1 0 0 1 0 1 0 1 1 1 1 1 1 1 0 1 0
 1 0 1 0 1 0 1 0 1 0 0 0 1 1 1 1 0 1 0 0 1 0 0 1 0 1]
y_test:
 [1 1 0 0 1 0 1 0 1 0 0 0 0 1 1 1 1 0 1 0 1 0 0 1 0 0 1 0 0 1 0 0 0 1 0 0 0
 1 1 0 0 1 1 1 1 0 1 0 1 0 1 0 0 1 1 1 1 1 1 0 1 0 1 0 1 1 1 1 1 1 0 0 1 0
 1 1 1 1 1 0 1 0 1 0 0 0 1 1 1 1 0 1 0 0 1 1 0 1 0 1]


In [22]:
# Score the held-out fold with accuracy, F1 and ROC AUC.
# NOTE: `pred` comes from clf.predict(), so the AUC below is computed from
# hard class labels rather than from continuous scores or probabilities.
acc = metrics.accuracy_score(y_true=y_test, y_pred=pred)
print("Accuracy:\n", acc)

f1 = metrics.f1_score(y_true=y_test, y_pred=pred)
print("F1 Score:\n", f1)

auc = metrics.roc_auc_score(y_true=y_test, y_score=pred)
print("AUC ROC:\n", auc)

Accuracy:
 0.86
F1 Score:
 0.8653846153846154
AUC ROC:
 0.8646464646464647


### RandomForestClassifier

#### n_estimators = 10

In [23]:
# Random forest with n_estimators=10:
# fit on the training fold, then label the held-out fold.
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators=10).fit(X_train, y_train)
pred = clf.predict(X_test)

In [24]:
# Show the predicted labels followed by the true labels of the held-out fold.
for caption, labels in (("Predict:\n", pred), ("y_test:\n", y_test)):
    print(caption, labels)

Predict:
 [1 0 0 1 1 0 1 0 0 0 0 1 0 1 1 0 1 0 0 0 1 1 0 1 1 0 1 0 0 1 0 0 0 1 0 0 0
 0 1 0 0 1 1 0 1 0 1 0 1 0 1 1 0 1 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1 1 0 0 1 0
 1 1 1 0 1 0 1 0 1 0 0 0 1 1 0 1 0 1 0 1 1 0 0 1 0 1]
y_test:
 [1 1 0 0 1 0 1 0 1 0 0 0 0 1 1 1 1 0 1 0 1 0 0 1 0 0 1 0 0 1 0 0 0 1 0 0 0
 1 1 0 0 1 1 1 1 0 1 0 1 0 1 0 0 1 1 1 1 1 1 0 1 0 1 0 1 1 1 1 1 1 0 0 1 0
 1 1 1 1 1 0 1 0 1 0 0 0 1 1 1 1 0 1 0 0 1 1 0 1 0 1]


In [25]:
# Score the held-out fold with accuracy, F1 and ROC AUC.
# NOTE: `pred` comes from clf.predict(), so the AUC below is computed from
# hard class labels rather than from continuous scores or probabilities.
acc = metrics.accuracy_score(y_true=y_test, y_pred=pred)
print("Accuracy:\n", acc)

f1 = metrics.f1_score(y_true=y_test, y_pred=pred)
print("F1 Score:\n", f1)

auc = metrics.roc_auc_score(y_true=y_test, y_score=pred)
print("AUC ROC:\n", auc)

Accuracy:
 0.81
F1 Score:
 0.8224299065420562
AUC ROC:
 0.8111111111111111


#### n_estimators = 100

In [26]:
# Random forest with n_estimators=100:
# fit on the training fold, then label the held-out fold.
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators=100).fit(X_train, y_train)
pred = clf.predict(X_test)

In [27]:
# Show the predicted labels followed by the true labels of the held-out fold.
for caption, labels in (("Predict:\n", pred), ("y_test:\n", y_test)):
    print(caption, labels)

Predict:
 [0 1 0 1 1 0 1 0 0 0 0 1 0 1 1 0 1 0 0 0 1 1 0 1 1 0 1 0 0 1 0 0 0 1 0 0 0
 0 1 0 0 1 1 0 1 0 1 0 1 1 1 1 0 1 1 1 0 1 0 0 1 1 1 0 1 1 1 1 1 1 0 0 1 0
 1 1 1 0 1 0 1 0 1 0 0 0 1 1 0 1 0 1 0 0 1 0 0 1 0 1]
y_test:
 [1 1 0 0 1 0 1 0 1 0 0 0 0 1 1 1 1 0 1 0 1 0 0 1 0 0 1 0 0 1 0 0 0 1 0 0 0
 1 1 0 0 1 1 1 1 0 1 0 1 0 1 0 0 1 1 1 1 1 1 0 1 0 1 0 1 1 1 1 1 1 0 0 1 0
 1 1 1 1 1 0 1 0 1 0 0 0 1 1 1 1 0 1 0 0 1 1 0 1 0 1]


In [28]:
# Score the held-out fold with accuracy, F1 and ROC AUC.
# NOTE: `pred` comes from clf.predict(), so the AUC below is computed from
# hard class labels rather than from continuous scores or probabilities.
acc = metrics.accuracy_score(y_true=y_test, y_pred=pred)
print("Accuracy:\n", acc)

f1 = metrics.f1_score(y_true=y_test, y_pred=pred)
print("F1 Score:\n", f1)

auc = metrics.roc_auc_score(y_true=y_test, y_score=pred)
print("AUC ROC:\n", auc)

Accuracy:
 0.82
F1 Score:
 0.8301886792452831
AUC ROC:
 0.8222222222222222


#### n_estimators = 1000

In [29]:
# Random forest with n_estimators=1000:
# fit on the training fold, then label the held-out fold.
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators=1000).fit(X_train, y_train)
pred = clf.predict(X_test)

In [30]:
# Show the predicted labels followed by the true labels of the held-out fold.
for caption, labels in (("Predict:\n", pred), ("y_test:\n", y_test)):
    print(caption, labels)

Predict:
 [1 0 0 1 1 0 1 0 0 0 0 1 0 1 1 0 1 0 0 0 1 1 0 1 1 0 1 0 0 1 0 0 0 1 0 0 0
 0 1 0 0 1 1 0 1 0 1 0 1 1 1 1 0 1 1 1 0 1 0 0 1 1 1 0 1 1 1 1 1 1 0 0 1 0
 1 1 1 0 1 0 1 0 1 0 0 0 1 1 0 1 0 1 0 0 1 0 0 1 0 1]
y_test:
 [1 1 0 0 1 0 1 0 1 0 0 0 0 1 1 1 1 0 1 0 1 0 0 1 0 0 1 0 0 1 0 0 0 1 0 0 0
 1 1 0 0 1 1 1 1 0 1 0 1 0 1 0 0 1 1 1 1 1 1 0 1 0 1 0 1 1 1 1 1 1 0 0 1 0
 1 1 1 1 1 0 1 0 1 0 0 0 1 1 1 1 0 1 0 0 1 1 0 1 0 1]


In [31]:
# Score the held-out fold with accuracy, F1 and ROC AUC.
# NOTE: `pred` comes from clf.predict(), so the AUC below is computed from
# hard class labels rather than from continuous scores or probabilities.
acc = metrics.accuracy_score(y_true=y_test, y_pred=pred)
print("Accuracy:\n", acc)

f1 = metrics.f1_score(y_true=y_test, y_pred=pred)
print("F1 Score:\n", f1)

auc = metrics.roc_auc_score(y_true=y_test, y_score=pred)
print("AUC ROC:\n", auc)

Accuracy:
 0.82
F1 Score:
 0.8301886792452831
AUC ROC:
 0.8222222222222222
