# <div style="text-align:center">Classification Algorithms and Evaluation Metrics</div>

In [None]:
import matplotlib.pyplot as plt
import numpy as np
# These are scikit-learn subpackages; several well-known datasets ship with
# scikit-learn in its `datasets` subpackage.
from sklearn import datasets, metrics, svm
# Estimators are imported from their public packages. The private module paths
# used previously (e.g. sklearn.ensemble.forest) were deprecated in
# scikit-learn 0.22 and later removed, so they fail on current releases.
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest, SelectPercentile, chi2
from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import BernoulliNB, GaussianNB, MultinomialNB
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Load scikit-learn's bundled handwritten-digits dataset
# (a small digits set shipped with the library, not the full MNIST set).
digits_dataset = datasets.load_digits()
# Pair every image with its label; zip yields tuples, which are unpacked
# into a list so they can be sliced and displayed.
images_and_labels = [*zip(digits_dataset.images, digits_dataset.target)]
images_and_labels

In [None]:
# Dimensions of the image array
digits_dataset.images.shape

In [None]:
# Preview the first four digits, each in its own slot of a 2x4 subplot grid.
for position, (digit_image, digit_label) in enumerate(images_and_labels[:4], start=1):
    plt.subplot(2, 4, position)
    plt.axis('off')
    plt.imshow(digit_image, cmap=plt.cm.gray_r, interpolation='nearest')
    plt.title('Training: %i' % digit_label)

In [None]:
# Sample count = length of the first axis of the image array
n_samples = digits_dataset.images.shape[0]
n_samples

In [None]:
# Display the raw image array before it is flattened into feature rows
digits_dataset.images

In [None]:
# Flatten each image into a single feature row, giving a matrix of shape
# (n_samples, pixels_per_image) -- one row per sample, one column per pixel.
data = digits_dataset.images.reshape(n_samples, -1)
data

In [None]:
# Support vector classifier (SVC) with gamma=0.001
half = n_samples // 2
svm_clf = svm.SVC(gamma=0.001)
# Learning step: train on the first half of the samples only; the second
# half is held out for evaluation.
svm_clf.fit(X=data[:half], y=digits_dataset.target[:half])


In [None]:
# Evaluate the SVC on the held-out second half of the samples:
# true labels vs. model predictions, summarised per class.
half = n_samples // 2
expected, predicted = digits_dataset.target[half:], svm_clf.predict(data[half:])
print(metrics.classification_report(expected, predicted))



In [None]:
# Confusion matrix of the most recent evaluation (true labels vs. predictions)
print(metrics.confusion_matrix(y_true=expected, y_pred=predicted))

In [None]:
# SVC with a linear kernel: train on the first half, report on the second half.
half = n_samples // 2
svm2_clf = svm.SVC(kernel='linear').fit(data[:half], digits_dataset.target[:half])
expected, predicted = digits_dataset.target[half:], svm2_clf.predict(data[half:])
print(metrics.classification_report(expected, predicted))

In [None]:
# SVC with gamma=0.001 and C=10: train on the first half, report on the second half.
half = n_samples // 2
svm3_clf = svm.SVC(gamma=0.001, C=10).fit(data[:half], digits_dataset.target[:half])
expected, predicted = digits_dataset.target[half:], svm3_clf.predict(data[half:])
print(metrics.classification_report(expected, predicted))

In [None]:
# Random forest with default settings: train on the first half of the samples,
# then report per-class metrics on the held-out second half.
half = n_samples // 2
rf_clf = RandomForestClassifier().fit(data[:half], digits_dataset.target[:half])
expected, predicted = digits_dataset.target[half:], rf_clf.predict(data[half:])
print(metrics.classification_report(expected, predicted))

In [None]:
# Default random forest again (same experiment as the cell above). No
# random_state is passed, so the scores may differ from one run to the next.
half = n_samples // 2
train_features, test_features = data[:half], data[half:]
train_labels, expected = digits_dataset.target[:half], digits_dataset.target[half:]

rf_clf = RandomForestClassifier()
rf_clf.fit(train_features, train_labels)
predicted = rf_clf.predict(test_features)
print(metrics.classification_report(expected, predicted))

In [None]:
# Random forest with 1000 trees: train on the first half, report on the second half.
half = n_samples // 2
rf2_clf = RandomForestClassifier(n_estimators=1000).fit(
    data[:half], digits_dataset.target[:half]
)
expected, predicted = digits_dataset.target[half:], rf2_clf.predict(data[half:])
print(metrics.classification_report(expected, predicted))

In [None]:
# Random forest with 1000 trees, built in parallel (n_jobs=-1):
# train on the first half, report on the second half.
half = n_samples // 2
rf3_clf = RandomForestClassifier(n_jobs=-1, n_estimators=1000).fit(
    data[:half], digits_dataset.target[:half]
)
expected, predicted = digits_dataset.target[half:], rf3_clf.predict(data[half:])
print(metrics.classification_report(expected, predicted))

In [None]:
# k-nearest neighbours with default settings:
# train on the first half, report on the second half.
half = n_samples // 2
knn_clf = KNeighborsClassifier().fit(data[:half], digits_dataset.target[:half])
expected, predicted = digits_dataset.target[half:], knn_clf.predict(data[half:])
print(metrics.classification_report(expected, predicted))

In [None]:
# k-nearest neighbours using 100 neighbours with distance-based vote weights:
# train on the first half, report on the second half.
half = n_samples // 2
knn2_clf = KNeighborsClassifier(n_neighbors=100, weights='distance').fit(
    data[:half], digits_dataset.target[:half]
)
expected, predicted = digits_dataset.target[half:], knn2_clf.predict(data[half:])
print(metrics.classification_report(expected, predicted))

In [None]:
# k-nearest neighbours using the ball-tree search algorithm.
# Bound to its own name (knn3_clf) so it no longer overwrites the
# distance-weighted knn2_clf model trained in the previous cell, matching the
# clf / clf2 / clf3 naming used for the other model families.
knn3_clf = KNeighborsClassifier(algorithm='ball_tree')
# Train on the first half of the samples; the second half is held out.
knn3_clf.fit(data[:n_samples // 2], digits_dataset.target[:n_samples // 2])
expected = digits_dataset.target[n_samples // 2:]
predicted = knn3_clf.predict(data[n_samples // 2:])
print(metrics.classification_report(expected, predicted))

In [None]:
# Multinomial naive Bayes with default smoothing:
# train on the first half, report on the second half.
half = n_samples // 2
mnb_clf = MultinomialNB().fit(data[:half], digits_dataset.target[:half])
expected, predicted = digits_dataset.target[half:], mnb_clf.predict(data[half:])
print(metrics.classification_report(expected, predicted))

In [None]:
# Multinomial naive Bayes with (effectively) no smoothing.
# alpha=0 exactly is numerically unsafe: a feature never seen for a class gets
# probability 0 and its log is -inf. Older scikit-learn releases warn and
# silently replace 0 with 1e-10; newer ones can use 0 as-is. Passing 1e-10
# explicitly gives the same "almost unsmoothed" model on every version,
# without the warning or the numeric errors.
mnb2_clf = MultinomialNB(alpha=1e-10)
# Train on the first half of the samples; the second half is held out.
mnb2_clf.fit(data[:n_samples // 2], digits_dataset.target[:n_samples // 2])
expected = digits_dataset.target[n_samples // 2:]
predicted = mnb2_clf.predict(data[n_samples // 2:])
print(metrics.classification_report(expected, predicted))

In [None]:
# Gaussian naive Bayes: train on the first half, report on the second half.
half = n_samples // 2
gnb_clf = GaussianNB().fit(data[:half], digits_dataset.target[:half])
expected, predicted = digits_dataset.target[half:], gnb_clf.predict(data[half:])
print(metrics.classification_report(expected, predicted))

In [None]:
# Bernoulli naive Bayes: train on the first half, report on the second half.
half = n_samples // 2
bnb_clf = BernoulliNB().fit(data[:half], digits_dataset.target[:half])
expected, predicted = digits_dataset.target[half:], bnb_clf.predict(data[half:])
print(metrics.classification_report(expected, predicted))

In [None]:
# Linear classifier trained with stochastic gradient descent, default settings:
# train on the first half, report on the second half.
half = n_samples // 2
sgd_clf = SGDClassifier().fit(data[:half], digits_dataset.target[:half])
expected, predicted = digits_dataset.target[half:], sgd_clf.predict(data[half:])
print(metrics.classification_report(expected, predicted))

In [None]:
# SGD classifier with alpha=2: train on the first half, report on the second half.
half = n_samples // 2
sgd2_clf = SGDClassifier(alpha=2).fit(data[:half], digits_dataset.target[:half])
expected, predicted = digits_dataset.target[half:], sgd2_clf.predict(data[half:])
print(metrics.classification_report(expected, predicted))

In [None]:
# Chi-squared feature selection keeping the top 80 percent of features.
# `percentile` is passed by keyword: it is keyword-only in current
# scikit-learn, so the positional form SelectPercentile(chi2, 80) raises
# TypeError there.
ch2 = SelectPercentile(chi2, percentile=80)
# Fit the selector on the training half only, so the labels of the held-out
# half cannot influence which features are kept (fitting on all rows leaks
# evaluation labels into the model pipeline).
ch2.fit(data[:n_samples // 2], digits_dataset.target[:n_samples // 2])
# Apply the fitted selection to every row. X_train therefore still holds one
# row per sample (train and held-out halves), which is what the cell that
# slices it with n_samples // 2 expects.
X_train = ch2.transform(data)
# Shapes before and after selection: same row count, fewer columns.
print(np.shape(data))
print(np.shape(X_train))

In [None]:
# Display the per-feature scores stored on the fitted selector
ch2.scores_

In [None]:
# SGD classifier with alpha=2, this time on the feature-selected matrix:
# train on its first half of rows, report on the second half.
half = n_samples // 2
sgd3_clf = SGDClassifier(alpha=2).fit(X_train[:half], digits_dataset.target[:half])
expected, predicted = digits_dataset.target[half:], sgd3_clf.predict(X_train[half:])
print(metrics.classification_report(expected, predicted))