### -*- Nuha Alghamdi -*-
### -*- nuhaalghamdi92@gmail.com -*-
### -*- Feb 22 2020-*-

* Six multi-class classifiers are used with the four types of vectors we generate.
* To download scikit learn library click [here](https://pypi.org/project/scikit-learn/)

In [None]:
import numpy as np

#Here we import the classification models from scikit learn library
from sklearn.linear_model import LogisticRegressionCV             #Logistic Regression
from sklearn.svm import SVC                                       #Support vector classifier
from sklearn.neighbors import KNeighborsClassifier                #K-nearest neighbour
from sklearn.svm import NuSVC                                     #Nu-support vector classifier
from sklearn.gaussian_process import GaussianProcessClassifier    #Gaussian process classifier
from sklearn.gaussian_process.kernels import RBF                  #Kernel for Gaussian process classifier
from sklearn.model_selection import cross_val_score               #For decision tree
from sklearn.tree import DecisionTreeClassifier                   #Decision tree classifier

#For train-test splitting
from sklearn.model_selection import train_test_split

#For precision, recall and f score
from sklearn.metrics import precision_recall_fscore_support

## Load the vectors and their labels (X and y)
## For each run, execute only one of the following four cells

In [None]:
#1
# Option 1: sentence vectors produced by the fastText library.
# X receives the vectors and y the matching labels.
vectors_file = 'X_vecs_fastText_vs.npy'
labels_file = 'y_labels_fastText_vs.npy'
X = np.load(vectors_file)
y = np.load(labels_file)

In [None]:
#2
# Option 2: averaged word vectors produced by the fastText library.
# X receives the vectors and y the matching labels.
vectors_file = 'X_vecs_fastText_vw.npy'
labels_file = 'y_labels_fastText_vw.npy'
X = np.load(vectors_file)
y = np.load(labels_file)

In [None]:
#3
# Option 3: sentence vectors produced by the gensim implementation of fastText.
# X receives the vectors and y the matching labels.
vectors_file = 'X_vecs_gen.npy'
labels_file = 'y_labels_gen.npy'
X = np.load(vectors_file)
y = np.load(labels_file)

In [None]:
#4
# Option 4: averaged word vectors produced by the gensim implementation of fastText.
# X receives the vectors and y the matching labels.
vectors_file = 'X_vecs_tokenized_gen.npy'
labels_file = 'y_labels_tokenized_gen.npy'
X = np.load(vectors_file)
y = np.load(labels_file)

## Start Training

In [None]:
# Hold out 15% of the loaded samples for testing; the fixed random_state
# makes the split repeatable across runs.
split_parts = train_test_split(X, y, test_size=0.15, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Logistic regression (cross-validated variant), multinomial formulation
clf = LogisticRegressionCV(
    multi_class='multinomial',
    verbose=3,
    random_state=42,
).fit(X_train, y_train)

# Score on the held-out test split
test_score = clf.score(X_test, y_test)
print(test_score)

# Macro-averaged precision, recall and F-score on the same split
y_pred = clf.predict(X_test)
macro_metrics = precision_recall_fscore_support(y_test, y_pred, average='macro')
print(macro_metrics)

In [None]:
# Support vector classifier with a linear kernel and C = 1
svm_model_linear = SVC(kernel='linear', C=1)
svm_model_linear.fit(X_train, y_train)

# Score on the held-out test split
test_score = svm_model_linear.score(X_test, y_test)
print(test_score)

# Macro-averaged precision, recall and F-score on the same split
y_pred = svm_model_linear.predict(X_test)
macro_metrics = precision_recall_fscore_support(y_test, y_pred, average='macro')
print(macro_metrics)

In [None]:
# K-nearest neighbours classifier using the 3 closest training samples
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# Score on the held-out test split
test_score = knn.score(X_test, y_test)
print(test_score)

# Macro-averaged precision, recall and F-score on the same split
y_pred = knn.predict(X_test)
macro_metrics = precision_recall_fscore_support(y_test, y_pred, average='macro')
print(macro_metrics)

In [None]:
# Nu-support vector classifier with its default settings
clf = NuSVC().fit(X_train, y_train)

# Score on the held-out test split
test_score = clf.score(X_test, y_test)
print(test_score)

# Macro-averaged precision, recall and F-score on the same split
y_pred = clf.predict(X_test)
macro_metrics = precision_recall_fscore_support(y_test, y_pred, average='macro')
print(macro_metrics)

In [None]:
#GaussianProcess
# Gaussian process classifier with a scaled RBF kernel, one-vs-one multi-class mode.
kernel = 1.0 * RBF(1.0)
# Fit on the training split only (previously the full X, y was used, which
# includes the held-out test samples), then report the same metrics that the
# other classifier cells print.
gpc = GaussianProcessClassifier(multi_class = "one_vs_one", kernel=kernel,random_state=0).fit(X_train, y_train)
print(gpc.score(X_test, y_test))
y_pred=gpc.predict(X_test)
print(precision_recall_fscore_support(y_test, y_pred, average='macro'))

In [None]:
# Decision tree classifier; random_state fixed at 0 for repeatable results
clf = DecisionTreeClassifier(random_state=0)
clf.fit(X_train, y_train)

# Score on the held-out test split
test_score = clf.score(X_test, y_test)
print(test_score)

# Macro-averaged precision, recall and F-score on the same split
y_pred = clf.predict(X_test)
macro_metrics = precision_recall_fscore_support(y_test, y_pred, average='macro')
print(macro_metrics)