### Mounting Google Drive


In [None]:
# Colab-only setup: mount Google Drive at /gdrive with force_remount enabled.
from google.colab import drive

drive.mount('/gdrive', force_remount=True)

Mounted at /gdrive


### Importing necessary libraries

In [None]:
import numpy as np
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix
from sklearn.model_selection import cross_validate
import matplotlib.pyplot as plt

### Support Vector Classifier

In [None]:
# Hyper-parameter grid searched for the support vector classifier.
parameters = [
    {
        # Every kernel name must be its own list element. Adjacent string
        # literals without a comma between them are concatenated by Python,
        # which would yield a single, invalid kernel name.
        "kernel": ["rbf", "poly", "linear"],
        "C": [0.1, 0.5, 1, 5],
        "gamma": [0.01, 0.05, 0.1],
    }
]

In [None]:
def svm_classifier(X_train, X_test, y_train, y_test, parameters):
  '''Tune an SVC with a grid search and report how it does on the test split.

  A 5-fold GridSearchCV is run over `parameters` on the training data. The
  fitted search object then predicts `X_test`, a confusion matrix
  (normalize='true') is plotted, and the test score together with a
  classification report is printed.

  Args:
    X_train, X_test: features of the training and test splits.
    y_train, y_test: matching targets; both must provide `.ravel()`.
    parameters: grid (or list of grids) passed to GridSearchCV as `param_grid`.

  Returns:
    Tuple `(tuned_clf, ypred, acc, fp)`: the search's `best_estimator_`, the
    predictions for `X_test`, the value of the search's `score` on the test
    split, and the false-positive entry of the confusion matrix.

  Note:
    The confusion matrix is unpacked into exactly four values, so the
    function only works when it is 2x2 (two classes).
  '''
  banner = "###################"
  base_svc = SVC(probability = True)

  print("finding best value for C and gamma")
  print("################")

  # Exhaustive search over the grid for the best C / gamma / kernel.
  # NOTE(review): no `scoring` is passed, so candidates are presumably ranked
  # with the estimator's default scorer and the 'precision_weighted' string
  # only acts as a truthy refit flag -- confirm against the GridSearchCV docs.
  search = GridSearchCV(
      base_svc,
      param_grid = parameters,
      cv=5,
      n_jobs=-1,
      verbose=2,
      refit = 'precision_weighted',
  )
  search.fit(X_train, y_train.ravel())
  tuned_clf = search.best_estimator_
  ypred = search.predict(X_test)

  # Only the false-positive count is returned; the other cells are unused.
  _tn, fp, _fn, _tp = confusion_matrix(y_test, ypred).ravel()

  cm_display = plot_confusion_matrix(
      search,
      X_test,
      y_test.ravel(),
      cmap=plt.cm.Blues,
      normalize='true',
  )
  cm_display.ax_.set_title('Confusion matrix')

  print(banner, end="\n\n")
  acc = search.score(X_test, y_test.ravel())
  print("Accuracy is", acc)
  print(banner, end="\n\n")
  print(classification_report(y_test, ypred))
  return tuned_clf, ypred, acc, fp

### Random Forest Classifier.

In [None]:
# Candidate hyper-parameter values for the random forest grid search.

# Five evenly spaced tree counts between 50 and 500, truncated to ints.
n_estimators = np.linspace(start = 50, stop = 500, num = 5).astype(int).tolist()
max_features = ['sqrt']
# Five evenly spaced depths between 10 and 40, plus None as a sixth option.
max_depth = [*np.linspace(10, 40, num = 5).astype(int).tolist(), None]
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]
bootstrap = [True]

# Grid handed to GridSearchCV; keys are RandomForestClassifier argument names.
parameters = dict(
    n_estimators=n_estimators,
    max_features=max_features,
    max_depth=max_depth,
    min_samples_split=min_samples_split,
    min_samples_leaf=min_samples_leaf,
    bootstrap=bootstrap,
)

In [None]:
def Random_forest_clf(X_train, X_test, y_train, y_test, parameters):
  '''Tune a random forest with GridSearchCV and evaluate it on the test split.

  A 3-fold grid search over `parameters` is fitted on the training data. The
  fitted search object predicts `X_test`, a confusion matrix
  (normalize='true') is plotted, and the test score plus a classification
  report are printed.

  Args:
    X_train, X_test: features of the training and test splits.
    y_train, y_test: matching targets; both must provide `.ravel()`.
    parameters: grid passed to GridSearchCV as `param_grid`.

  Returns:
    Tuple `(tuned_clf, ypred, acc, fp)`: the search's `best_estimator_`, the
    predictions for `X_test`, the value of the search's `score` on the test
    split, and the false-positive entry of the confusion matrix.

  Note:
    The confusion matrix is unpacked into exactly four values, so the
    function only works when it is 2x2 (two classes).
  '''
  rfc = RandomForestClassifier()
  # NOTE(review): no `scoring` is passed, so candidates are presumably ranked
  # with the estimator's default scorer and the 'precision_weighted' string
  # only acts as a truthy refit flag -- confirm against the GridSearchCV docs.
  clf = GridSearchCV(rfc, param_grid = parameters,  cv=3, n_jobs=-1, verbose=2, refit = 'precision_weighted')
  clf.fit(X_train, y_train.ravel())
  ypred = clf.predict(X_test)

  # Only the false-positive count is returned, so the other cells are
  # discarded. Class probabilities were never used and are no longer computed.
  _, fp, _, _ = confusion_matrix(y_test, ypred).ravel()
  disp = plot_confusion_matrix(clf, X_test, y_test.ravel(),
                                 cmap=plt.cm.Blues, normalize = 'true')

  tuned_clf = clf.best_estimator_
  disp.ax_.set_title('Confusion matrix')
  print("###################")
  print()
  acc = clf.score(X_test,y_test.ravel())
  print("Accuracy is", acc)
  print("###################")
  print()
  print(classification_report(y_test, ypred))
  return tuned_clf, ypred, acc, fp

### Logistic Regression Classifier.

In [None]:
def logistic_reg_clf(X_train, X_test, y_train, y_test, parameters):
  '''Fit a logistic regression classifier and evaluate it on the test split.

  The model is fitted once on the training data, predicts `X_test`, a
  confusion matrix (normalize='true') is plotted, and the test score plus a
  classification report are printed.

  Args:
    X_train, X_test: features of the training and test splits.
    y_train, y_test: matching targets; both must provide `.ravel()`.
    parameters: unused. Accepted so the signature matches the other
      classifier helpers in this file.

  Returns:
    Tuple `(tuned_clf, ypred, acc, fp)`: the fitted LogisticRegression, the
    predictions for `X_test`, the value of the model's `score` on the test
    split, and the false-positive entry of the confusion matrix.

  Note:
    The confusion matrix is unpacked into exactly four values, so the
    function only works when it is 2x2 (two classes).
  '''
  # Fit once, on the flattened targets. The previous extra fit on the
  # un-ravelled `y_train` was redundant because the second fit replaced it.
  clf = LogisticRegression(random_state=0)
  clf.fit(X_train, y_train.ravel())
  ypred = clf.predict(X_test)

  # Only the false-positive count is returned; the other cells are discarded.
  _, fp, _, _ = confusion_matrix(y_test, ypred).ravel()
  # NOTE(review): plot_confusion_matrix has been removed from recent
  # scikit-learn releases -- confirm the installed version still provides it.
  disp = plot_confusion_matrix(clf, X_test, y_test.ravel(),
                                 cmap=plt.cm.Blues, normalize = 'true')

  # There is no grid search here, so the fitted model itself is returned.
  # A plain LogisticRegression has no `best_estimator_` attribute.
  tuned_clf = clf
  disp.ax_.set_title('Confusion matrix')
  print("###################")
  print()
  acc = clf.score(X_test,y_test.ravel())
  print("Accuracy is", acc)
  print("###################")
  print()
  print(classification_report(y_test, ypred))
  return tuned_clf, ypred, acc, fp

In [None]:
from itertools import cycle

from sklearn.decomposition import PCA

# Project the feature matrix X onto two principal components for plotting.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

colors = ['r', 'g', 'b', 'c', 'm']

# Flatten the labels so that a column-vector `y` still yields a 1-D boolean
# row mask (the classifier helpers in this file ravel their targets too).
y_flat = np.ravel(y)

# Cycle through the palette so classes beyond the fifth are still plotted
# (with a repeated colour) instead of being silently dropped by zip().
for i, color in zip(np.unique(y_flat), cycle(colors)):
    plt.scatter(X_pca[y_flat == i, 0], X_pca[y_flat == i, 1], color=color, label=i)

plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('PCA Scatter Plot')
plt.legend()
plt.grid(True)
plt.show()