# K-Nearest Neighbors (KNN)

### Importing Python modules

In [1]:
import numpy as np
import pandas as pd
import mlflow
import mlflow.sklearn
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix

### Setting the tracking URI to the current folder

In [2]:
# Point MLflow at an empty tracking URI; per the section heading this is meant
# to keep tracking data in the current folder (the recorded experiment below
# shows its artifacts under a local `mlruns` directory).
mlflow.set_tracking_uri(uri='')
# Last expression of the cell: display the URI that is now in effect.
mlflow.tracking.get_tracking_uri()

''

### Creating an experiment and verifying its existence

In [3]:
# Create the 'Results' experiment only when it does not exist yet, so this cell
# can be re-run (and survives Restart & Run All) without editing comments in/out.
# `get_experiment_by_name` returns None for an unknown experiment name.
if mlflow.get_experiment_by_name('Results') is None:
    mlflow.create_experiment('Results')
# Last expression of the cell: display the experiment to verify its existence.
mlflow.get_experiment_by_name('Results')

<Experiment: artifact_location='file:///C:/Users/stevan.stanovic/Desktop/MLFlow%20local/Local/mlruns/1', experiment_id='1', lifecycle_stage='active', name='Results', tags={}>

### Importing data
After importing the data, the head of the dataset looks like this:

![Image of head of Churn Modelling](https://raw.githubusercontent.com/StevanStanovic/mlflow/master/1%20-%20MLFlow%20Tracking%20pour%20un%20algorithme%20supervisé/Images/Head_Churn_Modelling.PNG)

In [4]:
def import_data(path):
    """Load a CSV file and return an encoded, standardized feature matrix and target.

    Parameters
    ----------
    path : str
        Location of the CSV file, passed to ``pandas.read_csv``.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` is the standardized feature matrix and ``y`` is
        the last column of the file, unchanged.
    """
    frame = pd.read_csv(path)

    # Features: every column from position 3 up to (not including) the last one.
    # Target: the last column.
    features = frame.iloc[:, 3:-1].values
    target = frame.iloc[:, -1].values

    # Label-encode the feature at position 2 in place
    # (presumably a two-valued categorical column - confirm against the CSV).
    features[:, 2] = LabelEncoder().fit_transform(features[:, 2])

    # One-hot encode the feature at position 1 and pass every other column through.
    one_hot = ColumnTransformer(
        transformers=[('encoder', OneHotEncoder(), [1])],
        remainder='passthrough',
    )
    features = np.array(one_hot.fit_transform(features))

    # Standardize all resulting columns.
    # NOTE(review): the scaler is fitted on every row of the file, so a
    # train/test split performed afterwards uses statistics that include the
    # test rows.
    features = StandardScaler().fit_transform(features)

    return (features, target)

### Starting an MLflow experiment

In [5]:
def mlflow_run(exp_id=None, run_name=None):
    """Train a KNN classifier inside an MLflow run and log tags, parameters, metrics and model.

    Parameters
    ----------
    exp_id : optional
        Experiment ID forwarded to ``mlflow.start_run``.
    run_name : str, optional
        Run name forwarded to ``mlflow.start_run``. It is also the prefix of the
        logged model's artifact path (``"<run_name>_Model"``); when it is
        ``None`` the model is logged under ``"Model"``.

    Returns
    -------
    The ID of the MLflow run that was created (``run.info.run_uuid``).
    """
    with mlflow.start_run(run_name=run_name, experiment_id=exp_id) as run:

        # Recovering run ID
        run_id = run.info.run_uuid

        # Importing and splitting data
        (X, y) = import_data('Churn_Modelling.csv')
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

        # Creating classifier from a single parameter dict, so that the values
        # logged to MLflow can never drift from the ones the model really uses.
        knn_params = {
          "n_neighbors" : 2,
          "metric" : "minkowski",
          "p" : 2
        }
        classifier = KNeighborsClassifier(**knn_params)

        # Logging tags and parameters
        mlflow.set_tag("Number of parameters", len(knn_params))
        mlflow.log_params(knn_params)
        mlflow.set_tag("Theorical information", "metrics->minkowski + p->2 = euclidian norm")

        # Fitting the training set and predicting the test set
        classifier.fit(X_train, y_train)
        y_pred = classifier.predict(X_test)

        # Storing confusion matrix metrics in variables.
        # For binary labels, scikit-learn's confusion_matrix(...).ravel() yields
        # (tn, fp, fn, tp) in that order: rows are true labels, columns are
        # predicted labels, and the negative class comes first.
        TN, FP, FN, TP = confusion_matrix(y_test, y_pred).ravel()
        sensitivity_recall_TPR = TP/(TP+FN)
        specificity_TNR = TN/(TN+FP)
        precision_PPV = TP/(TP+FP)
        NPV = TN/(TN+FN)
        fallout_FPR = 1 - specificity_TNR
        FDR = 1 - precision_PPV
        accuracy = (TP+TN)/(y_test.shape[0])

        # Logging metrics
        mlflow.log_metric("sensitivity_recall_TPR", sensitivity_recall_TPR)
        mlflow.log_metric("specificity_TNR", specificity_TNR)
        mlflow.log_metric("precision_PPV", precision_PPV)
        mlflow.log_metric("NPV", NPV)
        mlflow.log_metric("fallout_FPR", fallout_FPR)
        mlflow.log_metric("FDR", FDR)
        mlflow.log_metric("accuracy", accuracy)

        # Logging model; run_name defaults to None, which cannot be concatenated
        # with a string, so fall back to a plain artifact path in that case.
        model_path = run_name + "_Model" if run_name is not None else "Model"
        mlflow.sklearn.log_model(classifier, model_path)

        return run_id

### Running the experiment

In [6]:
if __name__=='__main__':
    # Setting experiment ID and run name.
    # The ID is resolved from the experiment name instead of being typed in by
    # hand, so the cell runs unchanged on any tracking store; the experiment is
    # created on the fly when it does not exist yet.
    experiment = mlflow.get_experiment_by_name('Results')
    if experiment is None:
        exp_id = mlflow.create_experiment('Results')
    else:
        exp_id = experiment.experiment_id
    run_name = "KNN"
    run_id = mlflow_run(exp_id, run_name)
    print("Finished experiment !\n exp_id = {} et run_id = {}".format(exp_id, run_id))

Finished experiment !
 exp_id = 1 et run_id = 05a7656e8b2c4997b243da84480b34af
