In [1]:
from KNN.modelling import KNN_data_collection as knn
from KNN.modelling import generate_predictions as gp
from KNN.assessment import model_metrics as mm
from KNN.assessment import cross_validation as cv

In [2]:
import numpy as np

## Demonstrating exceptions

**KNN classifier using Iris dataset**

*Create model & perform train/test split*

In [3]:
# Instantiate a KNN model of type 'classifier' with k=8
iris_classifier = knn.KNN('classifier', 8)

Created KNN classifier with k=8!


In [4]:
# Read the Iris CSV into the classifier, with "Species" as the response variable
iris_classifier.load_csv('datasets/Iris.csv', 'Species')

Dataset successfully loaded!


In [5]:
# Hold out 20% of the samples as the test set (an 80/20 train/test split)
iris_classifier.train_test_split(0.2)

Successfully completed train/test split!
Training set: 120 samples
Test set: 30 samples


*Generate predictions*

In [6]:
# Number of columns (predictors) in the training data
iris_classifier.x_train.shape[1]

4

In [7]:
# Predict a single observation via the "generate_predictions" module;
# the prediction uses a fit on the entire dataset
gp.generate_prediction(iris_classifier, [6.3, 2.3, 4.4, 1.3], 'all')

array('Iris-versicolor', dtype=object)

### Try `generate_prediction` function with invalid number of predictors

In [8]:
# Raises InvalidNumPredictors custom exception (observation has 3 predictors, model has 4).
# The error is caught and printed so that "Restart & Run All" can continue past this
# deliberate failure. Exception is caught because the custom exception class is not
# imported in the cells shown here.
try:
    gp.generate_prediction(iris_classifier, [2.3, 4.4, 1.3], 'all')
except Exception as err:
    print('{}: {}'.format(type(err).__name__, err))

InvalidNumPredictors: Model training data contains 4 predictors, while inputted observation contains 3 predictors

In [9]:
# Raises InvalidNumPredictors custom exception (observation has 5 predictors, model has 4).
# The error is caught and printed so that "Restart & Run All" can continue past this
# deliberate failure. Exception is caught because the custom exception class is not
# imported in the cells shown here.
try:
    gp.generate_prediction(iris_classifier, [2.3, 4.4, 1.3, 1, 1], 'all')
except Exception as err:
    print('{}: {}'.format(type(err).__name__, err))

InvalidNumPredictors: Model training data contains 4 predictors, while inputted observation contains 5 predictors

### Try `euclidean_distance` function with invalid number of predictors

In [10]:
# Points of different lengths (3 vs 2): a message is printed and nan is returned
gp.euclidean_distance(np.array([1, 2, 3]), np.array([1, 2]))

Array lengths do not match. Points must be of equal dimensions to compute euclidean distance.


nan

### Try `generate_predictions` function with invalid number of predictors

In [11]:
# Generate several predictions in one call.
# 'all' is the same option passed in the single-prediction cell, which is described
# there as a fit on the entire dataset. Every row below has only 3 predictors.
to_predict = np.array([
    [6.3, 2.3, 4.4],
    [6.8, 3.2, 5.9],
    [4.3, 3, 1.1],
])
gp.generate_predictions(iris_classifier, to_predict, 'all')

At least one inputted observation contains an invalid number of predictors. Details:
Model training data contains 4 predictors, while inputted observation contains 3 predictors


*Tune k hyperparameter using 6-fold cross validation*

In [12]:
# Set up a cross-validation instance for a KNN classifier using 6 folds
iris_cv = cv.CvKNN('classifier', 6)

Created CV instance for KNN classifier with 6 folds!


In [13]:
# Read the Iris CSV into the CV instance, with "Species" as the response variable
iris_cv.load_csv('datasets/Iris.csv', 'Species')
# Hold out 20% of the samples as the test set (an 80/20 train/test split)
iris_cv.train_test_split(0.2)

Dataset successfully loaded!
Successfully completed train/test split!
Training set: 120 samples
Test set: 30 samples


### Try creating CvKNN object with improper model type

In [19]:
# Raises ValueError exception with explanation ('cool_model' is not an accepted model type).
# The error is caught and printed so that "Restart & Run All" can continue past this
# deliberate failure.
try:
    new_cv = cv.CvKNN('cool_model', 6)
except ValueError as err:
    print('{}: {}'.format(type(err).__name__, err))

ValueError: cool_model is not a valid model type, must be "regressor" or "classifier"

### Try `get_cv_results` method before performing cross validation

In [14]:
# Try to get CV results before running perform_cv
# Raises CvNotPerformed custom exception.
# The error is caught and printed so that "Restart & Run All" can continue past this
# deliberate failure. Exception is caught because the custom exception class is not
# imported in the cells shown here.
try:
    iris_cv.get_cv_results()
except Exception as err:
    print('{}: {}'.format(type(err).__name__, err))

CvNotPerformed: Cross validation has not been performed. Run perform_cv function to access results

### Try `get_best_k` method before performing cross validation

In [15]:
# Raises CvNotPerformed custom exception (perform_cv has not been run yet).
# The error is caught and printed so that "Restart & Run All" can continue past this
# deliberate failure. Exception is caught because the custom exception class is not
# imported in the cells shown here.
try:
    iris_cv.get_best_k()
except Exception as err:
    print('{}: {}'.format(type(err).__name__, err))

CvNotPerformed: Cross validation has not been performed. Run perform_cv function to get best k

### Try to perform CV with more folds than samples in the training set

In [16]:
# Rebuild the CV instance with 121 folds, one more than the 120 training samples
iris_cv = cv.CvKNN('classifier', 121)

Created CV instance for KNN classifier with 121 folds!


In [17]:
# Read the Iris CSV into the CV instance, with "Species" as the response variable
iris_cv.load_csv('datasets/Iris.csv', 'Species')
# Hold out 20% of the samples as the test set (an 80/20 train/test split)
iris_cv.train_test_split(0.2)

Dataset successfully loaded!
Successfully completed train/test split!
Training set: 120 samples
Test set: 30 samples


In [18]:
# Raises TooManyFolds custom exception (121 folds requested, 120 training samples).
# The error is caught and printed so that "Restart & Run All" can continue past this
# deliberate failure. Exception is caught because the custom exception class is not
# imported in the cells shown here.
try:
    iris_cv.perform_cv([1, 2, 3, 4, 5])
except Exception as err:
    print('{}: {}'.format(type(err).__name__, err))

TooManyFolds: Number of folds must be less than or equal to number of samples in training set. 121-fold CV requested, but only 120 samples exist in training set.