[Reference](https://ashhadulislam.medium.com/tipping-the-scales-68b806d3c929)

# K-Nearest Neighbor OveRsampling (KNNOR)

In [1]:
pip install knnor

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting knnor
  Downloading knnor-0.0.4.tar.gz (7.9 kB)
Building wheels for collected packages: knnor
  Building wheel for knnor (setup.py) ... done
  Created wheel for knnor: filename=knnor-0.0.4-py3-none-any.whl size=7048 sha256=2613fb590aaadf46e3ad6c0fc52505794f9cec46ad161cce554cf48a387e7f9d
  Stored in directory: /root/.cache/pip/wheels/fc/98/ae/e32b43c3f33dd287701122e2e5ae099577b0304dc65f754f4c
Successfully built knnor
Installing collected packages: knnor
Successfully installed knnor-0.0.4


In [2]:
from sklearn.datasets import load_breast_cancer
import numpy as np
import collections

from knnor import data_augment

# Load the breast-cancer dataset that ships with scikit-learn.
dataset = load_breast_cancer()

# Feature matrix and label vector used by the rest of the notebook.
X = dataset["data"]
y = dataset["target"]

# Distinct label values together with how often each one occurs.
unique, counts = np.unique(y, return_counts=True)
print('Unique values of the target variable', unique)
print('Counts of the target variable :', counts)

print("Original shape=", X.shape, y.shape)

# Tally the samples per class label and list them one per line.
elements_count = collections.Counter(y)
print("Original distribution:")
for key in elements_count:
    value = elements_count[key]
    print(f"{key}: {value}")

Unique values of the target variable [0 1]
Counts of the target variable : [212 357]
Original shape= (569, 30) (569,)
Original distribution:
0: 212
1: 357


In [3]:
# Build the oversampler and run it with its default settings.
knnor = data_augment.KNNOR()

# fit_resample hands back four values; only the augmented features and
# labels (the first two) are needed here, the rest are discarded.
X_new, y_new, _, _ = knnor.fit_resample(X, y)
print("Shape after augmentation", X_new.shape, y_new.shape)

# Class balance after augmentation, one "label: count" line per class.
elements_count = collections.Counter(y_new)
print("Final distribution:")
for key in elements_count:
    value = elements_count[key]
    print(f"{key}: {value}")

357 212 (569,)
357 212 (569,)
Shape after augmentation (714, 30) (714,)
Final distribution:
0: 357
1: 357


In [4]:
# Resample again, this time spelling out every tunable explicitly.
resample_options = dict(
    # number of neighbors used to generate each artificial point
    num_neighbors=10,
    # maximum distance at which a new point will be placed
    max_dist_point=0.01,
    # proportion of the minority population used to generate artificial points
    proportion_minority=0.3,
    # target minority-to-majority ratio after augmentation, not an absolute
    # count: with 15 majority and 5 minority points, a value of 1 adds 10
    # artificial minority points; in the recorded run of this cell, a value
    # of 2 grows class 0 from 212 to 714 (2 x 357).
    final_proportion=2,
)
X_new, y_new, _, _ = knnor.fit_resample(X, y, **resample_options)
print("Shape after augmentation", X_new.shape, y_new.shape)

# Class balance after augmentation, one "label: count" line per class.
elements_count = collections.Counter(y_new)
print("Final distribution:")
for key in elements_count:
    value = elements_count[key]
    print(f"{key}: {value}")

357 212 (569,)
Shape after augmentation (1071, 30) (1071,)
Final distribution:
0: 714
1: 357
