In [None]:
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.utils import check_array

class SMOTE:
    """SMOTE

    Performs SMOTE resampling to address class imbalance.

    New samples are synthesized by picking a training sample at random,
    picking one of its nearest neighbors at random, and drawing a point
    uniformly on the segment joining the two.

    Parameters
    ----------
    k_neighbors : int, default=5
        The number of nearest neighbors.

    Attributes
    ----------
    k_neighbors_ : int
        The number of nearest neighbors.
    X : ndarray of shape (n_samples, n_features)
        The validated training samples, set by `fit`.
    n_features_in_ : int
        Number of features seen during `fit`.
    neigh : NearestNeighbors
        Nearest-neighbors index fitted on `X` with `k_neighbors_ + 1`
        neighbors, set by `fit`.
    """

    def __init__(self, k_neighbors=5):
        self.k_neighbors_ = k_neighbors

    def fit(self, X):
        """Fit SMOTE on a training set, by looking for the `k_neighbors`
        nearest neighbors of each sample.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
           The samples to oversample from.

        Returns
        -------
        self : SMOTE
            The fitted instance.
        """
        self.X = check_array(X)
        self.n_features_in_ = self.X.shape[1]

        # One extra neighbor is requested because querying a training point
        # returns that point itself among its own nearest neighbors.
        n_neighbors = self.k_neighbors_ + 1
        self.neigh = NearestNeighbors(n_neighbors=n_neighbors)
        self.neigh.fit(self.X)

        return self

    @staticmethod
    def _resolve_rng(random_state):
        """Return an object exposing `randint` and `random_sample`."""
        if random_state is None:
            # The module-level functions are bound to NumPy's global
            # RandomState, so `np.random.seed` keeps working as before.
            return np.random
        if isinstance(random_state, np.random.RandomState):
            return random_state
        return np.random.RandomState(random_state)

    def sample(self, n_samples, random_state=None):
        """
        Generate new synthetic samples from the training samples.

        All random draws and the nearest-neighbor lookup are done in batch,
        so the neighbor index is queried at most once per distinct base
        sample instead of once per synthetic sample.

        Parameters
        ----------
        n_samples : int
            The number of new synthetic samples to generate.
        random_state : int, numpy.random.RandomState or None, default=None
            Source of randomness. With None, NumPy's global random state is
            used; an int seeds a fresh `RandomState`, making the call
            reproducible.

        Returns
        -------
        X_new : ndarray of shape (n_samples, n_features)
            The new synthetic samples, as float64.

        Raises
        ------
        AttributeError
            If `fit` has not been called yet.
        """
        if not hasattr(self, "neigh"):
            raise AttributeError(
                "This SMOTE instance is not fitted yet; "
                "call `fit` before `sample`."
            )
        if n_samples == 0:
            # Nothing to draw; also avoids querying the index with no rows.
            return np.zeros((0, self.n_features_in_))

        rng = self._resolve_rng(random_state)

        # Pick the base sample of every synthetic point at once
        base_idx = rng.randint(0, self.X.shape[0], size=n_samples)

        # Query the neighbors once per distinct base sample
        uniq, inverse = np.unique(base_idx, return_inverse=True)
        neighbors = self.neigh.kneighbors(self.X[uniq], return_distance=False)

        # Drop the first column: it is the query point itself (or an exact
        # duplicate of it, which interpolates to the same location)
        neighbors = neighbors[:, 1:]

        # Choose one of the k neighbors for every synthetic point
        slot = rng.randint(0, neighbors.shape[1], size=n_samples)
        neigh_idx = neighbors[inverse, slot]

        # Work in float64 so that integer (notably unsigned) training data
        # cannot wrap around when the difference is taken
        base = self.X[base_idx].astype(np.float64)
        target = self.X[neigh_idx].astype(np.float64)

        # Measure the difference between each base sample and its chosen
        # neighbor, then move a random fraction of the way along it
        fraction = rng.random_sample(size=(n_samples, 1))
        return base + fraction * (target - base)