In [486]:
# A simple implementation of the Minkowski distance-based fuzzy k-nearest neighbor regression (Md-FKNNreg) model,
# the fuzzy k-nearest neighbor regression (FKNNreg) model, and the k-nearest neighbor regression (KNNreg) model,
# based on the article: 
    # Kumbure, M.M. and Luukka, P. A generalized fuzzy k-nearest neighbor 
    # regression model based on Minkowski distance. Granular Computing, 7, 657–671 (2022). 
    # https://doi.org/10.1007/s41066-021-00288-w

    # Created by Mahinda Mailagaha Kumbure, 09/2022 
    
# Md-FKNNreg function
# INPUTS:
    # xtrain: training data, an n-by-m matrix consisting of n patterns and m features (variables)
    # ytrain: n-dimensional output vector of the xtrain data
    # xtest: test data, a D-by-m matrix consisting of D patterns and m features
    # ytest: D-dimensional output vector of the xtest data (used only to evaluate the predictions; it is not an argument of the regression functions)
    # K: number of nearest neighbors to be selected
    # p: parameter value of the Minkowski distance; p=2 gives the Euclidean distance
    
# OUTPUT:
    # predicted: predicted y values for each test pattern in xtest
    
#--------------------------------------------------------------------------------------------------------------

import numpy as np
import pandas as pd
from math import sqrt

# calculate the Minkowski distance (when p=2 we can apply the Euclidean distance) --> use p=2 for KNNreg, FKNNreg 
def minkowski_distance(a, b, p):
    """Return the Minkowski distance of order ``p`` between ``a`` and ``b``.

    The two inputs are paired coordinate by coordinate with ``zip``, so any
    coordinates beyond the length of the shorter input are ignored.
    ``p=1`` gives the Manhattan distance and ``p=2`` the Euclidean distance.
    """
    total = 0
    for coord_a, coord_b in zip(a, b):
        total = total + abs(coord_a - coord_b) ** p
    # The p-th root turns the accumulated powers back into a distance.
    return total ** (1 / p)
 
# find the most similar neighbors
def get_neighbors(xtrain, ytrain, test_sample, K, p):
    """Return the ``K`` training samples closest to ``test_sample``.

    Every training sample ``xtrain[i]`` is scored by its Minkowski distance
    of order ``p`` to ``test_sample``. The result is a list of ``K`` tuples
    ``(y_value, distance)`` ordered by increasing distance, i.e. for each
    returned neighbor ``neighbor[0]`` is its output value and ``neighbor[1]``
    its distance to the test sample.

    An ``IndexError`` is raised when ``K`` is larger than ``len(xtrain)``.
    """
    scored = [
        (ytrain[idx], minkowski_distance(test_sample, xtrain[idx], p))
        for idx in range(len(xtrain))
    ]
    # sorted() is stable, so equally distant samples keep their training order.
    ranked = sorted(scored, key=lambda pair: pair[1])
    return [ranked[rank] for rank in range(K)]

# calculate fuzzy memberships for the neighbors
def get_memberships(neighbors, m=2):
    """Compute the fuzzy membership (weight) of each neighbor.

    Parameters
    ----------
    neighbors : sequence of (y_value, distance) tuples
        Only the distance, ``neighbor[1]``, is used.
    m : number, optional
        Fuzzifier controlling how fast the weight decays with distance.
        Must be greater than 1. Defaults to 2, which gives plain
        inverse-distance weights ``1 / distance``.

    Returns
    -------
    list
        One weight per neighbor, in the same order as ``neighbors``.

    Raises
    ------
    ValueError
        If ``m`` is not greater than 1 (the exponent would be undefined).

    Notes
    -----
    A neighbor at distance 0 is an exact match of the test sample. The
    inverse-distance weight is unbounded there, so the limiting case is used:
    exact matches receive weight 1 and every other neighbor weight 0. Giving
    exact matches weight 0 instead would discard the most informative
    neighbors and make the weights sum to zero when all neighbors are exact
    matches.
    """
    if m <= 1:
        raise ValueError("fuzzifier m must be greater than 1")

    distances = [neighbor[1] for neighbor in neighbors]

    # Exact matches dominate: the prediction becomes the mean of their outputs.
    if any(dist == 0 for dist in distances):
        return [1.0 if dist == 0 else 0.0 for dist in distances]

    exponent = -1 / (m - 1)
    return [dist ** exponent for dist in distances]

# calculate fuzzy weighted average of the outputs of the nearest neighbors (i.e., prediction)
def get_prediction(memberships, neighbors):
    """Return the membership-weighted average of the neighbors' outputs.

    ``memberships`` holds one weight per neighbor and ``neighbors`` is a
    sequence of ``(y_value, distance)`` tuples; only ``y_value`` is used.
    Weights and outputs are paired positionally with ``zip``, and the
    weighted sum is divided by the sum of all entries in ``memberships``.
    """
    targets = [pair[0] for pair in neighbors]
    weighted_total = 0
    for weight, target in zip(memberships, targets):
        weighted_total = weighted_total + weight * target
    return weighted_total / sum(memberships)


# Functions for the nearest neighbor regression methods
#...............................................................................................................
# Minkowski distanced based Fuzzy k-nearest neighbor regression (Md-FKNNreg) function:
def Md_FKNNreg(xtrain, ytrain, xtest, K, p):
    """Predict outputs for ``xtest`` with Minkowski distance-based fuzzy k-NN regression.

    For every test sample the ``K`` nearest training samples (Minkowski
    distance of order ``p``) are selected, turned into fuzzy memberships and
    combined into a membership-weighted average of their outputs.

    Returns a list with one prediction per sample in ``xtest``.
    """
    def predict_one(query):
        nearest = get_neighbors(xtrain, ytrain, query, K, p)
        weights = get_memberships(nearest)
        return get_prediction(weights, nearest)

    return [predict_one(query) for query in xtest]


# Fuzzy k-nearest neighbor regression (FKNNreg) function:
def FKNNreg(xtrain, ytrain, xtest, K, p=2):
    """Predict outputs for ``xtest`` with fuzzy k-NN regression.

    Same procedure as the Minkowski distance-based variant, with the distance
    order ``p`` defaulting to 2 (Euclidean distance).

    Returns a list with one prediction per sample in ``xtest``.
    """
    predictions = []
    for query in xtest:
        nearest = get_neighbors(xtrain, ytrain, query, K, p)
        predictions.append(get_prediction(get_memberships(nearest), nearest))
    return predictions


# K-nearest neighbor regression (KNNreg) function:
def KNNreg(xtrain, ytrain, xtest, K, p=2):
    """Predict outputs for ``xtest`` with plain k-NN regression.

    The ``K`` nearest training samples (Minkowski distance of order ``p``,
    Euclidean by default) all receive the same weight of 1, so the prediction
    is the unweighted mean of their outputs.

    Returns a list with one prediction per sample in ``xtest``.
    """
    def predict_one(query):
        nearest = get_neighbors(xtrain, ytrain, query, K, p)
        uniform_weights = np.ones(K)  # every neighbor counts equally
        return get_prediction(uniform_weights, nearest)

    return [predict_one(query) for query in xtest]


In [497]:
# An example case:

from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split

RANDOM_STATE = 42  # fixed seed so the train/test split (and the metrics) are reproducible


def regression_metrics(y_true, y_pred):
    """Return ``(mse, rmse, r2)`` for the predictions ``y_pred`` of ``y_true``.

    mse  : mean of the squared errors
    rmse : square root of ``mse``
    r2   : 1 - (sum of squared errors) / (sum of squared deviations of
           ``y_true`` from its mean)
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    squared_errors = (y_true - y_pred) ** 2
    mse = np.mean(squared_errors)
    rmse = np.sqrt(mse)
    r2 = 1 - np.sum(squared_errors) / np.sum((y_true - np.mean(y_true)) ** 2)
    return mse, rmse, r2


# import simple data set to test the FKNNreg model
# NOTE(review): if the CSV file has no header row, pass header=None so the
# first sample is not consumed as column names -- confirm against the file.
data    = pd.read_csv('qsar_fish_toxicity.csv', sep=';') # from UCI machine learning repository
# NOTE(review): normalize() is applied to the whole table, target column
# included, and by default rescales each row (sample) rather than each
# column (feature) -- confirm that this is the intended scaling.
sc_data = normalize(data) # data scaling

X = sc_data[:, 0:-1] # features set
y = sc_data[:, -1]   # set of study variable

# split the data into training and test with a 80/20 split
xtrain, xtest, ytrain, ytest = train_test_split(
    X, y, test_size=0.20, random_state=RANDOM_STATE
)

K = 10 # define number of nearest neighbors


#----------- Run the nearest neighbor regression methods---------

# Md-FKNNreg
p = 5 # one run of the Md_FKNNreg method; change p to try other distance orders
yhat_mdfknn = Md_FKNNreg(xtrain, ytrain, xtest, K, p)

# FKNNreg
yhat_fknn   = FKNNreg(xtrain, ytrain, xtest, K)

# KNNreg
yhat_knn    = KNNreg(xtrain, ytrain, xtest, K)


#----------- Accuracy measures (MSE, RMSE, R2) for each method ---------
# Each method gets its own set of variables: suffix 1 = Md-FKNNreg,
# suffix 2 = FKNNreg, suffix 3 = KNNreg.
mse_f1, rmse_f1, r2_f1 = regression_metrics(ytest, yhat_mdfknn)
mse_f2, rmse_f2, r2_f2 = regression_metrics(ytest, yhat_fknn)
mse_f3, rmse_f3, r2_f3 = regression_metrics(ytest, yhat_knn)

# print the results
results = [
    ("Md-FKNNreg", mse_f1, rmse_f1, r2_f1),
    ("FKNNreg", mse_f2, rmse_f2, r2_f2),
    ("KNNreg", mse_f3, rmse_f3, r2_f3),
]
for position, (label, mse, rmse, r2) in enumerate(results):
    if position:
        print(" ")  # blank separator between the result blocks
    print(label + " Results:")
    print("MSE:", mse)
    print("RMSE:", rmse)
    print("R-Squared:", r2)