# In [1]:
import numpy as np

### This is an implementation of Fuzzy Similarity Analysis (FSA) for improving machine-learning-based predictions of water quality parameters

# function for distance calculation
def calculate_distance(v1, v2):
    """Return the Euclidean distance between two feature vectors.

    Both inputs are converted to float64 before the subtraction.  Without
    that conversion, unsigned or narrow integer inputs (for example uint16
    values) wrap around when subtracted and squared, which silently yields
    a wrong distance.

    Parameters
    ----------
    v1, v2 : array_like
        Vectors of equal length (anything ``np.asarray`` can convert to a
        float array, including plain lists).

    Returns
    -------
    numpy.float64
        Square root of the sum of squared element-wise differences.
    """
    diff = np.asarray(v1, dtype=float) - np.asarray(v2, dtype=float)
    return np.sqrt(np.sum(diff ** 2))

# FSA function
def FSA(x_training, x_prediction, alpha, beta, k):
    """Compute Fuzzy Similarity Analysis (FSA) weights for prediction samples.

    For every prediction sample, the Euclidean distance ``d`` to every
    training sample is converted into a similarity score

        similarity = exp((ln(alpha) / beta**2) * d**2)

    which equals 1 at ``d = 0`` and ``alpha`` at ``d = beta``.  The ``k``
    nearest training samples are then selected for each prediction sample
    and their similarity scores are turned into weights.

    Parameters
    ----------
    x_training : array_like, shape (n_training, n_features)
        Sentinel-2 (S2) feature collection of the training data.
    x_prediction : array_like, shape (n_prediction, n_features)
        Sentinel-2 (S2) feature collection of the prediction samples.
    alpha, beta : float
        Parameters of the FSA formula above.  Presumably ``0 < alpha < 1``
        and ``beta > 0`` so that similarity decays with distance -- confirm
        against the FSA reference.
    k : int
        Number of nearest training samples kept per prediction sample.

    Returns
    -------
    Weights : ndarray, shape (k, n_prediction)
        Non-normalized weights of the k selected samples, nearest first.
    Weights_id : ndarray of int, shape (k, n_prediction)
        Row indices into ``x_training`` of the selected training samples
        (integer dtype, so they can be used directly for indexing).
    ensemble_weights : ndarray, shape (k, n_prediction)
        ``Weights`` divided by its column sums, so every column sums to 1.
        A column whose weights sum to zero yields NaN.
    similarity_scores : ndarray, shape (n_training, n_prediction)
        Similarity of every training sample to every prediction sample.

    Raises
    ------
    ValueError
        If the inputs are not 2-D, their feature counts differ, or ``k``
        is negative or larger than the number of training samples.
    """
    # Work in float64: unsigned or narrow integer inputs would otherwise
    # wrap around when subtracted and squared.
    x_training = np.asarray(x_training, dtype=float)
    x_prediction = np.asarray(x_prediction, dtype=float)

    if x_training.ndim != 2 or x_prediction.ndim != 2:
        raise ValueError("x_training and x_prediction must be 2-D arrays")
    if x_training.shape[1] != x_prediction.shape[1]:
        raise ValueError(
            "x_training and x_prediction must have the same number of features"
        )

    n_training = x_training.shape[0]
    n_prediction = x_prediction.shape[0]
    if not 0 <= k <= n_training:
        raise ValueError(
            "k must be between 0 and the number of training samples"
        )

    # Squared Euclidean distances, one column per prediction sample.  Only
    # the square is needed by the similarity formula, so no sqrt is taken.
    squared_distances = np.empty((n_training, n_prediction))
    for i, sample in enumerate(x_prediction):
        diff = x_training - sample
        squared_distances[:, i] = np.sum(diff * diff, axis=1)

    similarity_scores = np.exp((np.log(alpha) / beta ** 2) * squared_distances)

    # Select the k nearest training samples per column.  Sorting the
    # similarity scores in ascending order (as the earlier version did)
    # would return the k *least* similar samples instead.  A stable sort
    # keeps tie-breaking deterministic (lowest training index first).
    Weights_id = np.argsort(squared_distances, axis=0, kind="stable")[:k]
    nearest_scores = np.take_along_axis(similarity_scores, Weights_id, axis=0)

    # NOTE(review): the weight formula is applied to the similarity score
    # (not the raw distance), exactly as before -- confirm against the FSA
    # reference.
    Weights = nearest_scores * np.exp(-nearest_scores / beta)
    ensemble_weights = Weights / np.sum(Weights, axis=0)

    return Weights, Weights_id, ensemble_weights, similarity_scores