#### - Usually, when you "ensemble" the results of two or more notebooks, you choose coefficients that are applied to every row. As a result, the predictions will probably improve in some rows and, at the same time, get worse in others. However, if the results are better overall, we consider the "ensemble" successful.

#### - In any case, we should not forget that choosing the "ensembling" coefficients blindly will probably make the results of some rows worse. We often do not notice this, because the overall score has improved anyway.

#### - Moreover, if the notebooks' results have two or more dimensions, choosing the "ensembling" coefficients becomes considerably harder, and success may only come through guessing or a lot of trial and error.

#### - In this notebook, we share our innovative method for "Coordinating [One by One]" the results: for each row, we perform a separate calculation and determine the order of proximity of all points in that row. We can then take the point with the best score in the row as the main basis and, for example, combine its value with that of the point closest to it (Blend or Snap).

#### - We use "NearestNeighbors" for each row, which makes the calculations somewhat slow. Of course, you can use other methods.

In [None]:
import numpy as np 
import pandas as pd
import seaborn as sns

import matplotlib.pyplot as plt
%matplotlib inline
!ls ../input/*

In [None]:
# Sample submission file: later cells use its length as the row count and
# copy it as the template for the output frame.
SAMPLE = pd.read_csv('../input/smartphone-decimeter-2022/sample_submission.csv')
display(SAMPLE)

In [None]:
# Submission files to combine, one per input dataset. The order matters:
# the row-by-row cell below uses the LAST entry as its reference
# ("Best Score") source.
# NOTE(review): the dataset names look like public scores, ordered from
# worst to best -- confirm.
path = [
    f'../input/{dataset}/submission.csv'
    for dataset in (
        'gsdc224870',
        'gsdc224823',
        'gsdc224581',
        'gsdc224376',
        'gsdc223355',
        'gsdc223017',
    )
]

# Individual names are kept because later cells refer to them directly.
path0, path1, path2, path3, path4, path5 = path

In [None]:
# QT[k][j] / QN[k][j] hold, as one-element lists, the values found in column
# index 2 / column index 3 of row j of the k-th file in `path` (used
# downstream as LatitudeDegrees / LongitudeDegrees). Each value is wrapped in
# its own list so that a row's values form an (n_sources, 1) sample matrix for
# the nearest-neighbour search.
#
# The containers grow with `path`, so adding or removing a submission file
# does not require touching this cell.
n_rows = len(SAMPLE)
QT = []
QN = []

for csv_path in path:
    sub_k = pd.read_csv(csv_path).values
    if len(sub_k) < n_rows:
        # Fail early with a readable message instead of an index error
        # somewhere in the middle of the file.
        raise ValueError(
            f'{csv_path} has {len(sub_k)} rows, expected at least {n_rows}'
        )
    # Only the first len(SAMPLE) rows are used, as before.
    QT.append([[row[2]] for row in sub_k[:n_rows]])
    QN.append([[row[3]] for row in sub_k[:n_rows]])

In [None]:
def near_plt(points, best_score, support, best_1, generated, row=None):
    """Scatter-plot the candidate coordinates of one row and save the figure.

    Every positional argument is a two-item sequence ``[x, y]``: ``x`` is
    drawn on the axis labelled 'LatitudeDegrees' and ``y`` on the axis
    labelled 'LongitudeDegrees'.

    Args:
        points: all candidate values of the row ('All Points').
        best_score: the point highlighted as 'Best Score'.
        support: the point highlighted as 'Support'.
        best_1: the point drawn as the 'Best-1 (To Check)' cross.
        generated: the point drawn as the 'Generated' cross.
        row: positional row index into ``SAMPLE`` used for the title and the
            output file name. When omitted, the module-level loop variable
            ``i`` is used, which is what the original implementation read
            implicitly.

    Side effects:
        Writes ``Coordinate_<row>.png`` to the working directory and shows
        the figure.
    """
    row = i if row is None else row

    # 'seaborn-whitegrid' was renamed to 'seaborn-v0_8-whitegrid' in
    # Matplotlib 3.6 and the old name was removed afterwards. Use whichever
    # name this installation provides; keep the current style if neither does.
    for style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
        if style_name in plt.style.available:
            plt.style.use(style_name)
            break

    plt.figure(figsize=(10, 10), facecolor='lightblue')
    plt.title(f'\nC O O R D I N A T E\n\n{SAMPLE.iloc[row][:2]}')

    # Drawing order is kept: later calls are painted on top of earlier ones
    # and the legend lists the entries in this order.
    plt.scatter(points[0], points[1], s=200, facecolor='lightblue', edgecolor='black', label='All Points')
    plt.scatter(best_score[0], best_score[1], s=200, facecolor='violet', edgecolor='black', label='Best Score')
    plt.scatter(support[0], support[1], s=200, facecolor='yellow', edgecolor='black', label='Support')
    plt.scatter(generated[0], generated[1], s=150, marker='x', label='Generated')
    plt.scatter(best_1[0], best_1[1], s=150, marker='x', label='Best-1 (To Check)')

    plt.legend(fontsize=12)
    plt.xlabel('LatitudeDegrees', fontsize=12)
    plt.ylabel('LongitudeDegrees', fontsize=12)
    plt.savefig(f'Coordinate_{row}.png')
    plt.show()

In [None]:
from sklearn.neighbors import NearestNeighbors

# Rows (drawn with replacement) that are printed and plotted as examples.
random_examples = np.random.randint(len(SAMPLE), size=10) # Number of examples: 10
example_rows = set(random_examples.tolist())

# Blend weights applied, in this order, to the reference value (the last
# source), its nearest neighbour and its second-nearest neighbour. Each
# triple sums to 1. Latitude is currently left at the reference value, while
# longitude is pushed 0.36 of the gap away from its nearest neighbour
# (1.36 * a - 0.36 * b == a + 0.36 * (a - b)).
LAT_WEIGHTS = (1.00, -0.00, 0.00)
LON_WEIGHTS = (1.36, -0.36, 0.00)

if len(QT) < len(LAT_WEIGHTS) or len(QN) < len(LON_WEIGHTS):
    raise ValueError('at least three source submissions are required')

T = [] # LatitudeDegrees
N = [] # LongitudeDegrees

# NOTE: the loop variable must stay named `i`; near_plt reads it as a global.
for i in range(len(SAMPLE)):
    # One value per source for this row, however many sources were loaded.
    XT = [source[i] for source in QT]
    XN = [source[i] for source in QN]

    # Latitude and longitude are ranked independently (two 1-D searches).
    nbrs = NearestNeighbors(n_neighbors=len(XT), algorithm='ball_tree').fit(XT)
    _, indices_T = nbrs.kneighbors(XT)

    nbrs = NearestNeighbors(n_neighbors=len(XN), algorithm='ball_tree').fit(XN)
    _, indices_N = nbrs.kneighbors(XN)

    # Only the last result row is used: the sources ordered by distance from
    # the last source. Entry 0 is plotted as 'Best Score', entry 1 as
    # 'Support'.
    # NOTE(review): entry 0 is presumably the last source itself (distance
    # 0); with exact duplicates it may be another source of equal value.
    order_T = indices_T[-1]
    order_N = indices_N[-1]

    tt = ((LAT_WEIGHTS[0] * XT[order_T[0]][0])
          + (LAT_WEIGHTS[1] * XT[order_T[1]][0])
          + (LAT_WEIGHTS[2] * XT[order_T[2]][0]))
    T.append(tt)

    nn = ((LON_WEIGHTS[0] * XN[order_N[0]][0])
          + (LON_WEIGHTS[1] * XN[order_N[1]][0])
          + (LON_WEIGHTS[2] * XN[order_N[2]][0]))
    N.append(nn)

    if i in example_rows:
        print(f'\n\n\nRow >>>>> {i}\n{SAMPLE.iloc[i][:2]}')
        near_plt([XT, XN], [XT[order_T[0]], XN[order_N[0]]],
                 [XT[order_T[1]], XN[order_N[1]]], [XT[-2], XN[-2]], [tt, nn])


In [None]:
# Build the output frame: a copy of the sample submission whose two
# coordinate columns are replaced by the values computed row by row.
sub = SAMPLE.assign(LatitudeDegrees=T, LongitudeDegrees=N)
sub

In [None]:
# Write the combined result without the index column, then list the working
# directory.
sub.to_csv("submission.csv", index=False)
!ls

In [None]:
def ensembling(main, support, coeff1, coeff2):
    """Blend two submission frames with one fixed weight per coordinate column.

    For the columns at positions 2 and 3 the result is
    ``main * coeff + support * (1 - coeff)``, computed row by row by
    position (the frames' index labels are ignored). All other columns are
    taken from ``main``. Neither input is modified.

    Args:
        main: DataFrame whose columns 2 and 3 receive weight ``coeff``.
        support: DataFrame whose columns 2 and 3 receive weight
            ``1 - coeff``. Only its first ``len(main)`` rows are used.
        coeff1: weight of ``main`` for the column at position 2.
        coeff2: weight of ``main`` for the column at position 3.

    Returns:
        A new DataFrame shaped like ``main`` with the two blended columns
        stored as floats.

    Raises:
        IndexError: if ``support`` has fewer rows than ``main``, or either
            frame has fewer than four columns.
    """
    n_rows = len(main)
    if len(support) < n_rows:
        raise IndexError(
            f'support has {len(support)} rows but main has {n_rows}'
        )

    ense = main.copy()

    for col, coeff in ((2, coeff1), (3, coeff2)):
        # Work on plain float arrays so the blend is positional and the
        # result column keeps a float dtype.
        main_vals = main.iloc[:, col].to_numpy(dtype=float)
        support_vals = support.iloc[:n_rows, col].to_numpy(dtype=float)
        ense[ense.columns[col]] = (main_vals * coeff) + (support_vals * (1.0 - coeff))

    return ense

In [None]:
# Fixed-weight blend of two of the input submissions: both coordinate
# columns are averaged 50/50 between `path5` (main) and `path4` (support).
ensemble = ensembling(
    main=pd.read_csv(path5),
    support=pd.read_csv(path4),
    coeff1=0.50,
    coeff2=0.50,
)
ensemble

In [None]:
#ensemble.to_csv("ensembling.csv", index=False)
#!ls