In [None]:
import matplotlib.pyplot as plt
import numpy as np
import math

In [None]:
# Sample data: one (seats count, distance range in km) pair per aircraft.
X = np.array([
    (4, 1940), (9, 2960), (9, 4630), (78, 1528), (90, 2040),
    (50, 3700), (467, 14815), (509, 15200), (290, 15700), (215, 6045),
])

# Independent copies of the two feature columns, used for plotting.
x1 = X[:, 0].copy()
x2 = X[:, 1].copy()

fig, ax = plt.subplots()
ax.scatter(x1, x2)
ax.set_xlabel('Seats count')
ax.set_ylabel('Distance range (km)')
ax.set_title('Aircrafts')
ax.grid()
plt.show()

In [None]:
# Scale each feature column by its column-wise maximum.
train_data = np.array(X)
max_values = train_data.max(axis=0)

X_norm = train_data / max_values

In [None]:
# --- Clustering configuration -------------------------------------------
# Samples that get clustered (the normalised feature matrix).
data_set = X_norm
# Number of clusters.
groups = 2
# [low, high] interval per feature from which random centers are drawn.
space = [[0, 1] for _ in range(2)]

# Clustering stops once the summed membership change drops below this value.
error_margin = 0.001
# Exponent applied to the memberships when weighting samples.
m = 2.0

# Membership table: one row per sample, one column per cluster, all zero.
assignation = np.zeros(shape=(len(X), groups))

# Cluster centers; filled in later.
centers = []

In [None]:
import random

centers = []

def select_centers():
    """Pick ``groups`` random cluster centers and store them in ``centers``.

    Reads the module-level ``groups`` (number of centers) and ``space``
    (one ``[low, high]`` pair per feature).  Every center is a tuple with one
    coordinate per entry of ``space``, each drawn uniformly from its interval.

    The global ``centers`` list is replaced rather than extended, so calling
    this function again re-draws the centers instead of leaving stale ones at
    the front of the list.
    """
    global centers
    centers = [
        tuple(random.uniform(bounds[0], bounds[1]) for bounds in space)
        for _ in range(groups)
    ]
        
# Draw the initial random centers and show them.
select_centers()
print(centers)

In [None]:
import scipy.linalg as lg

class MahalanobisDistance:
    """Squared Mahalanobis distance based on the covariance of a feature matrix."""

    def set_dataset(self, dataset):
        """Store ``dataset`` on the instance.

        The stored value is not read by the other methods of this class; they
        receive the feature matrix explicitly.
        """
        self.dataset = dataset

    def get_distance(self, x, v, feature_matrix):
        """Return ``(x - v)^T A (x - v)`` with ``A`` the inverse covariance.

        Note that no square root is taken, so the result is the *squared*
        Mahalanobis distance between ``x`` and ``v``.

        Args:
            x: first point (sequence with one value per feature).
            v: second point, same length as ``x``.
            feature_matrix: samples (rows) by features (columns) used to
                estimate the covariance matrix.
        """
        matrix_A = self.get_inverse_covariance_matrix(feature_matrix)
        difference = np.subtract(x, v)
        return np.dot(np.dot(difference, matrix_A), difference)

    def get_inverse_covariance_matrix(self, feature_matrix):
        """Return the inverse of the covariance matrix of ``feature_matrix``.

        The covariance is normalised by the number of samples (not ``n - 1``)
        and its size follows the number of feature columns.  ``lg.inv`` raises
        if the covariance matrix is singular.
        """
        features = np.asarray(feature_matrix, dtype=float)
        deviations = features - features.mean(axis=0)
        # Sum of the outer products of the deviations, written as one product.
        covariance = np.dot(deviations.T, deviations) / len(features)
        return lg.inv(covariance)

In [None]:
# Shared distance helper; calculate_mah_distance() looks it up as a global.
mahalanobis_distance = MahalanobisDistance()
mahalanobis_distance.set_dataset(data_set)

In [None]:
def calculate_mah_distance(x1, x2, feature_matrix):
    """Forward to ``get_distance`` of the module-level ``mahalanobis_distance``.

    Returns whatever that object's ``get_distance(x1, x2, feature_matrix)``
    returns.
    """
    distance_helper = mahalanobis_distance
    return distance_helper.get_distance(x1, x2, feature_matrix)

In [None]:
def calculate_distance(x,v):
    """Return the Euclidean distance between the points ``x`` and ``v``.

    All coordinates take part in the distance, so points with more than two
    features are handled as well (previously only the first two coordinates
    were used).  Coordinates are paired with ``zip``, i.e. up to the length of
    the shorter argument.
    """
    squared_sum = 0.0
    for x_k, v_k in zip(x, v):
        squared_sum = squared_sum + (x_k - v_k) ** 2
    return math.sqrt(squared_sum)

In [None]:
def calculate_new_centers(u):
    """Recompute the global ``centers`` as membership-weighted means.

    For every cluster ``c`` the new center is
    ``sum_i(u[i][c]**m * data_set[i]) / sum_i(u[i][c]**m)``.

    Args:
        u: membership table with one row per sample of ``data_set`` and at
           least ``groups`` columns.

    Reads the module-level ``data_set``, ``groups`` and ``m``; replaces the
    module-level ``centers`` with a list of ``groups`` numpy vectors.  The
    vectors have one entry per feature column of ``data_set`` (the accumulator
    used to be sized by ``groups``, which only worked when both were equal).
    """
    global centers
    points = np.asarray(data_set, dtype=float)
    weights = np.asarray(u, dtype=float) ** m
    new_centers = []
    for c in range(groups):
        cluster_weights = weights[:, c]
        weighted_sum = np.dot(cluster_weights, points)
        new_centers.append(np.divide(weighted_sum, cluster_weights.sum()))
    centers = new_centers

In [None]:
def calculate_eta(x,i):
    """Return the PCM scale parameter ``eta`` of cluster ``i``.

    ``eta_i = sum_k(u_ki**m * d2(x_k, v_i)) / sum_k(u_ki**m)`` where ``u`` is
    the module-level ``assignation`` table, ``x_k`` runs over ``data_set``,
    ``v_i`` is ``centers[i]`` and ``d2`` is the value returned by
    ``calculate_mah_distance``.

    Args:
        x: kept for backward compatibility with existing callers; ``eta``
           describes the whole cluster, so the value is not used.
        i: index of the cluster.

    Raises:
        ValueError: if every membership of cluster ``i`` is zero, because the
            quotient would otherwise silently become NaN.
    """
    weighted_distance_sum = 0.0
    weight_sum = 0.0
    for k in range(len(data_set)):
        weight = assignation[k][i] ** m
        # Each sample contributes its own distance to the center; the same
        # weight (membership ** m) is used in numerator and denominator.
        distance = calculate_mah_distance(data_set[k], centers[i], data_set)
        weighted_distance_sum = weighted_distance_sum + weight * distance
        weight_sum = weight_sum + weight
    if weight_sum == 0:
        raise ValueError(
            "cannot compute eta for cluster %d: all memberships are zero" % i)
    return weighted_distance_sum / weight_sum

In [None]:
def calculate_u_fcm(x,i):
    """Return the fuzzy c-means membership of point ``x`` in cluster ``i``.

    ``u_i = 1 / sum_j((d_i / d_j) ** (2 / (m - 1)))`` where ``d_j`` is
    ``calculate_distance(x, centers[j])`` and ``m`` is the module-level
    fuzzifier.  Works for any number of entries in ``centers``; for two
    centers and ``m == 2`` this is ``1 / (1 + (d_i / d_other) ** 2)``.

    A point lying exactly on ``centers[i]`` gets membership 1.0; a point lying
    exactly on another center gets 0.0 (instead of a division by zero).
    ``m == 1`` is not supported (the exponent would divide by zero).
    """
    distances = [calculate_distance(x, center) for center in centers]
    own_distance = distances[i]
    if own_distance == 0:
        return 1.0
    for other_distance in distances:
        if other_distance == 0:
            return 0.0
    exponent = 2.0 / (m - 1.0)
    ratio_sum = 0.0
    for other_distance in distances:
        ratio_sum = ratio_sum + (own_distance / other_distance) ** exponent
    return ratio_sum ** -1

In [None]:
def calculate_u_pcm(x,i):
    """Return the possibilistic (PCM) membership of ``x`` in cluster ``i``.

    ``u = 1 / (1 + (d2 / eta) ** (1 / (m - 1)))`` where ``d2`` is
    ``calculate_mah_distance(x, centers[i], data_set)``, ``eta`` is
    ``calculate_eta(x, i)`` and ``m`` is the module-level exponent.

    ``MahalanobisDistance.get_distance`` takes no square root, so ``d2`` is
    already a squared distance and is not squared again here.  The center is
    looked up by ``i`` directly, so any cluster index is supported.
    """
    eta = calculate_eta(x, i)
    squared_distance = calculate_mah_distance(x, centers[i], data_set)
    denominator = 1.0 + (squared_distance / eta) ** (1.0 / (m - 1.0))
    return denominator ** -1

In [None]:
def calculate_differences(new_assignation):
    """Sum the absolute membership changes against the global ``assignation``.

    Walks every row of ``assignation`` and the first ``groups`` columns,
    adding ``|new - old|`` of the values converted to ``float``.
    """
    total = 0
    for row_idx, old_row in enumerate(assignation):
        for col_idx in range(groups):
            delta = float(new_assignation[row_idx][col_idx]) - float(old_row[col_idx])
            total = total + abs(delta)
    return total

In [None]:
def _compute_memberships(membership_function):
    """Build the membership table: one row per sample, one column per cluster."""
    return [
        [membership_function(sample, k) for k in range(groups)]
        for sample in data_set
    ]


def cluster(max_iterations=100):
    """Cluster ``data_set``: one FCM pass for initialisation, then PCM passes.

    1. Memberships are computed with ``calculate_u_fcm`` and the centers are
       updated from them.  These memberships are stored in the global
       ``assignation`` so that the first possibilistic pass (whose ``eta``
       reads ``assignation``) starts from them instead of an all-zero table.
    2. Memberships are recomputed with ``calculate_u_pcm`` and the centers
       updated until ``calculate_differences`` reports a total change below
       ``error_margin``.

    Args:
        max_iterations: upper bound on the number of possibilistic passes.
            It guarantees termination when the change never drops below
            ``error_margin`` (for example when the memberships become NaN).

    The result is left in the module-level ``assignation`` and ``centers``;
    nothing is returned.
    """
    global assignation

    new_assignation = _compute_memberships(calculate_u_fcm)
    calculate_new_centers(new_assignation)
    assignation = new_assignation

    print("Assignation afer FCM:" + str(new_assignation))

    for _ in range(max_iterations):
        new_assignation = _compute_memberships(calculate_u_pcm)
        calculate_new_centers(new_assignation)

        # Compare against the previous table before it gets replaced.
        converged = calculate_differences(new_assignation) < error_margin
        assignation = new_assignation
        if converged:
            return

    print("Clustering stopped after " + str(max_iterations)
          + " iterations without reaching the error margin")

In [None]:
# Run the clustering; the result ends up in the globals `assignation` and `centers`.
cluster()

In [None]:
# Show the final membership table followed by the cluster centers.
print(assignation, centers, sep="\n")

In [None]:
colors = ['red','blue','green','orange','black','yellow']

# Hard assignment: every sample goes to the cluster with its largest membership.
assigned_groups = [np.argmax(assignation[el]) for el in range(len(X_norm))]
print(assignation)
print(assigned_groups)

In [None]:
def get_colours(color_id):
    """Return the rows of ``X_norm`` whose assigned group equals ``color_id``.

    Prints ``color_id`` and the selected rows before returning them.
    """
    print(color_id)
    member_rows = np.where(np.array(assigned_groups)[:] == color_id)
    print(X_norm[member_rows])
    return X_norm[member_rows]

In [None]:
fig, ax = plt.subplots()

# One scatter series per cluster.  The colour is looked up by index instead of
# popped from `colors`, so re-running this cell neither changes the colours
# nor runs the list empty.
for group in range(groups):
    small_set = get_colours(group)
    ax.scatter(small_set[:,0],small_set[:,1],c=colors[group % len(colors)])
# Mark the cluster centers found by the clustering with black crosses.
ax.scatter(np.array(centers)[:,0],np.array(centers)[:,1],marker='x',c='black')
ax.set(xlabel='Seats count', ylabel='Distance range (km)',
       title='Aircrafts (clusters)')
ax.grid()
plt.show()