In [1]:
import numpy as np

### Evaluation Metrics

In [2]:
# Synthetic three-class example. For each true class, the tuple passed to
# enumerate() lists how many samples were predicted as class 0, 1, 2 (in that
# order). Everything stays a plain Python list so the per-class pieces can be
# concatenated with `+` afterwards.
true_A = [0 for _ in range(60)]
pred_A = [label for label, count in enumerate((46, 12, 2)) for _ in range(count)]

true_B = [1 for _ in range(70)]
pred_B = [label for label, count in enumerate((9, 61)) for _ in range(count)]

true_C = [2 for _ in range(30)]
pred_C = [label for label, count in enumerate((2, 3, 25)) for _ in range(count)]

In [3]:
true_Y = true_A + true_B + true_C
pred_Y = pred_A + pred_B + pred_C

In [4]:
matrix = np.stack((pred_Y, true_Y), axis=1)

In [5]:
matrix

array([[0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [0, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [2, 0],
       [2, 0],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [0, 1],
       [0,

In [6]:
def calculate_f1_score(matrix):
    """
        Compute the per-class F1 score from paired predicted / true labels.

        Class labels are used directly as confusion-matrix indices, so they
        must be non-negative integers. The confusion matrix and the derived
        TP / FP / FN / TN / precision / recall vectors are printed as a side
        effect.

        Args:
            matrix        (numpy.ndarray): Matrix of dimension N x 2 
                                     where First column = Predicted Class
                                           Second column = True Class
        Returns:
            f1_score      (numpy.ndarray): f1_score of each class. A class whose
                                     precision, recall or F1 denominator is zero
                                     gets 0.0 for that ratio instead of nan.
    """
    matrix = np.asarray(matrix)

    # Size the matrix from the largest label found in EITHER column. Counting
    # only the distinct predicted labels breaks (IndexError) as soon as a class
    # is never predicted or the labels are not contiguous.
    size = int(matrix.max()) + 1 if matrix.size else 0
    con_mat = np.zeros((size, size))

    # Rows are indexed by the true class, columns by the predicted class.
    for i in range(matrix.shape[0]):
        con_mat[matrix[i, 1]][matrix[i, 0]] += 1

    print("Confussion Matrix:")
    print(con_mat, end="\n\n")

    TP = np.diag(con_mat)
    print("True Positive:  ", TP)

    # Column total minus the diagonal: predicted as the class but actually another.
    FP = con_mat.sum(axis=0) - TP
    print("False Positive: ", FP)

    # Row total minus the diagonal: actually the class but predicted as another.
    FN = con_mat.sum(axis=1) - TP
    print("False Negative: ", FN)

    TN = con_mat.sum() - (TP + FP + FN)
    print("True Negative:  ", TN)

    # `where=` skips the zero denominators; those entries keep the 0.0 from `out`.
    predicted_total = TP + FP
    precision = np.divide(TP, predicted_total,
                          out=np.zeros_like(predicted_total),
                          where=predicted_total != 0)
    print("Precision: ", precision)

    actual_total = TP + FN
    recall = np.divide(TP, actual_total,
                       out=np.zeros_like(actual_total),
                       where=actual_total != 0)
    print("Recall: ", recall)

    pr_sum = precision + recall
    f1_score = np.divide(2 * (precision * recall), pr_sum,
                         out=np.zeros_like(pr_sum),
                         where=pr_sum != 0)
    return f1_score

In [7]:
calculate_f1_score(matrix)

Confussion Matrix:
[[46. 12.  2.]
 [ 9. 61.  0.]
 [ 2.  3. 25.]]

True Positive:   [46. 61. 25.]
False Positive:  [11. 15.  2.]
False Negative:  [14.  9.  5.]
True Negative:   [ 89.  75. 128.]
Precision:  [0.80701754 0.80263158 0.92592593]
Recall:  [0.76666667 0.87142857 0.83333333]


array([0.78632479, 0.83561644, 0.87719298])

In [8]:
def calculate_macro_avg_precision(matrix):
    """
        Compute the macro-averaged precision (unweighted mean of the per-class
        precisions) from paired predicted / true labels.

        Class labels are used directly as confusion-matrix indices, so they
        must be non-negative integers. The confusion matrix and the derived
        TP / FP / FN / TN / precision vectors are printed as a side effect.

        Args:
            matrix        (numpy.ndarray): Matrix of dimension N x 2 
                                     where First column = Predicted Class
                                           Second column = True Class
        Returns:
            macro_avg_precision        (numpy.float64):  macro_avg_precision value.
                                     A class that was never predicted contributes
                                     a precision of 0.0 to the average.
    """
    matrix = np.asarray(matrix)

    # Size the matrix from the largest label found in EITHER column. Counting
    # only the distinct predicted labels breaks (IndexError) as soon as a class
    # is never predicted or the labels are not contiguous.
    size = int(matrix.max()) + 1 if matrix.size else 0
    con_mat = np.zeros((size, size))

    # Rows are indexed by the true class, columns by the predicted class.
    for i in range(matrix.shape[0]):
        con_mat[matrix[i, 1]][matrix[i, 0]] += 1

    print("Confussion Matrix:")
    print(con_mat, end="\n\n")

    TP = np.diag(con_mat)
    print("True Positive:  ", TP)

    FP = con_mat.sum(axis=0) - TP
    print("False Positive: ", FP)

    FN = con_mat.sum(axis=1) - TP
    print("False Negative: ", FN)

    TN = con_mat.sum() - (TP + FP + FN)
    print("True Negative:  ", TN)

    # `where=` skips the zero denominators; those entries keep the 0.0 from
    # `out`, so one never-predicted class cannot turn the whole average into nan.
    predicted_total = TP + FP
    precision = np.divide(TP, predicted_total,
                          out=np.zeros_like(predicted_total),
                          where=predicted_total != 0)
    print("Precision: ", precision)

    macro_avg_precision = precision.sum() / len(precision)
    return macro_avg_precision

In [9]:
calculate_macro_avg_precision(matrix)

Confussion Matrix:
[[46. 12.  2.]
 [ 9. 61.  0.]
 [ 2.  3. 25.]]

True Positive:   [46. 61. 25.]
False Positive:  [11. 15.  2.]
False Negative:  [14.  9.  5.]
True Negative:   [ 89.  75. 128.]
Precision:  [0.80701754 0.80263158 0.92592593]


0.8451916829109812

In [10]:
def calculate_micro_avg_precision(matrix):
    """
        Compute the micro-averaged precision (TP and FP pooled over all
        classes before dividing) from paired predicted / true labels.

        Class labels are used directly as confusion-matrix indices, so they
        must be non-negative integers. The confusion matrix and the derived
        TP / FP / FN / TN vectors are printed as a side effect.

        Args:
            matrix        (numpy.ndarray): Matrix of dimension N x 2 
                                     where First column = Predicted Class
                                           Second column = True Class
        Returns:
            micro_avg_precision        (numpy.float64):  micro_avg_precision value
                                     (nan when `matrix` has no rows).
    """
    matrix = np.asarray(matrix)

    # Size the matrix from the largest label found in EITHER column. Counting
    # only the distinct predicted labels breaks (IndexError) as soon as a class
    # is never predicted or the labels are not contiguous.
    size = int(matrix.max()) + 1 if matrix.size else 0
    con_mat = np.zeros((size, size))

    # Rows are indexed by the true class, columns by the predicted class.
    for i in range(matrix.shape[0]):
        con_mat[matrix[i, 1]][matrix[i, 0]] += 1

    print("Confussion Matrix:")
    print(con_mat, end="\n\n")

    TP = np.diag(con_mat)
    print("True Positive:  ", TP)

    FP = con_mat.sum(axis=0) - TP
    print("False Positive: ", FP)

    FN = con_mat.sum(axis=1) - TP
    print("False Negative: ", FN)

    TN = con_mat.sum() - (TP + FP + FN)
    print("True Negative:  ", TN)

    # Pooled TP + FP equals the total number of samples, so the denominator is
    # zero only for an empty input.
    pooled_tp = TP.sum()
    micro_avg_precision = pooled_tp / (pooled_tp + FP.sum())
    return micro_avg_precision

In [11]:
calculate_micro_avg_precision(matrix)

Confussion Matrix:
[[46. 12.  2.]
 [ 9. 61.  0.]
 [ 2.  3. 25.]]

True Positive:   [46. 61. 25.]
False Positive:  [11. 15.  2.]
False Negative:  [14.  9.  5.]
True Negative:   [ 89.  75. 128.]


0.825

In [75]:
class Evaluation:
    """
        Multi-class classification metrics derived from a confusion matrix.

        Class labels are used directly as confusion-matrix indices, so they
        must be non-negative integers. Per-class ratios whose denominator is
        zero (e.g. the precision of a class that was never predicted) are
        reported as 0.0 instead of nan.

        Attributes:
            pred_y:  predicted labels, as passed to the constructor
            true_y:  true labels, as passed to the constructor
            size     (int): number of classes (largest label seen + 1)
            con_mat  (numpy.ndarray): size x size counts, rows = true class,
                                      columns = predicted class
            TP, FP, FN, TN (numpy.ndarray): per-class counts
    """

    def __init__(self, pred_y, true_y):
        """
            Args:
                pred_y: sequence of predicted class labels
                true_y: sequence of true class labels, same length as pred_y
        """
        self.pred_y = pred_y
        self.true_y = true_y
        self.init_confussion_matrix()

    @staticmethod
    def _safe_divide(numerator, denominator):
        """Element-wise numerator / denominator, yielding 0.0 where denominator is 0."""
        numerator = np.asarray(numerator, dtype=float)
        denominator = np.asarray(denominator, dtype=float)
        # `where=` skips the zero denominators; those entries keep the 0.0 from `out`.
        return np.divide(numerator, denominator,
                         out=np.zeros_like(denominator),
                         where=denominator != 0)

    def init_confussion_matrix(self):
        """Build the confusion matrix and the per-class TP / FP / FN / TN counts."""
        matrix = np.stack((self.pred_y, self.true_y), axis=1)

        # Size the matrix from the largest label found in EITHER column.
        # Counting only the distinct predicted labels breaks (IndexError) as
        # soon as a class is never predicted or the labels are not contiguous.
        self.size = int(matrix.max()) + 1 if matrix.size else 0
        self.con_mat = np.zeros((self.size, self.size))

        # Rows are indexed by the true class, columns by the predicted class.
        for i in range(matrix.shape[0]):
            self.con_mat[matrix[i, 1]][matrix[i, 0]] += 1

        self.TP = np.diag(self.con_mat)
        self.FP = self.con_mat.sum(axis=0) - self.TP
        self.FN = self.con_mat.sum(axis=1) - self.TP
        self.TN = self.con_mat.sum() - (self.TP + self.FP + self.FN)

    def getPrecision(self):
        """Per-class precision TP / (TP + FP); 0.0 for a class that was never predicted."""
        return self._safe_divide(self.TP, self.TP + self.FP)

    def getRecall(self):
        """Per-class recall TP / (TP + FN); 0.0 for a class with no true samples."""
        return self._safe_divide(self.TP, self.TP + self.FN)

    def getF1Score(self):
        """Per-class F1 (harmonic mean of precision and recall); 0.0 when both are 0."""
        precision = self.getPrecision()
        recall = self.getRecall()
        return self._safe_divide(2 * (precision * recall), precision + recall)

    def getMicroAvgPrecision(self):
        """Precision computed from TP and FP pooled over all classes."""
        pooled_tp = self.TP.sum()
        return pooled_tp / (pooled_tp + self.FP.sum())

    def getMacroAvgPrecision(self):
        """Unweighted mean of the per-class precisions."""
        precision = self.getPrecision()
        return precision.sum() / len(precision)

    def getMicroAvgRecall(self):
        """Recall computed from TP and FN pooled over all classes."""
        pooled_tp = self.TP.sum()
        return pooled_tp / (pooled_tp + self.FN.sum())

    def getMacroAvgRecall(self):
        """Unweighted mean of the per-class recalls."""
        recall = self.getRecall()
        return recall.sum() / len(recall)

    def getMicroAvgF1Score(self):
        """Harmonic mean of the micro-averaged precision and recall."""
        micro_avg_precision = self.getMicroAvgPrecision()
        micro_avg_recall = self.getMicroAvgRecall()
        denominator = micro_avg_precision + micro_avg_recall
        # Both micro averages are 0 when nothing was classified correctly;
        # report 0.0 rather than dividing 0 by 0.
        if denominator == 0:
            return np.float64(0.0)
        return 2 * ((micro_avg_precision * micro_avg_recall) / denominator)

    def getMacroAvgF1Score(self):
        """Unweighted mean of the per-class F1 scores."""
        f1Score = self.getF1Score()
        return f1Score.sum() / len(f1Score)

    def getAccuracy(self):
        """Per-class one-vs-rest accuracy (TP + TN) / total samples."""
        return (self.TP + self.TN) / self.con_mat.sum()

    def getOverAllAccuracy(self):
        """Fraction of all samples that were classified correctly."""
        return self.TP.sum() / self.con_mat.sum()

    def printEvaluation(self):
        """Print a per-class metrics table followed by the averaged metrics."""
        precision = self.getPrecision()
        recall = self.getRecall()
        f1Score = self.getF1Score()
        accuracy = self.getAccuracy()

        print("-" * 66)
        print("|{: ^12}|{: ^12}|{: ^12}|{: ^12}|{: ^12}|".format("Class", "Accuracy", "Precision", "Recall", "F1-Score"))
        for i in range(self.size):
            # The "Class" column is 1-based (label + 1).
            print("|{: >12}|{: >12.5f}|{: >12.5f}|{: >12.5f}|{: >12.5f}|".format(i+1, accuracy[i], precision[i], recall[i], f1Score[i]))

        print("-" * 66)

        print("Micro Avg Precision: ", self.getMicroAvgPrecision())
        print("Micro Avg Recall: ", self.getMicroAvgRecall())
        print("Micro Avg F1Score: ", self.getMicroAvgF1Score())

        print("Macro Avg Precision: ", self.getMacroAvgPrecision())
        print("Macro Avg Recall: ", self.getMacroAvgRecall())
        print("Macro Avg F1Score: ", self.getMacroAvgF1Score())

        print("Overall Accuracy: ", self.getOverAllAccuracy())

In [76]:
ev = Evaluation(pred_Y, true_Y)

In [42]:
ev.getPrecision()

array([0.80701754, 0.80263158, 0.92592593])

In [43]:
ev.getRecall()

array([0.76666667, 0.87142857, 0.83333333])

In [44]:
ev.getF1Score()

array([0.78632479, 0.83561644, 0.87719298])

In [45]:
ev.getMicroAvgPrecision()

0.825

In [46]:
ev.getMacroAvgPrecision()

0.8451916829109812

In [47]:
ev.getMicroAvgRecall()

0.825

In [48]:
ev.getMacroAvgRecall()

0.8238095238095239

In [49]:
ev.getMicroAvgF1Score()

0.825

In [50]:
ev.getMacroAvgF1Score()

0.8330447357123637

In [27]:
ev.getAccuracy()

array([0.84375, 0.85   , 0.95625])

In [53]:
ev.getOverAllAccuracy()

0.825

In [77]:
ev.printEvaluation()

------------------------------------------------------------------
|   Class    |  Accuracy  | Precision  |   Recall   |  F1-Score  |
|           1|     0.84375|     0.80702|     0.76667|     0.78632|
|           2|     0.85000|     0.80263|     0.87143|     0.83562|
|           3|     0.95625|     0.92593|     0.83333|     0.87719|
------------------------------------------------------------------
Micro Avg Precision:  0.825
Micro Avg Recall:  0.825
Micro Avg F1Score:  0.825
Macro Avg Precision:  0.8451916829109812
Macro Avg Recall:  0.8238095238095239
Macro Avg F1Score:  0.8330447357123637
Overall Accuracy:  0.825


### Linear Regression

In [2]:
X = np.array([[440, 1],
             [448, 1],
             [6205, 1],
             [4850, 1],
             [1420, 1],
             [16500, 1],
             [1776, 1],
             [6989, 1],
             [2040, 1],
             [9356, 1],])

In [8]:
X.T

array([[  440,   448,  6205,  4850,  1420, 16500,  1776,  6989,  2040,
         9356],
       [    1,     1,     1,     1,     1,     1,     1,     1,     1,
            1]])

In [9]:
X

array([[  440,     1],
       [  448,     1],
       [ 6205,     1],
       [ 4850,     1],
       [ 1420,     1],
       [16500,     1],
       [ 1776,     1],
       [ 6989,     1],
       [ 2040,     1],
       [ 9356,     1]])

In [10]:
Y = np.array([123, 137, 409, 459, 89, 1067, 230, 496, 208, 652])
Y

array([ 123,  137,  409,  459,   89, 1067,  230,  496,  208,  652])

In [4]:
Z = np.dot(X.T, X)
Z

array([[480381862,     50024],
       [    50024,        10]])

In [5]:
XTY = np.dot(X.T, Y)
XTY

array([33010827,     3870])

In [6]:
Z_inv = np.linalg.inv(Z)
Z_inv

array([[ 4.34514713e-09, -2.17361640e-05],
       [-2.17361640e-05,  2.08732987e-01]])

In [7]:
W = np.dot(Z_inv, XTY)
W

array([5.93179455e-02, 9.02679097e+01])

In [10]:
Berlin = 440 * W[0] + W[1]
Moscow = 2040 * W[0] + W[1]
Kyiv = 1591 * W[0] + W[1]

print("Berlin: ", Berlin)
print("Moscow: ", Moscow)
print("Kyiv:   ", Kyiv)

Berlin:  116.36780566234223
Moscow:  211.27651838815586
Kyiv:    184.64276087947442


In [22]:
pred_y = np.array([Berlin, Moscow, Kyiv])
true_y = np.array([123, 208, 138])

In [23]:
# Mean Squared Error
MSE = np.square(np.subtract(true_y, pred_y)).mean()
MSE

743.4229056467376

### K-Means Clustering

In [2]:
from sklearn.preprocessing import MinMaxScaler

In [33]:
X = np.array([[10, 125],
                  [12, 187],
                  [28, 196],
                  [18, 125],
                  [8, 174],
                  [23, 125],
                  [14, 132],
                  [20, 133],
                  [1, 180]])

Y = np.array([0, 0, 1, 0, 1, 1, 0, 1, 0])

In [34]:
# Min-max scale the features before clustering, so that both columns contribute
# on a comparable scale to the Euclidean distances computed later on.
scaler = MinMaxScaler()
# Fit the scaler on X (this example has no separate train / test split).
scaler.fit(X)
# Transform that same X. NOTE: this rebinds X to the scaled array, so the
# unscaled values are no longer available under this name afterwards.
X = scaler.transform(X)

In [19]:
max_iter = 1
num_clusters = 2
num_samples = X.shape[0]
num_features = X.shape[1]

In [20]:
# def init_centroids():
#     centroids = np.zeros((num_clusters, num_features))
    
#     for k in range(num_clusters):
#         centroid = X[np.random.choice(range(num_samples))]
#         centroids[k] = centroid
        
#     return centroids

In [21]:
def init_clusters():
    """Build the starting clusters from the labels in ``Y``.

    Reads the notebook-level ``X``, ``Y`` and ``num_clusters``.

    Returns:
        list[list[int]]: ``num_clusters`` lists; list ``k`` holds the indices
        of the samples in ``X`` whose entry in ``Y`` is ``k``.
    """
    seeded = [[] for _ in range(num_clusters)]

    # Sample i goes straight into the cluster named by Y[i]; no distance to a
    # centroid is computed at this stage.
    for sample_idx in range(len(X)):
        seeded[Y[sample_idx]].append(sample_idx)

    return seeded

In [22]:
def compute_new_centroids(clusters):
    """Return the mean position of the samples in each cluster.

    Reads the notebook-level ``X``, ``num_clusters`` and ``num_features``.

    Args:
        clusters: one collection of sample indices (rows of ``X``) per cluster.

    Returns:
        numpy.ndarray: array of shape ``(num_clusters, num_features)``; row
        ``k`` is the feature-wise mean of the samples listed in ``clusters[k]``.
    """
    means = np.zeros((num_clusters, num_features))

    for cluster_id in range(len(clusters)):
        members = X[clusters[cluster_id]]
        means[cluster_id] = members.mean(axis=0)

    return means

In [23]:
def update_clusters(centroids):
    """Assign every sample in ``X`` to its nearest centroid.

    Reads the notebook-level ``X`` and ``num_clusters``.

    Args:
        centroids: array with one centroid per row.

    Returns:
        list[list[int]]: ``num_clusters`` lists; list ``k`` holds the indices
        of the samples whose closest centroid (Euclidean distance) is row ``k``.
    """
    assignments = [[] for _ in range(num_clusters)]

    for sample_idx in range(len(X)):
        offsets = X[sample_idx] - centroids
        # Euclidean distance from this sample to each centroid.
        distances = np.sqrt((offsets ** 2).sum(axis=1))
        nearest = np.argmin(distances)
        assignments[nearest].append(sample_idx)

    return assignments

In [24]:
def predict_cluster_labels(clusters):
    """Flatten per-cluster index lists into one label per sample.

    Reads the notebook-level ``num_samples``.

    Args:
        clusters: one collection of sample indices per cluster.

    Returns:
        numpy.ndarray: float array of length ``num_samples`` where entry ``i``
        is the position in ``clusters`` of the cluster containing sample ``i``
        (entries for samples listed in no cluster stay 0).
    """
    labels = np.zeros(num_samples)

    # Walk every (sample, cluster) membership pair in cluster order.
    memberships = (
        (member, cluster_id)
        for cluster_id, members in enumerate(clusters)
        for member in members
    )
    for member, cluster_id in memberships:
        labels[member] = cluster_id

    return labels

In [25]:
# Seed the clusters ONCE from the labels in Y. Doing this inside the loop would
# throw away the previous iteration's assignment, so running more than one
# iteration would never make progress.
clusters = init_clusters()

for itr_num in range(max_iter):
    print("Iteration #: ", itr_num)

    # Move each centroid to the mean of its current members, then reassign
    # every sample to its nearest centroid.
    centroids = compute_new_centroids(clusters)
    new_clusters = update_clusters(centroids)

    # If no sample changed cluster, further iterations would give the same result.
    has_converged = new_clusters == clusters
    clusters = new_clusters

    if has_converged:
        print("K-Means has converged!")
        break

Iteration #:  0


In [26]:
y_pred = predict_cluster_labels(clusters)

In [27]:
y_pred

array([0., 1., 1., 0., 0., 1., 0., 1., 0.])