In [1]:
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler

In [2]:
# Sample table: each row holds three numeric feature values followed by a class
# label ("cement", "path" or "grass").
# NOTE(review): the meaning/units of the three numeric columns are not stated
# in this notebook - confirm against the data source.
# Because floats and strings are mixed in a single np.array call, NumPy stores
# every cell as a string; the numeric columns have to be converted back to
# float before any arithmetic is done on them.
data = np.array([
[37.5926,0.319714,-2.13876, "cement"],
[48.5556,0.278822,-1.99604, "path"],
[19.0741,0.381867,2.39502, "grass"],
[14.6296,0.416705,2.30688, "grass"],
[11.8519,0.405556,2.12865, "grass"],
[44.8889,0.246605,-1.97178, "cement"],
[22.1852,0.411811,2.62471, "grass"],
[49.5926,0.302925,-2.02227, "path"],
[14.1481,0.421621,2.39249, "grass"],
[47.7037,0.290278,-2.0527, "path"],
[25.1111,0.335127,2.76904, "grass"],
[51.7778,0.290148,-2.0141, "path"],
[55.3704,0.268621,-2.00619, "cement"],
[47.5926,0.317542,-2.12287, "cement"],
[66.7778,0.32276,-2.12446, "cement"],
])

In [3]:
# Feature matrix: the first three columns of `data`. `data` is a string array
# (it mixes numbers with class labels), so cast to float explicitly instead of
# relying on scikit-learn to convert numeric strings implicitly.
X = data[:, :3].astype(float)

# Standardize every feature column (subtract its mean, divide by its standard
# deviation). fit_transform is equivalent to fit(X) followed by transform(X).
scaler = StandardScaler()
X_std = scaler.fit_transform(X)

In [4]:
# Show the standardized feature matrix produced by the scaler (one row per sample).
print(X_std)

[[ 0.02751356 -0.25176452 -0.85519872]
 [ 0.67046767 -0.97206809 -0.79038906]
 [-1.05855281  0.84304687  1.20360696]
 [-1.31921226  1.45671057  1.16358227]
 [-1.48211782  1.26032339  1.08264739]
 [ 0.45542436 -1.53956344 -0.77937251]
 [-0.87609413  1.37050384  1.30791001]
 [ 0.73128528 -0.54749906 -0.80230019]
 [-1.3474511   1.54330482  1.20245808]
 [ 0.62050575 -0.77027317 -0.81611857]
 [-0.70449699  0.01973208  1.37345077]
 [ 0.85944211 -0.7725631  -0.79859017]
 [ 1.07013963 -1.15175646 -0.79499821]
 [ 0.61398999 -0.29002382 -0.84798301]
 [ 1.73915676 -0.1981099  -0.84870504]]


In [5]:
def get_means_by_characteristic(classes: list, column: int, column_class: int, X_std: np.ndarray, data: np.ndarray):
    """Compute the per-class mean and sample standard deviation of one feature.

    Parameters
    ----------
    classes : list
        Class labels to summarize. Both returned lists follow this order.
    column : int
        Index of the feature column to read from ``X_std``.
    column_class : int
        Index of the column in ``data`` that holds each row's class label.
    X_std : array-like of shape (n_samples, n_features)
        Feature values; row ``i`` must describe the same sample as row ``i``
        of ``data``. NumPy arrays and nested lists are both accepted.
    data : array-like of shape (n_samples, n_columns)
        Table containing the class-label column. NumPy arrays and nested
        lists are both accepted.

    Returns
    -------
    tuple[list[float], list[float]]
        ``(means_by_class, std_by_class)``. The standard deviation uses
        ``ddof=1`` (sample standard deviation). A class with no rows yields
        ``nan`` for both values, and a class with a single row yields ``nan``
        for the standard deviation (NumPy emits a RuntimeWarning in both cases).

    Raises
    ------
    ValueError
        If ``X_std`` and ``data`` do not have the same number of rows.
    """
    features = np.asarray(X_std, dtype=float)
    labels = np.asarray(data)[:, column_class]

    if features.shape[0] != labels.shape[0]:
        raise ValueError(
            "X_std and data must have the same number of rows "
            f"(got {features.shape[0]} and {labels.shape[0]})"
        )

    means_by_class = []
    std_by_class = []
    for simple_class in classes:
        # Compare label by label so that mixed label/class types cannot
        # collapse into a single scalar comparison result.
        mask = np.array([label == simple_class for label in labels], dtype=bool)
        values = features[mask, column]
        means_by_class.append(float(np.mean(values)))
        std_by_class.append(float(np.std(values, ddof=1)))

    return means_by_class, std_by_class


In [6]:
classes = ["cement","path","grass"]
# One output line per feature column: (per-class means, per-class sample stds),
# with the classes in the order listed above.
for feature_column in range(3):
    print(get_means_by_characteristic(classes=classes, column=feature_column, column_class=3, X_std=X_std, data=data))

([0.7812448618063769, 0.7204252033176923, -1.1313208537171098], [0.6525525730445695, 0.10315569775053486, 0.30255196399610623])
([-0.6862436298706002, -0.7656008542204589, 1.0822702610391424], [0.6182421982557625, 0.17346221177721632, 0.5751095796870245])
([-0.8252514974775419, -0.8018494971161546, 1.2222759126420548], [0.03529791021236269, 0.010735833284087589, 0.10379728464029692])


In [7]:
# Variance of the first standardized feature for the "cement" class: the square
# of its sample standard deviation. The value is derived from the computation
# instead of being pasted from the printed output, so it stays correct if the
# data changes.
_, stds_first_feature = get_means_by_characteristic(classes=classes, column=0, column_class=3, X_std=X_std, data=data)
pow(float(stds_first_feature[classes.index("cement")]), 2)

0.4258248605870882

# **Ambiente**

In [8]:
def calculate_TP_TN(y_pred, y_real, class_):
    """Count the samples whose predicted AND true label both equal ``class_``.

    Despite the name, only agreements on ``class_`` are counted, i.e. the
    diagonal confusion-matrix cell for that class; samples where neither label
    equals ``class_`` are not counted.

    Parameters
    ----------
    y_pred : array-like of shape (n_samples,)
        Predicted labels (NumPy array or plain sequence).
    y_real : array-like of shape (n_samples,)
        True labels, aligned element by element with ``y_pred``.
    class_ : hashable
        The label to count agreements for.

    Returns
    -------
    int
        Number of positions where ``y_pred[i] == class_ == y_real[i]``.

    Raises
    ------
    ValueError
        If ``y_pred`` and ``y_real`` do not have the same shape.
    """
    predicted = np.asarray(y_pred)
    actual = np.asarray(y_real)
    if predicted.shape != actual.shape:
        raise ValueError(
            f"y_pred and y_real must have the same shape (got {predicted.shape} and {actual.shape})"
        )

    agreements = (predicted == class_) & (actual == class_)
    return int(np.count_nonzero(agreements))

def calculate_FN(y_pred, y_real, class_real, class_pred):
    """Count the samples truly labelled ``class_real`` that were predicted as ``class_pred``.

    This is the value of one confusion-matrix cell (row ``class_real``, column
    ``class_pred``). With ``class_real != class_pred`` it counts one kind of
    misclassification; with both equal it counts the correct predictions for
    that class.

    Parameters
    ----------
    y_pred : array-like of shape (n_samples,)
        Predicted labels (NumPy array or plain sequence).
    y_real : array-like of shape (n_samples,)
        True labels, aligned element by element with ``y_pred``.
    class_real : hashable
        True label to match.
    class_pred : hashable
        Predicted label to match.

    Returns
    -------
    int
        Number of positions where ``y_real[i] == class_real`` and
        ``y_pred[i] == class_pred``.

    Raises
    ------
    ValueError
        If ``y_pred`` and ``y_real`` do not have the same shape.
    """
    predicted = np.asarray(y_pred)
    actual = np.asarray(y_real)
    if predicted.shape != actual.shape:
        raise ValueError(
            f"y_pred and y_real must have the same shape (got {predicted.shape} and {actual.shape})"
        )

    hits = (predicted == class_pred) & (actual == class_real)
    return int(np.count_nonzero(hits))

In [9]:
# Predicted labels for 19 samples (Spanish labels: "puntual" = on time,
# "tarde" = late, "retrasado" = delayed).
y_pred = np.array([
    "puntual", "puntual", "puntual", "tarde", "puntual",
    "retrasado", "puntual", "puntual", "retrasado", "puntual",
    "puntual", "tarde", "puntual", "retrasado", "puntual",
    "puntual", "puntual", "puntual", "puntual",
])

# True labels for the same 19 samples, aligned position by position with y_pred.
y_real = np.array([
    "puntual", "tarde", "puntual", "retrasado", "puntual",
    "retrasado", "puntual", "puntual", "tarde", "puntual",
    "puntual", "tarde", "puntual", "retrasado", "puntual",
    "tarde", "puntual", "tarde", "retrasado",
])

In [10]:
# Start from an all-zero 3x3 float matrix; its cells are filled with counts afterwards.
confussion_matrix = np.zeros(shape=(3, 3))
print(confussion_matrix)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


In [11]:
# Diagonal of the hand-built matrix: number of samples where prediction and
# truth agree, for the classes in the order puntual, retrasado, tarde.
a1, a2, a3 = (
    calculate_TP_TN(y_pred=y_pred, y_real=y_real, class_=label)
    for label in ("puntual", "retrasado", "tarde")
)
np.fill_diagonal(confussion_matrix, [a1, a2, a3])

print(confussion_matrix)

[[10.  0.  0.]
 [ 0.  2.  0.]
 [ 0.  0.  1.]]


In [12]:
# Off-diagonal check: samples that are truly "puntual" but were predicted as "retrasado".
tf1 = calculate_FN(
    y_pred=y_pred,
    y_real=y_real,
    class_pred="retrasado",
    class_real="puntual",
)
print(tf1)

0


In [13]:
# Reference confusion matrix from scikit-learn (rows = true labels, columns =
# predicted labels). The label order is pinned explicitly so rows/columns line
# up with the hand-built matrix (puntual, retrasado, tarde) rather than relying
# on scikit-learn's default sorted-label order.
# `confusion_matrix` is imported in the notebook's top import cell.
cm = confusion_matrix(y_real, y_pred, labels=["puntual", "retrasado", "tarde"])
print(cm)

[[10  0  0]
 [ 1  2  1]
 [ 3  1  1]]
