In [287]:
import pandas as pd
import numpy as np

In [288]:
def get_sw(df, label_col='species'):
    """Compute the within-class scatter matrix S_w of a labelled data set.

    S_w is the prior-weighted sum of the per-class covariance matrices,
    ``S_w = sum_i P_i * Sigma_i``, where ``P_i`` is the fraction of rows that
    belong to class ``i`` and ``Sigma_i`` is the covariance of the feature
    columns restricted to the rows of that class.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with one label column; every other column is used as a feature
        and must be numeric.
    label_col : str, default 'species'
        Name of the column that holds the class labels.

    Returns
    -------
    numpy.ndarray
        Array of shape (d, d), where d is the number of feature columns.
        A frame without rows yields an all-zero matrix.

    Notes
    -----
    Covariances come from ``DataFrame.cov()`` with its default settings
    (normalised by N - 1), so a class that has a single row contributes NaN
    entries to the result.
    """
    labels = df[label_col]
    data = df.drop(label_col, axis=1)

    n_rows = len(labels)
    n_features = data.shape[1]

    # Start from an explicit (d, d) zero matrix, the same way get_sb does, so
    # that a frame with no classes returns zeros instead of raising.
    sw = np.zeros((n_features, n_features))
    for c in labels.unique().tolist():
        in_class = labels == c
        # Vectorised count of the class members divided by the total row count.
        prior = in_class.sum() / n_rows
        sw += prior * data[in_class].cov().values

    return sw

In [289]:
def get_sb(df, label_col='species'):
    """Compute the between-class scatter matrix S_b of a labelled data set.

    ``S_b = sum_i P_i * (mu_i - mu_0)(mu_i - mu_0)^T`` where ``P_i`` is the
    fraction of rows that belong to class ``i``, ``mu_i`` is the mean feature
    vector of that class and ``mu_0`` is the mean feature vector of all rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with one label column; every other column is used as a feature
        and must be numeric.
    label_col : str, default 'species'
        Name of the column that holds the class labels.

    Returns
    -------
    numpy.ndarray
        Array of shape (d, d), where d is the number of feature columns.
    """
    labels = df[label_col]
    data = df.drop(label_col, axis=1)

    n_rows = len(labels)
    n_features = data.shape[1]
    global_mean = data.mean().values

    sb = np.zeros((n_features, n_features))
    for c in labels.unique().tolist():
        in_class = labels == c
        # Vectorised count of the class members divided by the total row count.
        prior = in_class.sum() / n_rows
        diff = data[in_class].mean().values - global_mean
        # Outer product of the 1-D difference vector with itself gives the
        # (d, d) rank-one contribution of this class.
        sb += prior * np.outer(diff, diff)

    return sb


In [290]:
def get_sm(df):
    """Return the mixture scatter matrix: get_sw(df) + get_sb(df).

    Parameters
    ----------
    df : pandas.DataFrame
        Passed unchanged to get_sw and get_sb.

    Returns
    -------
    The element-wise sum of the two scatter matrices.
    """
    within = get_sw(df)
    between = get_sb(df)
    return within + between

In [291]:
def get_j1(df):
    """Return the J1 criterion: trace(get_sm(df)) / trace(get_sw(df)).

    Parameters
    ----------
    df : pandas.DataFrame
        Passed unchanged to get_sm and get_sw.

    Returns
    -------
    The ratio of the two traces. No guard is applied for a zero denominator.
    """
    mixture_trace = get_sm(df).trace()
    within_trace = get_sw(df).trace()
    return mixture_trace / within_trace

In [292]:
def get_j2():
    """Placeholder: not implemented yet.

    Takes no arguments and always returns None.
    TODO: implement; presumably a second criterion next to get_j1 -- confirm
    the intended definition before filling this in.
    """
    return None

In [293]:
def get_j3():
    """Placeholder: not implemented yet.

    Takes no arguments and always returns None.
    TODO: implement; presumably a third criterion next to get_j1 -- confirm
    the intended definition before filling this in.
    """
    return None

In [294]:
# Load the iris data set from a remote CSV (requires network access), then
# print the column dtypes and non-null counts as a quick sanity check.
iris = pd.read_csv("https://raw.githubusercontent.com/toneloy/data/master/iris.csv")
iris.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [295]:
# Preview the frame: every second row among the first 150.
iris[:150:2]

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
6,4.6,3.4,1.4,0.3,setosa
8,4.4,2.9,1.4,0.2,setosa
...,...,...,...,...,...
140,6.7,3.1,5.6,2.4,virginica
142,5.8,2.7,5.1,1.9,virginica
144,6.7,3.3,5.7,2.5,virginica
146,6.3,2.5,5.0,1.9,virginica


In [296]:
# Distinct values found in the species column.
classes = iris['species'].unique()
classes

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [297]:
# Map each species name to its position in `classes` (0, 1, 2, ...).
dictionary = dict(zip(classes.tolist(), range(len(classes))))
dictionary

{'setosa': 0, 'versicolor': 1, 'virginica': 2}

In [298]:
# New frame (iris itself is left untouched) whose species column holds the
# integer codes from `dictionary`. Note: despite the variable name, this step
# encodes the species names into integers.
iris_species_decode = iris.assign(species=iris['species'].map(dictionary))
iris_species_decode[:150:5]

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,0
5,5.4,3.9,1.7,0.4,0
10,5.4,3.7,1.5,0.2,0
15,5.7,4.4,1.5,0.4,0
20,5.4,3.4,1.7,0.2,0
25,5.0,3.0,1.6,0.2,0
30,4.8,3.1,1.6,0.2,0
35,5.0,3.2,1.2,0.2,0
40,5.0,3.5,1.3,0.3,0
45,4.8,3.0,1.4,0.3,0


In [299]:
# S_w: prior-weighted sum of the per-class covariance matrices.
get_sw(iris_species_decode)

array([[0.26500816, 0.09272109, 0.16751429, 0.03840136],
       [0.09272109, 0.11538776, 0.05524354, 0.0327102 ],
       [0.16751429, 0.05524354, 0.18518776, 0.04266531],
       [0.03840136, 0.0327102 , 0.04266531, 0.04188163]])

In [300]:
# S_b: prior-weighted sum of outer products of (class mean - global mean).
get_sb(iris_species_decode)

array([[ 0.42141422, -0.13301778,  1.101656  ,  0.47519556],
       [-0.13301778,  0.07563289, -0.38159733, -0.15288444],
       [ 1.101656  , -0.38159733,  2.91401867,  1.24516   ],
       [ 0.47519556, -0.15288444,  1.24516   ,  0.53608889]])

In [301]:
# S_m = S_w + S_b for the same frame.
get_sm(iris_species_decode)

array([[ 0.68642239, -0.04029669,  1.26917029,  0.51359692],
       [-0.04029669,  0.19102064, -0.3263538 , -0.12017424],
       [ 1.26917029, -0.3263538 ,  3.09920642,  1.28782531],
       [ 0.51359692, -0.12017424,  1.28782531,  0.57797052]])

In [303]:
# J1 = trace(S_m) / trace(S_w) for the same frame.
get_j1(iris_species_decode)

np.float64(7.497745018331999)