In [165]:
import numpy as np
import pandas as pd
from itertools import combinations

In [166]:
def FDR(X, y, ind):
    """Compute the multi-class Fisher Discriminant Ratio (FDR) of one feature.

    For every unordered pair of classes ``(i, j)`` the term
    ``(mean_i - mean_j) ** 2 / (var_i + var_j)`` is evaluated on the selected
    feature row, and all terms are summed.

    Parameters
    ----------
    X : array-like, shape (n_features, n_samples)
        Data matrix with one row per feature and one column per sample.
    y : array-like, shape (n_samples,)
        Class label of each sample (i.e. of each column of ``X``).
    ind : int
        Row index of the feature to evaluate.

    Returns
    -------
    numpy.float64
        Sum of the pairwise ratios; ``0.0`` when fewer than two classes are
        present (there are no pairs to sum).

    Raises
    ------
    ValueError
        If ``X`` is not two-dimensional.

    Notes
    -----
    Variances are sample variances (``ddof=1``). A class with a single sample
    therefore has a nan variance, and a pair of classes whose variances sum to
    zero divides by zero; NumPy propagates nan/inf (emitting a RuntimeWarning)
    instead of raising.
    """
    # Coerce to arrays so lists / pandas objects behave like ndarrays. With a
    # plain list, ``y == c`` would be a single ``False`` instead of a mask.
    X = np.asarray(X)
    y = np.asarray(y)
    if X.ndim != 2:
        raise ValueError(
            "X must be 2-D with shape (n_features, n_samples); "
            f"got an array with {X.ndim} dimension(s)"
        )

    feature = X[ind]
    classes = np.unique(y)

    # Per-class mean and sample variance of the selected feature.
    means = np.array([np.mean(feature[y == c]) for c in classes], dtype=float)
    variances = np.array(
        [np.var(feature[y == c], ddof=1) for c in classes], dtype=float
    )

    # One ratio per unordered pair of classes.
    ratios = [
        (means[i] - means[j]) ** 2 / (variances[i] + variances[j])
        for i, j in combinations(range(len(classes)), 2)
    ]

    return np.sum(ratios)

In [167]:
# Load the iris data set from its remote CSV and print a schema summary.
IRIS_CSV_URL = "https://raw.githubusercontent.com/toneloy/data/master/iris.csv"
iris = pd.read_csv(IRIS_CSV_URL)
iris.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [168]:
# Distinct values of the species column.
classes = iris.loc[:, 'species'].unique()
classes

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [169]:
# Map each species name to its integer position in `classes`.
dictionary = dict(zip(classes.tolist(), range(len(classes))))
dictionary

{'setosa': 0, 'versicolor': 1, 'virginica': 2}

In [170]:
# New frame whose species column holds the integer codes from `dictionary`;
# `iris` itself is left untouched.
iris_species_decode = iris.assign(species=iris['species'].map(dictionary))
iris_species_decode.iloc[:150:5]

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,0
5,5.4,3.9,1.7,0.4,0
10,5.4,3.7,1.5,0.2,0
15,5.7,4.4,1.5,0.4,0
20,5.4,3.4,1.7,0.2,0
25,5.0,3.0,1.6,0.2,0
30,4.8,3.1,1.6,0.2,0
35,5.0,3.2,1.2,0.2,0
40,5.0,3.5,1.3,0.3,0
45,4.8,3.0,1.4,0.3,0


Para calcular el FDR con la función `FDR`, los datos deben ir transpuestos: cada fila representa una característica y cada columna una muestra.

In [171]:
# Class code of every sample as a 1-D NumPy array. A Series has a single
# axis, so no transpose is needed.
targets = iris_species_decode['species'].to_numpy()
targets

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [183]:
# Feature-only frame (label column removed) and the feature names in column order.
iris_species_without_species_col = iris_species_decode.drop(columns='species')
data_columns = list(iris_species_without_species_col.columns)
data_columns

['sepal_length', 'sepal_width', 'petal_length', 'petal_width']

In [184]:
# Transpose so that each row is one feature and each column one sample,
# which is the layout `FDR` indexes (`X[ind]` selects a feature row).
data = np.array(iris_species_without_species_col.transpose())
data

array([[5.1, 4.9, 4.7, 4.6, 5. , 5.4, 4.6, 5. , 4.4, 4.9, 5.4, 4.8, 4.8,
        4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5. ,
        5. , 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5. , 5.5, 4.9, 4.4,
        5.1, 5. , 4.5, 4.4, 5. , 5.1, 4.8, 5.1, 4.6, 5.3, 5. , 7. , 6.4,
        6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5. , 5.9, 6. , 6.1, 5.6,
        6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7,
        6. , 5.7, 5.5, 5.5, 5.8, 6. , 5.4, 6. , 6.7, 6.3, 5.6, 5.5, 5.5,
        6.1, 5.8, 5. , 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 6.3, 5.8, 7.1, 6.3,
        6.5, 7.6, 4.9, 7.3, 6.7, 7.2, 6.5, 6.4, 6.8, 5.7, 5.8, 6.4, 6.5,
        7.7, 7.7, 6. , 6.9, 5.6, 7.7, 6.3, 6.7, 7.2, 6.2, 6.1, 6.4, 7.2,
        7.4, 7.9, 6.4, 6.3, 6.1, 7.7, 6.3, 6.4, 6. , 6.9, 6.7, 6.9, 5.8,
        6.8, 6.7, 6.7, 6.3, 6.5, 6.2, 5.9],
       [3.5, 3. , 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3. ,
        3. , 4. , 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3. ,
       

In [173]:
# Number of rows in `data`, i.e. how many features will be scored.
l = len(data)
l

4

In [174]:
# Score every feature row of `data` against the class labels, in row order.
fdrs = list(map(lambda row: FDR(data, targets, row), range(l)))
fdrs

[np.float64(7.582273507540444),
 np.float64(2.825609473089156),
 np.float64(84.34297915349869),
 np.float64(64.1199102077605)]

In [187]:
# Pair each feature name with its score, converted to a plain Python float.
fdrs_map_to_colum = dict(zip(data_columns, map(float, fdrs)))
fdrs_map_to_colum

{'sepal_length': 7.582273507540444,
 'sepal_width': 2.825609473089156,
 'petal_length': 84.34297915349869,
 'petal_width': 64.1199102077605}