In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import sys
import random
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix


In [2]:
df = pd.read_csv("DataOpt/Final_join_prueba.csv")

In [5]:
# Funciones

# Funcion para Normalizar la Vista minable a exepción de la etiqueta(Variable Objetivo)
def EncoderViewMinable(df):
    new_dataset = df
    norm = MinMaxScaler()
    norm = norm.fit(new_dataset.values[:,:])
    valMin = norm.data_min_
    valMax = norm.data_max_
    dataRange = norm.data_range_
    #df_norm = norm.fit_transform(df.values[:,:-1])

    return [valMin, valMax, dataRange]




'''
Funcion para generar el vector de pesos aleatorio.
Entradas:
w: Vector de pesos w [0,1], igual al numero de caracteristicas normalizadas.
nc: Nunero de ceros que debe contener el vector de pesos [10,20,30,40,50]
'''


def GenerateWeightVector(w, nc):
    posceros = np.random.choice(len(w), nc, replace=False)
    w[posceros] = 0
    s  = np.sum(w)
    wf = np.round(w/s, 4)
    return wf




# Funcion para Normalizar la Vista minable a exepción de la etiqueta(Variable Objetivo)
# df: es la matriz df.values [] , no incluye la etiqueta del grupo
def NormalizeViewMinable(df,valMin, dataRange):
    dataset_normalizado = np.empty((df.shape[0], df.shape[1]))
    for i in range(df.shape[0]):
        for j in range(df.shape[1]):
            dataset_normalizado[i][j]= (df[i][j] - valMin[j])/dataRange[j]

    return dataset_normalizado

In [11]:
#Lectura de los Encoders
valMin = np.loadtxt('DataOpt/Encoder_ValMin.txt')
print("Encoder: ", len(valMin))
dataRange = np.loadtxt('DataOpt/Encoder_dataRange.txt')

#2. Normalizamos los datos
df_normalizado = NormalizeViewMinable(df.values[:, :-1],valMin, dataRange)
print(df_normalizado)

Encoder:  174
[[0.26666667 0.3559322  0.64179104 ... 0.         1.         0.        ]
 [0.33333333 0.3559322  0.3880597  ... 0.         1.         0.        ]
 [0.26666667 0.33898305 0.47761194 ... 0.         1.         0.        ]
 ...
 [0.33333333 0.33898305 0.6119403  ... 0.         1.         0.        ]
 [0.26666667 0.37288136 0.55223881 ... 0.         1.         0.        ]
 [0.33333333 0.3559322  0.40298507 ... 0.         1.         0.        ]]


In [16]:
df_normalizado

array([[0.26666667, 0.3559322 , 0.64179104, ..., 0.        , 1.        ,
        0.        ],
       [0.33333333, 0.3559322 , 0.3880597 , ..., 0.        , 1.        ,
        0.        ],
       [0.26666667, 0.33898305, 0.47761194, ..., 0.        , 1.        ,
        0.        ],
       ...,
       [0.33333333, 0.33898305, 0.6119403 , ..., 0.        , 1.        ,
        0.        ],
       [0.26666667, 0.37288136, 0.55223881, ..., 0.        , 1.        ,
        0.        ],
       [0.33333333, 0.3559322 , 0.40298507, ..., 0.        , 1.        ,
        0.        ]])

In [12]:
# 3 Generar vector de pesos
np.random.seed(0)
vp = np.random.rand(174)
print("Vector de pesos Original: ", vp)
wi = GenerateWeightVector(vp, 0)
print(wi)
print("Suma vector de Pesos: ", sum(wi))

Vector de pesos Original:  [0.5488135  0.71518937 0.60276338 0.54488318 0.4236548  0.64589411
 0.43758721 0.891773   0.96366276 0.38344152 0.79172504 0.52889492
 0.56804456 0.92559664 0.07103606 0.0871293  0.0202184  0.83261985
 0.77815675 0.87001215 0.97861834 0.79915856 0.46147936 0.78052918
 0.11827443 0.63992102 0.14335329 0.94466892 0.52184832 0.41466194
 0.26455561 0.77423369 0.45615033 0.56843395 0.0187898  0.6176355
 0.61209572 0.616934   0.94374808 0.6818203  0.3595079  0.43703195
 0.6976312  0.06022547 0.66676672 0.67063787 0.21038256 0.1289263
 0.31542835 0.36371077 0.57019677 0.43860151 0.98837384 0.10204481
 0.20887676 0.16130952 0.65310833 0.2532916  0.46631077 0.24442559
 0.15896958 0.11037514 0.65632959 0.13818295 0.19658236 0.36872517
 0.82099323 0.09710128 0.83794491 0.09609841 0.97645947 0.4686512
 0.97676109 0.60484552 0.73926358 0.03918779 0.28280696 0.12019656
 0.2961402  0.11872772 0.31798318 0.41426299 0.0641475  0.69247212
 0.56660145 0.26538949 0.52324805 0.09

In [26]:
def qualityFunction(df_norm, wi,df,k):
    #print("longitud df_norm: ",len(df_norm))
    #print("Longitud wi: ", len(wi))
    y_pred = []
    for i in range(len(df_norm)):
        vrf = df_norm[i]
        print(f"Vector {i} :", vrf) 
        ListaPesosPonderados= [[sys.float_info.max,0] for a in range(k)]
        for j in range(len(df_norm)):
            if i != j:
                print(ListaPesosPonderados)
                ri = wi* np.power((df_norm[j] - vrf), 2)
                dE = np.sum(ri)
                print("Distancia Euclideana: ", dE)
                if dE < ListaPesosPonderados[k-1][0]:
                    ListaPesosPonderados[k-1][0]=dE
                    ListaPesosPonderados[k-1][1]=j
                    ListaPesosPonderados.sort(key=lambda x: x[0], reverse=False)
        print(f"vector {i} es muy similar a los vectores en la posición {ListaPesosPonderados}")

        grupos = []
        for i in range(len(ListaPesosPonderados)):
            index=ListaPesosPonderados[i][1]
            g = df.values[index][-1]
            grupos.append(g)
        
        print("Grupos asociados: ", grupos)
        grupoSelected = int(pd.Series(grupos).value_counts().index[0])
        print("Grupo seleccionado: ", grupoSelected)
        y_pred.append(grupoSelected)
    qs = accuracy_score(df.values[:,-1], y_pred)
    #mc = confusion_matrix(df.values[:,-1], y_pred)
    #print("Matriz de Confusión: ", mc)

    return [qs, y_pred]



def qualityFunctionOriginal(df_norm, wi,df):
    #print("longitud df_norm: ",len(df_norm))
    #print("Longitud wi: ", len(wi))
    y_pred = []
    for i in range(len(df_norm)):
        posMinDep = 0
        minDep = sys.float_info.max 
        vrf = df_norm[i]
        print(f"vercor {i}: {vrf}")
        
        for j in range(len(df_norm)):
            if i != j:
                ri = wi* np.power((df_norm[j] - vrf), 2)
                dE = np.sum(ri)
                print("dis: ",dE)
                if dE < minDep:
                    posMinDep=j
                    minDep = dE
        #print(posMinDep)
        grupo=df.values[posMinDep][-1]
        print("Grupo: ", grupo)
        y_pred.append(grupo)
        print(f"Posición {posMinDep} y distancia {minDep} hasta el momento")
    qs = accuracy_score(df.values[:,-1], y_pred)
    return [qs, y_pred]


In [39]:
calidad = qualityFunction(df_normalizado, wi,df,2)


Vector 0 : [0.26666667 0.3559322  0.64179104 0.66257669 0.02597403 0.
 0.         0.         0.         0.         0.         0.33333333
 0.25       0.         0.         0.         0.22222222 0.
 0.         0.         0.33333333 0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.28571429 0.20634921 0.33333333 0.2        0.         0.
 0.32307692 0.31147541 0.25       0.         0.3        0.
 0.         0.         0.         0.         0.7        0.
 0.         1.         0.         0.         0.         0.
 0.         0.         0.         0.52478134 0.64948454 0.59565217
 0.42468619 0.57012481 0.46341463 0.24986667 0.2173913  0.34443657
 0.66025641 0.70881226 0.80365297 0.51576577 0.47831993 0.66666667
 0.63812601 0.77272727 0.23794549 0.43603133 0.78214286 0.72834646
 0.35328185 0.54524816 0.43956044 0.28080963 0.29032258 0.60872483
 1.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         

In [40]:
calidad

[0.2608695652173913,
 [2, 2, 2, 0, 2, 1, 1, 0, 2, 1, 0, 0, 0, 2, 0, 1, 2, 0, 2, 2, 2, 0, 0]]

In [21]:
df.Grupo

0     0
1     0
2     0
3     0
4     0
5     0
6     0
7     1
8     1
9     1
10    1
11    1
12    1
13    1
14    1
15    2
16    2
17    2
18    2
19    2
20    2
21    2
22    2
Name: Grupo, dtype: int64

In [64]:
from sklearn.cluster import KMeans
from sklearn import metrics

In [66]:
label = df['Grupo']
df_normalizado

array([[0.26666667, 0.3559322 , 0.64179104, ..., 0.        , 1.        ,
        0.        ],
       [0.33333333, 0.3559322 , 0.3880597 , ..., 0.        , 1.        ,
        0.        ],
       [0.26666667, 0.33898305, 0.47761194, ..., 0.        , 1.        ,
        0.        ],
       ...,
       [0.33333333, 0.33898305, 0.6119403 , ..., 0.        , 1.        ,
        0.        ],
       [0.26666667, 0.37288136, 0.55223881, ..., 0.        , 1.        ,
        0.        ],
       [0.33333333, 0.3559322 , 0.40298507, ..., 0.        , 1.        ,
        0.        ]])

In [67]:
df.dtypes.unique()

array([dtype('int64'), dtype('float64')], dtype=object)

In [106]:
kmeans = KMeans(3,max_iter=1000).fit(df_normalizado)

In [107]:
centroids = kmeans.cluster_centers_

In [108]:
kmeans.labels_

array([1, 2, 1, 1, 0, 1, 2, 2, 0, 0, 2, 1, 1, 1, 0, 0, 2, 1, 0, 0, 2, 1,
       2])

In [104]:
kmeans.labels_ = np.array([0,0,0,0,0,0,0,1,1,1,2,1,1,1,2,2,2,2,2,2,2,2,2])
len(kmeans.labels_)

23

In [109]:
print("Homogeneity_score: ", metrics.homogeneity_score(label, kmeans.labels_))
print("Completeness_score: ", metrics.completeness_score(label, kmeans.labels_))
print("v_measure_score: ", metrics.v_measure_score(label, kmeans.labels_))
print("Adjusted_rand_score: ", metrics.adjusted_rand_score(label, kmeans.labels_))
print("Adjusted_mutual_info_score: ", metrics.adjusted_mutual_info_score(label,  kmeans.labels_))



Homogeneity_score:  0.043354699711899446
Completeness_score:  0.043571187423383874
v_measure_score:  0.04346267398782621
Adjusted_rand_score:  -0.0509502628386575
Adjusted_mutual_info_score:  -0.05551623591118613


# PRUEBAS INTERVALOS DE CONFIANZA

In [258]:
import pandas as pd
import numpy as np
from sklearn.linear_model import ElasticNet
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import f1_score, accuracy_score
from sklearn.metrics import mean_absolute_error,max_error,mean_squared_error
from sklearn.metrics import mean_squared_error
from scipy import stats
import joblib


In [284]:
# Funciones

# Funciones para el calculo de la regresion Lineal
class LinearRegession():
    def __init__(self, df, alpha, l1_ratio):
        self.df = df
        self.alpha = alpha
        self.l1_ratio = l1_ratio
    

    def CalcularModeloLR(self):
        # alpha=0.1, l1_ratio=0.97
        Y = self.df.RDT_AJUSTADO.values
        X = self.df.drop(["RDT_AJUSTADO","ID_LOTE"], axis=1).values 
        modelElasticNet = ElasticNet(alpha=self.alpha, l1_ratio=self.l1_ratio, random_state=123)
        model = modelElasticNet.fit(X,Y)
        #r_2 = model.score(X,Y)
        # Pedicciones
        yhat = model.predict(X)
        r_2 = r2_score(Y, yhat)
        # Calcular el error residual en el conjunto de entrenamiento
        residuals = Y - yhat
        print("RESIDUOS: ", residuals)

        # Calcular la varianza del error residual
        residual_variance = np.var(residuals, ddof=1)
        print("Varianza residual: ", residual_variance)
        
        return [model, r_2, yhat, residual_variance]


# Funciones para el calculo de la regresion Lineal
class LinearRegessionv2():
    def __init__(self, df, alpha, l1_ratio):
        self.df = df
        self.alpha = alpha
        self.l1_ratio = l1_ratio
    

    def CalcularModeloLR(self):
        # alpha=0.1, l1_ratio=0.97
        Y = self.df.RDT_AJUSTADO.values
        X = self.df.drop(["RDT_AJUSTADO","ID_LOTE"], axis=1).values 
        modelElasticNet = ElasticNet(alpha=self.alpha, l1_ratio=self.l1_ratio, random_state=123)
        model = modelElasticNet.fit(X,Y)
        #r_2 = model.score(X,Y)
        # Pedicciones
        yhat = model.predict(X)
        r_2 = r2_score(Y, yhat)

        return  r_2
    

# Funciones apra el calculo de los Intervalos de Confianza
"""
model: Modelo entrenado
x: Valor a testear (X_test)
varianza residual: 
"""
def predict_with_confidence_intervals(model, X, residual_variance, confidence=0.95):
    # Predicciones
    preds = model.predict(X)

    # Grados de libertad
    n = X.shape[0]
    print("n...",n)
    p = X.shape[1]
    print("p....",p)
    df = n - p - 1

    
    # Error estándar de las predicciones
    se_pred = np.sqrt(residual_variance * (1 + np.sum((X - np.mean(X, axis=0))**2 / np.var(X, axis=0), axis=1)))
    print("Error Estandar", se_pred)
    # Valor crítico para el intervalo de confianza
    t_value = stats.t.ppf((1 + confidence) / 2., df)
    print("t_value: ", t_value)
    # Intervalos de confianza
    ci_upper = preds + t_value * se_pred
    ci_lower = preds - t_value * se_pred

    return preds, ci_lower, ci_upper


def cargarGrupos():
    # Cargamos los respectivos grupos
    listaGruposDefinitivos = []
    G0 = pd.read_excel("DataOpt/grupo_N0.xlsx")
    G1 = pd.read_excel("DataOpt/grupo_N1.xlsx")
    G2 = pd.read_excel("DataOpt/grupo_N2.xlsx")

    print(f"Longitud G0: ",{len(G0)},"longitud G1: ", {len(G1)} , "Longitud G2: ", {len(G2)})

    # Eliminamos clumnas
    G0 = G0.drop(["Unnamed: 0"], axis=1)
    G1 = G1.drop(["Unnamed: 0"], axis=1)
    G2 = G2.drop(["Unnamed: 0"], axis=1)
    listaGruposDefinitivos.append(G0)
    listaGruposDefinitivos.append(G1)
    listaGruposDefinitivos.append(G2)

    return listaGruposDefinitivos


# Funcion apra calcular la correlación de los grupos Finales
def CalcularCorrelationInitial(gruposDefinitivos):
    correlacionesIniciales=[]
    for i in range(len(gruposDefinitivos)):
        r_2 = LinearRegessionv2(gruposDefinitivos[i], 0.1, 0.97).CalcularModeloLR()
        correlacionesIniciales.append(r_2)
    
    return correlacionesIniciales



# test_clean: df sin rdt_ajustado y ID_LOTE
def PredctionsModels(lista_modelos, test_clean):
    predictions = []
    for i in range(len(lista_modelos)):
        psi_pred = []
        print("Prediciones Modelo ", i)
        for j in range(len(test_clean)):
            pred = lista_modelos[i].predict(test_clean.values[j].reshape(1,-1))
            psi_pred.append(pred[0])
        
        predictions.append(psi_pred)
    #Lista de prediciones asociada c/modelo
    return predictions


# Funcion para Cargar modelos .PKL
def CargueModelos():
    lista_modelos = []
    for i in range(3):
        model = joblib.load(f'DataOpt/modelo_entrenado_{i}.pkl') # Carga del modelo.
        lista_modelos.append(model)
    print(len(lista_modelos))
    return lista_modelos

In [285]:
# Lectura del conjunto de test
test = pd.read_csv("DataOpt/dataset_test_Original.csv")
test.head(5)
test_original = test.copy()
test_clean = test.copy()
test_clean = test.drop(["ID_LOTE","RDT_AJUSTADO"], axis=1)
print("Dimenciones conjunto de test: ", test_clean.shape)
test_clean.head(5)

Dimenciones conjunto de test:  (40, 174)


Unnamed: 0,DIAS_EN_EMERGER,DIAS_EN_EMERGER_A_FLORECER,DIAS_EN_FLORECER_A_COSECHAR,POBLACION_20DIAS_AJT,ALTURA_LOT,ContEnfQui_Emer_Flor,ContEnfQui_Flor_Cose,ContMalMec_Siem_Emer,ContMalMec_Emer_Flor,ContMalMec_Flor_Cose,...,CAP_ENDURE_RASTA,MOTEADOS_RASTA,MOTEADOS_MAS70cm._RASTA,OBSERVA_EROSION_RASTA,OBSERVA_MOHO_RASTA,OBSERVA_RAICES_VIVAS_RASTA,OBSERVA_HOJARASCA_MO_RASTA,SUELO_NEGRO_BLANDO_RASTA,CUCHILLO_PRIMER_HTE_RASTA,CERCA_RIOS_QUEBRADAS_RASTA
0,4,47,94,62000,6,0,0,0,0,0,...,0,1,0,0,0,1,0,0,1,1
1,1,49,84,60000,17,0,0,0,0,0,...,0,0,0,0,0,1,0,0,1,0
2,5,46,84,73000,6,0,0,0,0,0,...,0,0,0,0,0,1,1,0,1,0
3,5,50,76,62000,10,0,0,0,0,0,...,0,1,0,0,0,1,0,0,1,1
4,4,45,86,56000,9,0,0,0,0,0,...,0,0,0,0,0,1,0,0,1,0


In [287]:

grupos_definitivos = cargarGrupos()
# Correlaciones iniciales modelos Originales
list_corr = CalcularCorrelationInitial(grupos_definitivos)
print("Correlaciones Iniciales: ", list_corr) 

Longitud G0:  {264} longitud G1:  {260} Longitud G2:  {235}
[0.9399554810754649, 0.9406415751094765, 0.9343683006100506]


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


In [269]:
"""
def agregarPredicitions(test_original, predictions):
    test_predictions = test_original
    gruposTestPrediciones=[]
    for i in range(len(predictions)):
        test_predictions["RDT_AJUSTADO"] = predictions[i]
        print(test_predictions.RDT_AJUSTADO)
        gruposTestPrediciones.append(test_predictions)
    
    return gruposTestPrediciones

"""
def agregarPredicitions(test_original, predictions):
    df0 = test_original.copy()
    df1= test_original.copy()
    df2 = test_original.copy()
    df0.RDT_AJUSTADO = predictions[0]
    df1.RDT_AJUSTADO = predictions[1]
    df2.RDT_AJUSTADO = predictions[2]

    return [df0,df1,df2]


        

In [274]:
df0,df1,df2 = agregarPredicitions(test_original, predictions)

In [260]:
r_2 = LinearRegessionv2(test_original, 0.1, 0.97).CalcularModeloLR()

  model = cd_fast.enet_coordinate_descent(


In [22]:
model, r_2, yhat, residual_variance = LinearRegession(G0, 0.1, 0.97).CalcularModeloLR()

RESIDUOS:  [-1.64725946e+02  1.24196386e+00 -2.00793462e+02 -5.32625508e+01
  1.49608503e+02  3.45987843e+01  2.51925837e+02 -2.90354243e+02
  2.91033955e+02 -2.57505698e+02 -5.66418548e+01 -6.96889286e+02
  2.55622919e+02 -4.51888657e+01  4.78408136e+02  3.10732102e+01
  5.43626886e+02  1.40853646e+02 -2.71411980e+02 -2.58181836e+02
  2.21877473e+02  2.52039252e+02 -1.49478479e+02 -8.77981457e+01
  3.70915712e+01  3.43482248e+02  4.43465449e+02 -3.84940643e+02
  3.73771866e+00 -1.76263431e+01  4.05737260e+02 -4.87301804e+02
 -3.08316864e+01  2.42230460e+02  2.10772723e+02  1.52978553e+02
  2.10523697e+02  2.07725402e+02 -2.34693833e+02  1.34489239e+02
  1.18679258e+02  2.36413494e+01 -2.52112483e+02  1.37464371e+02
 -3.33149089e+01 -1.01243490e+02  4.39589554e+01 -5.41591539e+02
  5.28538430e+02 -4.12286561e+02  2.35298643e+02  3.78507004e+02
  3.33054565e+02  9.28311058e+01  5.67055264e+02 -2.20134291e+02
  5.14843527e+01 -5.11379464e+02  1.76892618e+02  1.20413381e+02
  2.02363074e+

  model = cd_fast.enet_coordinate_descent(


In [41]:
test_clean.values[0]

array([ 4.000000e+00,  4.700000e+01,  9.400000e+01,  6.200000e+04,
        6.000000e+00,  0.000000e+00,  0.000000e+00,  0.000000e+00,
        0.000000e+00,  0.000000e+00,  0.000000e+00,  0.000000e+00,
        1.000000e+00,  0.000000e+00,  0.000000e+00,  0.000000e+00,
        2.000000e+00,  0.000000e+00,  0.000000e+00,  0.000000e+00,
        3.450000e+01,  0.000000e+00,  0.000000e+00,  0.000000e+00,
        0.000000e+00,  0.000000e+00,  0.000000e+00,  0.000000e+00,
        0.000000e+00,  0.000000e+00,  1.000000e+00,  1.000000e+00,
        2.000000e+00,  6.000000e+00, -1.000000e+00, -1.000000e+00,
        2.200000e+01,  1.200000e+01,  2.200000e+01,  0.000000e+00,
        0.000000e+00,  0.000000e+00,  0.000000e+00,  0.000000e+00,
        0.000000e+00,  3.500000e+01,  6.500000e+01,  0.000000e+00,
        0.000000e+00,  1.000000e+02,  0.000000e+00,  0.000000e+00,
        0.000000e+00,  0.000000e+00,  0.000000e+00,  0.000000e+00,
        0.000000e+00,  3.285000e+01,  2.373000e+01,  2.829000e

In [25]:
preds, ci_lower, ci_upper = predict_with_confidence_intervals(model, test_clean.values, residual_variance, confidence=0.95)

n... 41
p.... 174
Error Estandar [ 3747.78629085  3292.09779882  3093.11353385  2899.67847055
  3611.50318667  2810.06540093  3405.37472713  3647.80589911
  3218.89349572  3400.45461987  4996.06970365  3162.63263789
  4893.96592153  3121.84007543  4356.99711507  3284.42724967
  3749.52031473  2949.08748687  4449.50782294  3120.17057997
  2698.01901844  3826.98385184  3297.68901209  3682.7928701
  4925.57483049  4191.7649714   4315.40845856  2900.91008111
  3658.30912036  3064.34140534  2742.48991303  3199.22346288
  5823.51552043  4208.04054075  3579.67373547  3185.55386893
  3375.80846662  2797.93543374  3295.49376571  3528.87952144
 16867.10707972]
t_value:  nan


In [16]:
ci_lower

array([nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       nan, nan])

In [15]:
# Visualizar resulatdos
for i in range(len(test_clean)):
    print(f"Prediction: {preds[i]:.2f}, Confidence Interval: [{ci_lower[i]:.2f}, {ci_upper[i]:.2f}]")


Prediction: 6330.91, Confidence Interval: [nan, nan]
Prediction: 8895.14, Confidence Interval: [nan, nan]
Prediction: 5441.12, Confidence Interval: [nan, nan]
Prediction: 4891.60, Confidence Interval: [nan, nan]
Prediction: 4646.89, Confidence Interval: [nan, nan]
Prediction: 7312.99, Confidence Interval: [nan, nan]
Prediction: 2321.79, Confidence Interval: [nan, nan]
Prediction: 6271.74, Confidence Interval: [nan, nan]
Prediction: 4292.44, Confidence Interval: [nan, nan]
Prediction: 4335.76, Confidence Interval: [nan, nan]
Prediction: 3385.36, Confidence Interval: [nan, nan]
Prediction: 4937.10, Confidence Interval: [nan, nan]
Prediction: 1077.51, Confidence Interval: [nan, nan]
Prediction: 4143.07, Confidence Interval: [nan, nan]
Prediction: 9107.45, Confidence Interval: [nan, nan]
Prediction: 2343.04, Confidence Interval: [nan, nan]
Prediction: 5583.90, Confidence Interval: [nan, nan]
Prediction: 6670.71, Confidence Interval: [nan, nan]
Prediction: 5172.98, Confidence Interval: [nan

In [249]:
a = np.array([[1, 2], [3, 4]])
v1 = np.var(a)
print(v1)
v2 = np.var(a, axis=0)
print(v2)
v3  = np.var(a, axis=1)
print(v3)

1.25
[1. 1.]
[0.25 0.25]


In [250]:
a

array([[1, 2],
       [3, 4]])

In [262]:
# Termino 1
np.sum((a - np.mean(a, axis=0))**2 / np.var(a, axis=0),axis=1)

array([2., 2.])

In [256]:
#Termino 2
np.var(a, axis=0)

array([1., 1.])

In [234]:
tf = np.sum((a - np.mean(a, axis=0))**2 / np.var(a, axis=0), axis=1)

In [235]:
tf

array([2., 2.])