# PREPARACIÓN DE VARIABLES

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn import datasets as db
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
from sklearn.decomposition import PCA

SE DEFINE LA FUNCIÓN PARA GRAFICAR LA MATRIZ DE CONFUSIÓN

In [2]:
def plot_confusion_matrix(cm, labels):
    """Display a confusion matrix as an annotated Plotly heatmap.

    Args:
        cm: Matrix of counts handed straight to ``px.imshow`` (the callers in
            this notebook pass the output of ``confusion_matrix``).
        labels: Tick labels applied to both axes; the x axis is titled
            "Predicted" and the y axis "Actual".

    Returns:
        None. The figure is rendered with ``show()``.
    """
    axis_titles = {"x": "Predicted", "y": "Actual", "color": "Count"}
    heatmap = px.imshow(
        cm,
        x=labels,
        y=labels,
        labels=axis_titles,
        text_auto=True,
        color_continuous_scale='Viridis',
        title="Confusion Matrix",
    )
    # The counts are already written inside each cell (text_auto), so the
    # color bar is hidden.
    heatmap.update_layout(coloraxis_showscale=False)
    heatmap.show()

CREACIÓN Y MANIPULACIÓN DE LA MATRIZ DE IRIS

In [3]:
# The dataset is loaded from sklearn.datasets.
iris = db.load_iris()

# Build the data frame with explicit column names and append the species name
# of every sample: the feature matrix only holds the measurements, while the
# class codes are stored separately in iris.target.
df = pd.DataFrame(
    iris.data,
    columns=['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width'],
).assign(
    Species=pd.Categorical.from_codes(iris.target, categories=iris.target_names)
)
df.head()


Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


ASIGNACIÓN DE VARIABLES Y ESTANDARIZACIÓN DE LA INFORMACIÓN

In [13]:
# X holds the features (measurements), y the labels (class code of each record).
X, y = iris.data, iris.target

# Fit the scaler on the full feature matrix, then standardize X with it.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)


# Componentes Principales 2D

In [25]:
# PRINCIPAL COMPONENT ANALYSIS (2 components)
# X_scaled is already standardized by the previous cell, so no new scaler is
# created here (re-instantiating one would only discard the fitted scaler).
pca = PCA(n_components=2)

# Project the standardized features onto the first two principal components.
X_pca_2d = pca.fit_transform(X_scaled)
pca_df_2d = pd.DataFrame(data=X_pca_2d, columns=['PC1', 'PC2'])
pca_df_2d['Species'] = y  # numeric class codes taken from iris.target
display(pca_df_2d.head())

# EXPLAINED VARIANCE IN 2 DIMENSIONS
explained_variance_2d = pca.explained_variance_ratio_
# Absolute value of the component coefficients, one row per component and one
# column per original feature.
components = pd.DataFrame(abs(pca.components_), columns=iris.feature_names)
print("\n Varianza explicada para el primer y segundo vector (2D):\n", explained_variance_2d)
print('\n Con los componentes descritos a continuacion')
display(components)

# Visual representation of the transformed samples.
# The points are colored by species *name*: coloring by the integer codes in
# the 'Species' column makes Plotly draw a continuous color bar instead of a
# discrete legend with one entry per species.
fig_2d = px.scatter(
    pca_df_2d,
    x='PC1',
    y='PC2',
    color=iris.target_names[y],
    labels={'color': 'Species'},
    template='plotly_white',
    title='PCA - 2D Components',
)
fig_2d.show()

Unnamed: 0,PC1,PC2,Species
0,-2.264703,0.480027,0
1,-2.080961,-0.674134,0
2,-2.364229,-0.341908,0
3,-2.299384,-0.597395,0
4,-2.389842,0.646835,0



 Varianza explicada para el primer y segundo vector (2D):
 [0.72962445 0.22850762]

 Con los componentes descritos a continuacion


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,0.521066,0.269347,0.580413,0.564857
1,0.377418,0.923296,0.024492,0.066942


# componentes principales 3D

In [24]:
# PRINCIPAL COMPONENT ANALYSIS (3 components)
pca_3d = PCA(n_components=3)

# Project the standardized features onto the first three principal components.
X_pca_3d = pca_3d.fit_transform(X_scaled)
pca_df_3d = pd.DataFrame(data=X_pca_3d, columns=['PC1', 'PC2', 'PC3'])
pca_df_3d['Species'] = y  # numeric class codes taken from iris.target
display(pca_df_3d.head())

# Explained variance in 3 dimensions
explained_variance_3d = pca_3d.explained_variance_ratio_
# Absolute value of the component coefficients, one row per component and one
# column per original feature.
components = pd.DataFrame(abs(pca_3d.components_), columns=iris.feature_names)
print("\n Varianza explicada para el primer, segundo y tercer vector (3D):\n", explained_variance_3d)
print('\n Con los componentes descritos a continuacion')
display(components)

# Graphical representation of the transformation.
# The points are colored by species *name*: coloring by the integer codes in
# the 'Species' column makes Plotly draw a continuous color bar instead of a
# discrete legend with one entry per species.
fig_3d = px.scatter_3d(
    pca_df_3d,
    x='PC1',
    y='PC2',
    z='PC3',
    color=iris.target_names[y],
    labels={'color': 'Species'},
    template='plotly_white',
    title='PCA - 3 Components',
)
fig_3d.show()


Unnamed: 0,PC1,PC2,PC3,Species
0,-2.264703,0.480027,-0.127706,0
1,-2.080961,-0.674134,-0.234609,0
2,-2.364229,-0.341908,0.044201,0
3,-2.299384,-0.597395,0.09129,0
4,-2.389842,0.646835,0.015738,0



 Varianza explicada para el primer, segundo y tercer vector (3D):
 [0.72962445 0.22850762 0.03668922]

 Con los componentes descritos a continuacion


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,0.521066,0.269347,0.580413,0.564857
1,0.377418,0.923296,0.024492,0.066942
2,0.719566,0.244382,0.142126,0.634273


# Regresion Logistica 2D

In [28]:
# Split the data for training and prediction: 20% of the samples are held out,
# with a fixed random_state so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=7)

# Standardize using statistics learned from the training split only, then
# apply that same transformation to the test split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# PRINCIPAL COMPONENT ANALYSIS: fitted on the training split, applied to both.
pca = PCA(n_components=2)
X_train_pca_2d = pca.fit_transform(X_train_scaled)
X_test_pca_2d = pca.transform(X_test_scaled)

# Logistic regression on the 2 principal components: fit, then predict.
lr_pca_2d = LogisticRegression().fit(X_train_pca_2d, y_train)
lr_pca_2d_pred = lr_pca_2d.predict(X_test_pca_2d)

# Evaluation on the held-out split: accuracy, then precision, recall and F1.
# NOTE: with average="micro" on a single-label multiclass problem, precision,
# recall and F1 are all equal to the accuracy.
lr_pca_2d_accuracy = accuracy_score(y_test, lr_pca_2d_pred)
lr_pca_2d_precision, lr_pca_2d_recall, lr_pca_2d_f1 = (
    score(y_test, lr_pca_2d_pred, average="micro")
    for score in (precision_score, recall_score, f1_score)
)

# Per-class classification report
lr_pca_2d_report = classification_report(y_test, lr_pca_2d_pred)
print("Logistic Regression PCA 2D Classification Report:", lr_pca_2d_report, sep="\n")

# Confusion matrix
lr_pca_2d_cm = confusion_matrix(y_test, lr_pca_2d_pred)
plot_confusion_matrix(lr_pca_2d_cm, ['setosa','versicolor','virginica'])

Logistic Regression PCA 2D Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       0.67      0.83      0.74        12
           2       0.75      0.55      0.63        11

    accuracy                           0.77        30
   macro avg       0.81      0.79      0.79        30
weighted avg       0.78      0.77      0.76        30



# Regresión Logística 3D

In [32]:
# Split the data for training and prediction: 20% of the samples are held out,
# with a fixed random_state so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=7)

# Standardize using statistics learned from the training split only, then
# apply that same transformation to the test split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Principal components: fitted on the training split, applied to both splits.
pca = PCA(n_components=3)
X_train_pca_3d = pca.fit_transform(X_train_scaled)
X_test_pca_3d = pca.transform(X_test_scaled)

# Logistic regression on the 3 principal components: fit, then predict.
lr_pca_3d = LogisticRegression().fit(X_train_pca_3d, y_train)
lr_pca_3d_pred = lr_pca_3d.predict(X_test_pca_3d)

# Evaluation on the held-out split: accuracy, then precision, recall and F1.
# NOTE: with average='micro' on a single-label multiclass problem, precision,
# recall and F1 are all equal to the accuracy.
lr_pca_3d_accuracy = accuracy_score(y_test, lr_pca_3d_pred)
lr_pca_3d_precision, lr_pca_3d_recall, lr_pca_3d_f1 = (
    score(y_test, lr_pca_3d_pred, average='micro')
    for score in (precision_score, recall_score, f1_score)
)

# Per-class classification report
lr_pca_3d_report = classification_report(y_test, lr_pca_3d_pred)
print("Logistic Regression PCA 3D Classification Report:", lr_pca_3d_report, sep="\n")

# Confusion matrix
lr_pca_3d_cm = confusion_matrix(y_test, lr_pca_3d_pred)
plot_confusion_matrix(lr_pca_3d_cm, ['setosa','versicolor', 'virginica'])

Logistic Regression PCA 3D Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       0.83      0.83      0.83        12
           2       0.82      0.82      0.82        11

    accuracy                           0.87        30
   macro avg       0.88      0.88      0.88        30
weighted avg       0.87      0.87      0.87        30



# KNN 2D

In [33]:
# K-nearest neighbors on the 2-component PCA projection (X_train_pca_2d /
# X_test_pca_2d come from the logistic regression 2D cell): fit, then predict.
knn_pca_2d = KNeighborsClassifier().fit(X_train_pca_2d, y_train)
knn_pca_2d_pred = knn_pca_2d.predict(X_test_pca_2d)

# Evaluation on the held-out split: accuracy, then precision, recall and F1.
# NOTE: with average="micro" on a single-label multiclass problem, precision,
# recall and F1 are all equal to the accuracy.
knn_pca_2d_accuracy = accuracy_score(y_test, knn_pca_2d_pred)
knn_pca_2d_precision, knn_pca_2d_recall, knn_pca_2d_f1 = (
    score(y_test, knn_pca_2d_pred, average="micro")
    for score in (precision_score, recall_score, f1_score)
)

# Per-class classification report
knn_pca_2d_report = classification_report(y_test, knn_pca_2d_pred)
print("KNN PCA 2D Classification Report:", knn_pca_2d_report, sep="\n")

# Confusion matrix
knn_pca_2d_cm = confusion_matrix(y_test, knn_pca_2d_pred)
plot_confusion_matrix(knn_pca_2d_cm, ['setosa','versicolor', 'virginica'])

KNN PCA 2D Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       0.73      0.92      0.81        12
           2       0.88      0.64      0.74        11

    accuracy                           0.83        30
   macro avg       0.87      0.85      0.85        30
weighted avg       0.85      0.83      0.83        30



# KNN 3D

In [34]:
# K-nearest neighbors on the 3-component PCA projection (X_train_pca_3d /
# X_test_pca_3d come from the logistic regression 3D cell): fit, then predict.
knn_pca_3d = KNeighborsClassifier().fit(X_train_pca_3d, y_train)
knn_pca_3d_pred = knn_pca_3d.predict(X_test_pca_3d)

# Evaluation on the held-out split: accuracy, then precision, recall and F1.
# NOTE: with average="micro" on a single-label multiclass problem, precision,
# recall and F1 are all equal to the accuracy.
knn_pca_3d_accuracy = accuracy_score(y_test, knn_pca_3d_pred)
knn_pca_3d_precision, knn_pca_3d_recall, knn_pca_3d_f1 = (
    score(y_test, knn_pca_3d_pred, average="micro")
    for score in (precision_score, recall_score, f1_score)
)

# Per-class classification report
knn_pca_3d_report = classification_report(y_test, knn_pca_3d_pred)
print("KNN PCA 3D Classification Report:", knn_pca_3d_report, sep="\n")

# Confusion matrix
knn_pca_3d_cm = confusion_matrix(y_test, knn_pca_3d_pred)
plot_confusion_matrix(knn_pca_3d_cm, ['setosa','versicolor', 'virginica'])

KNN PCA 3D Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       0.79      0.92      0.85        12
           2       0.89      0.73      0.80        11

    accuracy                           0.87        30
   macro avg       0.89      0.88      0.88        30
weighted avg       0.87      0.87      0.87        30



# SVM 2D


In [35]:
# SUPPORT VECTOR MACHINE on the 2-component PCA projection (X_train_pca_2d /
# X_test_pca_2d come from the logistic regression 2D cell): fit, then predict.
svm_pca_2d = SVC().fit(X_train_pca_2d, y_train)
svm_pca_2d_pred = svm_pca_2d.predict(X_test_pca_2d)

# Evaluation on the held-out split: accuracy, then precision, recall and F1.
# NOTE: with average="micro" on a single-label multiclass problem, precision,
# recall and F1 are all equal to the accuracy.
svm_pca_2d_accuracy = accuracy_score(y_test, svm_pca_2d_pred)
svm_pca_2d_precision, svm_pca_2d_recall, svm_pca_2d_f1 = (
    score(y_test, svm_pca_2d_pred, average="micro")
    for score in (precision_score, recall_score, f1_score)
)

# Per-class classification report
svm_pca_2d_report = classification_report(y_test, svm_pca_2d_pred)
print("SVM PCA 2D Classification Report:", svm_pca_2d_report, sep="\n")

# Confusion matrix
svm_pca_2d_cm = confusion_matrix(y_test, svm_pca_2d_pred)
plot_confusion_matrix(svm_pca_2d_cm, ['setosa','versicolor', 'virginica'])


SVM PCA 2D Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       0.71      0.83      0.77        12
           2       0.78      0.64      0.70        11

    accuracy                           0.80        30
   macro avg       0.83      0.82      0.82        30
weighted avg       0.80      0.80      0.80        30



# SVM 3D

In [36]:
# SUPPORT VECTOR MACHINE on the 3-component PCA projection (X_train_pca_3d /
# X_test_pca_3d come from the logistic regression 3D cell): fit, then predict.
svm_pca_3d = SVC().fit(X_train_pca_3d, y_train)
svm_pca_3d_pred = svm_pca_3d.predict(X_test_pca_3d)

# Evaluation on the held-out split: accuracy, then precision, recall and F1.
# NOTE: with average="micro" on a single-label multiclass problem, precision,
# recall and F1 are all equal to the accuracy.
svm_pca_3d_accuracy = accuracy_score(y_test, svm_pca_3d_pred)
svm_pca_3d_precision, svm_pca_3d_recall, svm_pca_3d_f1 = (
    score(y_test, svm_pca_3d_pred, average="micro")
    for score in (precision_score, recall_score, f1_score)
)

# Per-class classification report
svm_pca_3d_report = classification_report(y_test, svm_pca_3d_pred)
print("SVM PCA 3D Classification Report:", svm_pca_3d_report, sep="\n")

# Confusion matrix
svm_pca_3d_cm = confusion_matrix(y_test, svm_pca_3d_pred)
plot_confusion_matrix(svm_pca_3d_cm, ['setosa','versicolor', 'virginica'])

SVM PCA 3D Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       0.83      0.83      0.83        12
           2       0.82      0.82      0.82        11

    accuracy                           0.87        30
   macro avg       0.88      0.88      0.88        30
weighted avg       0.87      0.87      0.87        30



# NAIVE BAYES 2D

In [37]:
# NAIVE BAYES (Gaussian) on the 2-component PCA projection (X_train_pca_2d /
# X_test_pca_2d come from the logistic regression 2D cell): fit, then predict.
nb_pca_2d = GaussianNB().fit(X_train_pca_2d, y_train)
nb_pca_2d_pred = nb_pca_2d.predict(X_test_pca_2d)

# Evaluation on the held-out split: accuracy, then precision, recall and F1.
# NOTE: with average="micro" on a single-label multiclass problem, precision,
# recall and F1 are all equal to the accuracy.
nb_pca_2d_accuracy = accuracy_score(y_test, nb_pca_2d_pred)
nb_pca_2d_precision, nb_pca_2d_recall, nb_pca_2d_f1 = (
    score(y_test, nb_pca_2d_pred, average="micro")
    for score in (precision_score, recall_score, f1_score)
)

# Per-class classification report
nb_pca_2d_report = classification_report(y_test, nb_pca_2d_pred)
print("Naive Bayes PCA 2D Classification Report:", nb_pca_2d_report, sep="\n")

# Confusion matrix
nb_pca_2d_cm = confusion_matrix(y_test, nb_pca_2d_pred)
plot_confusion_matrix(nb_pca_2d_cm, ['setosa','versicolor', 'virginica'])

Naive Bayes PCA 2D Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       0.67      0.83      0.74        12
           2       0.75      0.55      0.63        11

    accuracy                           0.77        30
   macro avg       0.81      0.79      0.79        30
weighted avg       0.78      0.77      0.76        30



# NAIVE BAYES 3D

In [38]:
# NAIVE BAYES (Gaussian) on the 3-component PCA projection (X_train_pca_3d /
# X_test_pca_3d come from the logistic regression 3D cell): fit, then predict.
nb_pca_3d = GaussianNB().fit(X_train_pca_3d, y_train)
nb_pca_3d_pred = nb_pca_3d.predict(X_test_pca_3d)

# Evaluation on the held-out split: accuracy, then precision, recall and F1.
# NOTE: with average="micro" on a single-label multiclass problem, precision,
# recall and F1 are all equal to the accuracy.
nb_pca_3d_accuracy = accuracy_score(y_test, nb_pca_3d_pred)
nb_pca_3d_precision, nb_pca_3d_recall, nb_pca_3d_f1 = (
    score(y_test, nb_pca_3d_pred, average="micro")
    for score in (precision_score, recall_score, f1_score)
)

# Per-class classification report
nb_pca_3d_report = classification_report(y_test, nb_pca_3d_pred)
print("Naive Bayes PCA 3D Classification Report:", nb_pca_3d_report, sep="\n")

# Confusion matrix
nb_pca_3d_cm = confusion_matrix(y_test, nb_pca_3d_pred)
plot_confusion_matrix(nb_pca_3d_cm, ['setosa','versicolor', 'virginica'])

Naive Bayes PCA 3D Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       0.82      0.75      0.78        12
           2       0.75      0.82      0.78        11

    accuracy                           0.83        30
   macro avg       0.86      0.86      0.86        30
weighted avg       0.84      0.83      0.83        30



# REPORTE

In [42]:
# Summary of the 2-component models. Each tuple is one model's row
# (name, accuracy, precision, recall, F1); the rows are transposed into the
# column-oriented dict of lists that pd.DataFrame consumes.
models_data2D = dict(zip(
    ('Model', 'Accuracy', 'Precision', 'Recall', 'F1-Score'),
    map(list, zip(
        ('Logistic Regression', lr_pca_2d_accuracy, lr_pca_2d_precision, lr_pca_2d_recall, lr_pca_2d_f1),
        ('K-Nearest Neighbors', knn_pca_2d_accuracy, knn_pca_2d_precision, knn_pca_2d_recall, knn_pca_2d_f1),
        ('SVM', svm_pca_2d_accuracy, svm_pca_2d_precision, svm_pca_2d_recall, svm_pca_2d_f1),
        ('Naive Bayes', nb_pca_2d_accuracy, nb_pca_2d_precision, nb_pca_2d_recall, nb_pca_2d_f1),
    )),
))

# Same summary for the 3-component models.
models_data3D = dict(zip(
    ('Model', 'Accuracy', 'Precision', 'Recall', 'F1-Score'),
    map(list, zip(
        ('Logistic Regression', lr_pca_3d_accuracy, lr_pca_3d_precision, lr_pca_3d_recall, lr_pca_3d_f1),
        ('K-Nearest Neighbors', knn_pca_3d_accuracy, knn_pca_3d_precision, knn_pca_3d_recall, knn_pca_3d_f1),
        ('SVM', svm_pca_3d_accuracy, svm_pca_3d_precision, svm_pca_3d_recall, svm_pca_3d_f1),
        ('Naive Bayes', nb_pca_3d_accuracy, nb_pca_3d_precision, nb_pca_3d_recall, nb_pca_3d_f1),
    )),
))

dfp_models2D, dfp_models3D = pd.DataFrame(models_data2D), pd.DataFrame(models_data3D)

# Show both comparison tables, each preceded by its caption.
print('A continuacion se muestra dos tablas comparativas de los metodos de prediccion\n Transformacion a 2 variables \n')
display(dfp_models2D)
print('\n Transformacion a 3 variables \n')
display(dfp_models3D)
print('\n Queda a interpretacion del usuario el nivel de riesgo que desea tomar para la eleccion de transformacion\n\n\n')


A continuacion se muestra dos tablas comparativas de los metodos de prediccion
 Transformacion a 2 variables 



Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,Logistic Regression,0.766667,0.766667,0.766667,0.766667
1,K-Nearest Neighbors,0.833333,0.833333,0.833333,0.833333
2,SVM,0.8,0.8,0.8,0.8
3,Naive Bayes,0.766667,0.766667,0.766667,0.766667



 Transformacion a 3 variables 



Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,Logistic Regression,0.866667,0.866667,0.866667,0.866667
1,K-Nearest Neighbors,0.866667,0.866667,0.866667,0.866667
2,SVM,0.866667,0.866667,0.866667,0.866667
3,Naive Bayes,0.833333,0.833333,0.833333,0.833333



 Queda a interpretacion del usuario el nivel de riesgo que desea tomar para la eleccion de transformacion



