In [1]:
# Install dash_bootstrap_components; pip resolves dash itself as one of its dependencies.
!pip install dash_bootstrap_components

^C


Collecting dash_bootstrap_components
  Downloading dash_bootstrap_components-1.6.0-py3-none-any.whl.metadata (5.2 kB)
Collecting dash>=2.0.0 (from dash_bootstrap_components)
  Downloading dash-2.18.1-py3-none-any.whl.metadata (10 kB)
Collecting plotly>=5.0.0 (from dash>=2.0.0->dash_bootstrap_components)
  Downloading plotly-5.24.1-py3-none-any.whl.metadata (7.3 kB)
Collecting dash-html-components==2.0.0 (from dash>=2.0.0->dash_bootstrap_components)
  Downloading dash_html_components-2.0.0-py3-none-any.whl.metadata (3.8 kB)
Collecting dash-core-components==2.0.0 (from dash>=2.0.0->dash_bootstrap_components)
  Downloading dash_core_components-2.0.0-py3-none-any.whl.metadata (2.9 kB)
Collecting dash-table==5.0.0 (from dash>=2.0.0->dash_bootstrap_components)
  Downloading dash_table-5.0.0-py3-none-any.whl.metadata (2.4 kB)
Collecting importlib-metadata (from dash>=2.0.0->dash_bootstrap_components)
  Downloading importlib_metadata-8.5.0-py3-none-any.whl.metadata (4.8 kB)
Collecting retrying

In [3]:
#importamos las librerias necesarias
import numpy as np
try:
  import dash
except ImportError as e:
  !pip install dash_bootstrap_components
  import dash
from dash import dcc, html, Input, Output
import dash_bootstrap_components as dbc
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_wine, load_iris, load_breast_cancer, fetch_covtype

# Colours shared by the figure layout and the component styles below.
_DARK_BACKGROUND = '#4a4a4a'
_LIGHT_TEXT = 'white'

# Plotly layout overrides that give every figure the dashboard's dark look.
custom_dark_layout = dict(
    plot_bgcolor=_DARK_BACKGROUND,   # background of the plotting area
    paper_bgcolor=_DARK_BACKGROUND,  # background around the plotting area
    font=dict(color=_LIGHT_TEXT),    # text colour
)

# Inline CSS for the outer page container.
CONTENT_STYLE = {
    "margin-left": "2rem",
    "margin-right": "2rem",
    "margin-top": "4rem",
    "padding": "2rem 1rem",
    "background-color": _DARK_BACKGROUND,
    "border-radius": "10px",
}

# Inline CSS for each card that wraps a figure.
CARD_STYLE = {
    "background-color": _DARK_BACKGROUND,
    "color": _LIGHT_TEXT,
    "border-radius": "10px",
    "padding": "1rem",
    "margin-bottom": "1rem",
    "border": "2px solid white",
    "margin-top": "1rem",
}

# Load every available dataset once, keyed by the label offered in the dataset dropdown.
datasets = {
    label: loader()
    for label, loader in (
        ("Calidad del Vino", load_wine),
        ("Iris", load_iris),
        ("Cáncer de Pecho", load_breast_cancer),
    )
}

# Create the Dash application
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])


def _component_dropdown(dropdown_id, initial_value):
    """Build one of the two axis selectors used inside the PCA scatter card.

    No options are set here; the `update_pca` callback writes the
    `options` property of both selectors.
    """
    return dcc.Dropdown(
        id=dropdown_id,
        value=initial_value,
        clearable=False,
        style={'color': 'black', 'backgroundColor': 'white'},
    )


def _graph_card_row(title, graph_id, controls=()):
    """Build a full-width row holding one styled card with a title and a graph.

    Args:
        title: Text of the card heading.
        graph_id: Component id given to the `dcc.Graph` (referenced by the callback).
        controls: Optional components placed between the heading and the graph.
    """
    return dbc.Row([
        dbc.Col(
            dbc.Card(
                [
                    dbc.CardBody([
                        html.H5(title, className="card-title"),
                        *controls,
                        dcc.Graph(id=graph_id),
                    ])
                ],
                style=CARD_STYLE,
            ),
            width=12,
        ),
    ])


# Application layout: title, controls, one card per figure, and a status line.
app.layout = dbc.Container([
    dbc.Row(dbc.Col(html.H1("PCA"), className="text-center mb-4", style={"color": "white"})),

    dbc.Row([
        dbc.Col([
            dbc.Label("Selecciona un Dataset:", style={"color": "white"}),
            dcc.Dropdown(
                id='dataset-dropdown',
                options=[{'label': name, 'value': name} for name in datasets.keys()],
                value='Calidad del Vino',  # default dataset
                # A cleared dropdown reports None, which is not a key of
                # `datasets`; keep a dataset selected at all times.
                clearable=False,
                className="mb-3"
            ),
            dbc.Label("Número de Componentes Principales:", style={"color": "white"}),
            dcc.Slider(
                id='n-components-slider',
                min=1,
                max=10,
                step=1,
                value=4,
                marks={i: str(i) for i in range(1, 11)},
                tooltip={"placement": "bottom", "always_visible": True}
            )
        ], width=4),
    ]),

    _graph_card_row("Matriz de Correlación", 'correlation-matrix'),

    _graph_card_row(
        "Gráfico PCA",
        'pca-dataset-graph',
        controls=[
            html.Label("Seleccionar Componente X"),
            _component_dropdown('x-component', 0),
            html.Label("Seleccionar Componente Y"),
            _component_dropdown('y-component', 1),
        ],
    ),

    _graph_card_row("Mapa de Calor", 'heatmap-graph'),
    _graph_card_row("Pesos Principales", 'top-weights-table'),
    _graph_card_row("Varianza Explicada", 'explained-variance-graph'),
    _graph_card_row("Matriz de Dispersión", 'scatter-matrix-graph'),

    dbc.Row(dbc.Col(html.Div(id='components-output'), className="text-center mt-4"))
], style=CONTENT_STYLE)

# Callback that refreshes the slider, the axis selectors and every figure
@app.callback(
    [Output('n-components-slider', 'max'),
     Output('n-components-slider', 'marks'),
     Output('n-components-slider', 'value'),
     Output('components-output', 'children'),
     Output('correlation-matrix', 'figure'),
     Output('pca-dataset-graph', 'figure'),
     Output('heatmap-graph', 'figure'),
     Output('top-weights-table', 'figure'),
     Output('explained-variance-graph', 'figure'),
     Output('scatter-matrix-graph', 'figure'),
     Output('x-component', 'options'),
     Output('y-component', 'options'),

     ],

    [Input('dataset-dropdown', 'value'),
     Input('n-components-slider', 'value'),
     Input('x-component', 'value'),
     Input('y-component', 'value')
     ]
)
def update_pca(selected_dataset, n_components, x_comp, y_comp):
    """Recompute the PCA for the selected dataset and rebuild all outputs.

    Args:
        selected_dataset: Key of `datasets` chosen in the dataset dropdown.
        n_components: Number of principal components requested on the slider.
        x_comp: Zero-based component index selected for the scatter X axis.
        y_comp: Zero-based component index selected for the scatter Y axis.

    Returns:
        A 12-tuple in the order of the declared Outputs: slider max, slider
        marks, slider value, status text, correlation figure, PCA scatter,
        component heatmap, top-weights table, explained-variance figure,
        scatter matrix, X-axis options and Y-axis options.

    Raises:
        dash.exceptions.PreventUpdate: If no known dataset or no component
            count is selected, so the page keeps its current state.
    """
    # Function-scope import: only this callback needs the exception.
    from dash.exceptions import PreventUpdate

    # A cleared dropdown/slider reports None; there is nothing to compute then.
    if selected_dataset not in datasets or n_components is None:
        raise PreventUpdate

    dataset = datasets[selected_dataset]

    # Features as a DataFrame; target as a Series named 'target'.
    target_name = 'target'
    X = pd.DataFrame(dataset['data'], columns=list(dataset['feature_names']))
    if hasattr(dataset, 'target_names'):
        # Use the class names instead of the integer codes when available.
        classes = pd.Categorical.from_codes(dataset['target'], dataset['target_names'])
    else:
        classes = dataset['target']
    y = pd.Series(classes, name=target_name)

    # Standardize the features
    X_scaled = pd.DataFrame(StandardScaler().fit_transform(X), columns=X.columns)

    # The slider offers at most 10 components, and never more than the feature count
    max_components = min(X_scaled.shape[1], 10)
    marks = {i: str(i) for i in range(1, max_components + 1)}
    n_components = min(n_components, max_components)

    # Fit the PCA with the (clamped) number of components
    pca = PCA(n_components=n_components)
    X_pca = pca.fit_transform(X_scaled)

    correlation_matrix = create_correlation_matrix(X_scaled)

    def pc_options(excluded):
        """Dropdown options for all fitted components except `excluded`."""
        return [
            {'label': f'PC{i+1}', 'value': i}
            for i in range(n_components)
            if i != excluded
        ]

    # Each selector hides the component picked on the other axis.
    x_options = pc_options(y_comp)
    if x_comp not in [opt['value'] for opt in x_options]:
        x_comp = x_options[0]['value'] if x_options else 0

    # Filter Y against the *resolved* X so both axes cannot collapse onto the
    # same component when the stored selections are out of range.
    y_options = pc_options(x_comp)
    if y_comp not in [opt['value'] for opt in y_options]:
        # With a single component there is no alternative: fall back to
        # index 0 (not 1) so indexing into the PCA result stays in range.
        y_comp = y_options[0]['value'] if y_options else 0

    # PCA scatter with loading arrows
    fig_pca = create_pca_graph(X_pca, X, y, x_comp, y_comp, selected_dataset, pca)
    fig_pca.update_layout(**custom_dark_layout)

    # Component weights per feature, one row per principal component
    dirs = pd.DataFrame(
        pca.components_,
        columns=X.columns,
        index=[f'PC{i+1}' for i in range(n_components)],
    )
    fig_heatmap = create_heatmap(dirs)

    # Table with the two largest absolute weights of each component
    fig_top_weights = create_top_weights_table(pca, X.columns, X_scaled.corr())

    # Individual and cumulative explained variance
    fig_explained_variance = create_explained_variance_graph(pca.explained_variance_ratio_)

    # Pairwise scatter plots of the principal components
    fig_scatter = create_scatter_matrix_plot(X_scaled, y, pca, n_components)

    return (
        max_components,
        marks,
        n_components,
        f'Número de Componentes Principales: {n_components}',
        correlation_matrix,
        fig_pca,
        fig_heatmap,
        fig_top_weights,
        fig_explained_variance,
        fig_scatter,
        x_options,
        y_options,
    )



def create_pca_graph(X_pca, X, y, x_comp, y_comp, selected_dataset, pca):
    """Scatter the samples on two principal components and overlay feature loadings.

    Args:
        X_pca: 2-D array of PCA scores; columns `x_comp` and `y_comp` are plotted.
        X: Iterable whose items are the feature names used as arrow labels
            (the caller passes the feature DataFrame, which iterates its columns).
        y: Values used to colour the points.
        x_comp: Zero-based component index for the X axis.
        y_comp: Zero-based component index for the Y axis.
        selected_dataset: Not used by this function.
        pca: Fitted PCA providing `components_` and `explained_variance_`.

    Returns:
        The Plotly figure.
    """
    x_title = f'PC{x_comp+1}'
    y_title = f'PC{y_comp+1}'

    fig = px.scatter(
        x=X_pca[:, x_comp],
        y=X_pca[:, y_comp],
        color=y,
        labels={'x': x_title, 'y': y_title, 'color': 'Clases'},
        title='PCA',
    )
    fig.update_layout(
        plot_bgcolor='#4a4a4a',
        paper_bgcolor='#4a4a4a',
        font=dict(color='white'),
        xaxis_title=x_title,
        yaxis_title=y_title,
    )

    # One row per feature: component weights scaled by sqrt(explained variance).
    loadings = pca.components_.T * np.sqrt(pca.explained_variance_)
    for feature, feature_loadings in zip(X, loadings):
        tip_x = feature_loadings[x_comp]
        tip_y = feature_loadings[y_comp]

        # Arrow whose tail is at the data origin and whose head is at the loading.
        fig.add_annotation(
            ax=0, ay=0,
            axref="x", ayref="y",
            x=tip_x,
            y=tip_y,
            showarrow=True,
            arrowsize=2,
            arrowhead=2,
            arrowcolor='white',
            xanchor="right",
            yanchor="top",
        )
        # Feature name placed at the arrow head, shifted up slightly.
        fig.add_annotation(
            x=tip_x,
            y=tip_y,
            ax=0, ay=0,
            xanchor="center",
            yanchor="bottom",
            text=feature,
            yshift=5,
        )

    return fig


def create_correlation_matrix(data):
    """Build an annotated heatmap of the pairwise correlations of `data`.

    Args:
        data: DataFrame whose column correlations (`data.corr()`) are shown.

    Returns:
        A Plotly figure with one heatmap trace and one text annotation per cell.
    """
    corr = data.corr()
    names = corr.columns

    # One label per cell, showing the coefficient with two decimals.
    cell_labels = [
        go.layout.Annotation(
            text=f"{value:.2f}",
            x=col_name,
            y=row_name,
            xref="x1",
            yref="y1",
            showarrow=False,
            font=dict(size=10, color="black"),
        )
        for row_name, row in zip(names, corr.values)
        for col_name, value in zip(names, row)
    ]

    trace = go.Heatmap(
        z=corr.values,
        x=names,
        y=names,
        colorscale='YlGnBu',
        colorbar=dict(title="Correlación"),
        zmin=-1,  # colour range spans -1 .. 1
        zmax=1,
    )

    fig = go.Figure(data=[trace])
    fig.update_layout(
        title="Mapa de Correlación",
        xaxis=dict(tickangle=45),
        yaxis=dict(autorange="reversed"),  # first row at the top
        annotations=cell_labels,
        autosize=True,
        template='plotly_dark',
    )
    fig.update_layout(**custom_dark_layout)
    return fig

def create_heatmap(data):
    """Build an annotated heatmap of principal-component weights.

    Args:
        data: DataFrame of weights; the index supplies the Y labels and the
            columns supply the X labels. Rows are drawn in reversed order.

    Returns:
        A Plotly figure with the heatmap and one text annotation per cell.
    """
    flipped = data.iloc[::-1]
    weights = flipped.values

    fig = go.Figure(data=go.Heatmap(
        z=weights,
        x=flipped.columns,
        y=flipped.index,
        colorscale='YlGnBu',
        zmin=-1,
        zmax=1,
        colorbar=dict(title='Peso de Componentes'),
    ))

    for row_label, row in zip(flipped.index, weights):
        for col_label, weight in zip(flipped.columns, row):
            # NOTE(review): text is white for negative weights and black otherwise;
            # confirm this gives the intended contrast against the 'YlGnBu' scale.
            text_colour = "white" if weight < 0 else "black"
            fig.add_annotation(
                x=col_label,
                y=row_label,
                text=f'{weight:.2f}',
                showarrow=False,
                font=dict(color=text_colour),
                xanchor='center',
                yanchor='middle',
            )

    fig.update_layout(title='Mapa de Calor de los Componentes Principales', template='plotly_dark')
    fig.update_layout(**custom_dark_layout)
    return fig

def create_top_weights_table(pca, feature_names, corr_matrix):
    """Tabulate, per component, the two features with the largest absolute weights.

    Args:
        pca: Fitted PCA; each row of `components_` is one component's weights.
        feature_names: Feature labels, indexable by position.
        corr_matrix: DataFrame of feature correlations, looked up by label.

    Returns:
        A Plotly table figure with one row per component: both features, their
        weights, and the correlation between the two features (rounded to 4
        decimals).
    """
    column_names = ['Componente', 'Característica 1', 'Peso 1', 'Característica 2', 'Peso 2', 'Correlación']

    rows = []
    for pc_number, weights in enumerate(pca.components_, start=1):
        # argsort is ascending, so the last two positions hold the largest
        # absolute weights: second-largest first, largest second.
        runner_up_idx, largest_idx = np.argsort(np.abs(weights))[-2:]
        runner_up_name = feature_names[runner_up_idx]
        largest_name = feature_names[largest_idx]
        rows.append([
            f'PC{pc_number}',
            runner_up_name,
            weights[runner_up_idx],
            largest_name,
            weights[largest_idx],
            corr_matrix.loc[runner_up_name, largest_name],
        ])

    table = pd.DataFrame(rows, columns=column_names)
    numeric_columns = ['Peso 1', 'Peso 2', 'Correlación']
    table[numeric_columns] = table[numeric_columns].round(4)

    fig = go.Figure(data=[go.Table(
        header=dict(values=column_names),
        cells=dict(values=[table[name] for name in table.columns]),
    )])
    fig.update_layout(
        title='Mayores Pesos de Componentes Principales y Correlación entre Características',
        template='plotly_dark',
    )
    fig.update_layout(**custom_dark_layout)
    return fig

def create_explained_variance_graph(explained_variance):
    """Plot individual (bars) and cumulative (line) explained variance.

    Args:
        explained_variance: Sequence of per-component explained-variance values.

    Returns:
        A Plotly figure with a bar trace and a cumulative line trace.
    """
    cumulative_variance = np.cumsum(explained_variance)

    def pc_labels(values):
        """X-axis labels 'PC1', 'PC2', ... matching the length of `values`."""
        return [f'PC{i+1}' for i in range(len(values))]

    individual_bars = go.Bar(
        x=pc_labels(explained_variance),
        y=explained_variance,
        name='Individual',
        marker=dict(color='rgba(55, 83, 109, 0.7)'),
    )
    cumulative_line = go.Scatter(
        x=pc_labels(cumulative_variance),
        y=cumulative_variance,
        mode='lines+markers',
        name='Cumulative',
        line=dict(color='rgba(26, 118, 255, 0.7)'),
    )

    fig = go.Figure()
    for trace in (individual_bars, cumulative_line):
        fig.add_trace(trace)

    # Titles, spacing and a transparent legend placed above the plot area.
    legend_style = dict(
        x=0.8,
        y=1.15,
        traceorder='normal',
        font=dict(size=12, color="white"),
        bgcolor="rgba(0,0,0,0)",
    )
    fig.update_layout(
        title='Varianza Explicada por PCA',
        xaxis=dict(title='Número de Componentes Principales'),
        yaxis=dict(title='Varianza Explicada'),
        template='plotly_dark',
        showlegend=True,
        bargap=0.2,
        legend=legend_style,
    )
    fig.update_layout(**custom_dark_layout)

    return fig

def create_scatter_matrix_plot(X, target, pca, n_components):
    """Draw pairwise scatter plots of the first `n_components` PCA projections.

    Args:
        X: Data passed to `pca.transform`.
        target: Values used to colour the points.
        pca: Fitted PCA used to project `X`.
        n_components: Number of projected columns to include as dimensions.

    Returns:
        A Plotly scatter-matrix figure with the diagonal hidden.
    """
    projected = pca.transform(X)

    # Column positions (as strings) mapped to axis titles, plus the legend title.
    axis_labels = {str(i): f"PC {i+1}" for i in range(n_components)}
    axis_labels['color'] = 'Clase'

    fig = px.scatter_matrix(
        projected,
        dimensions=range(n_components),
        color=target,
        labels=axis_labels,
        title=f'Matriz de Dispersión de Componentes Principales - {n_components} Componentes',
    )
    fig.update_traces(diagonal_visible=False)
    for layout_kwargs in ({'template': 'plotly_dark'}, custom_dark_layout):
        fig.update_layout(**layout_kwargs)
    return fig

if __name__ == '__main__':
    # Newer Dash releases start the server with `app.run`; `run_server` is the
    # older spelling. Prefer `run` when present so the notebook works with
    # whichever Dash version the unpinned install resolved to.
    start_server = app.run if hasattr(app, "run") else app.run_server
    start_server(debug=True)