In [1]:
import warnings
# Silence every UserWarning from here on (the filter is installed before the %run below).
warnings.simplefilter("ignore", category=UserWarning)
# Execute the companion notebook.
# NOTE(review): presumably this is where df_final, unique_classes, numeric_columns and
# the catalog frames used in later cells come from -- none of them is created in this
# notebook; confirm.
%run Data_and_catalogs_proyect.ipynb

# Interactive scatter plot of three parameters (x axis, y axis, and marker size)

In [2]:
import pandas as pd
import plotly.express as px
import ipywidgets as widgets
from ipywidgets import interact

def graficar_interactivo(df, eje_x, eje_y, tamaño):
    """Show a Plotly scatter plot of two columns, with a third column as marker size.

    Args:
        df: DataFrame holding the data to plot.
        eje_x: Name of the column drawn on the x axis.
        eje_y: Name of the column drawn on the y axis.
        tamaño: Name of the column used for the marker size; rows where it is
            missing are left out of the plot.
    """
    # Drop rows with no value in the size column before plotting.
    con_tamaño = df.dropna(subset=[tamaño])

    # Each axis/size label is simply the column's own name.
    etiquetas = {columna: columna for columna in (eje_x, eje_y, tamaño)}
    titulo = f"Gráfico de dispersión: {eje_x} vs {eje_y} (Tamaño: {tamaño})"

    figura = px.scatter(
        con_tamaño,
        x=eje_x,
        y=eje_y,
        size=tamaño,
        title=titulo,
        labels=etiquetas,
    )
    figura.show()

# Axis selectors offer every column of df_final and start on its first two columns.
eje_x_widget = widgets.Dropdown(
    description='Eje X:',
    options=list(df_final.columns),
    value=df_final.columns[0],
)

eje_y_widget = widgets.Dropdown(
    description='Eje Y:',
    options=list(df_final.columns),
    value=df_final.columns[1],
)

# The marker-size selector only offers numeric columns.
tamaño_widget = widgets.Dropdown(
    description='Tamaño:',
    options=list(filter(lambda col: pd.api.types.is_numeric_dtype(df_final[col]), df_final.columns)),
    value=df_final.select_dtypes(include=['number']).columns[0],
)

# df_final is passed as a fixed argument; the three dropdowns drive the plot.
interact(
    graficar_interactivo,
    df=widgets.fixed(df_final),
    eje_x=eje_x_widget,
    eje_y=eje_y_widget,
    tamaño=tamaño_widget,
)

interactive(children=(Dropdown(description='Eje X:', options=('Nombre', 'Spectype', 'Mo', 'Period', 'Eccentric…

<function __main__.graficar_interactivo(df, eje_x, eje_y, tamaño)>

# Correlation matrix, positive/negative dendrograms, and Venn diagram for the selected HMXB classes and parameters from the Neumann and Fortin catalogs

In [9]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from ipywidgets import interact, widgets
import venn
import scipy.cluster.hierarchy as sch
from scipy.spatial import distance as ssd
import warnings

# Hide UserWarning messages raised from the venn module.
warnings.filterwarnings("ignore", category=UserWarning, module="venn")

# Name of the df_final column that is matched against the classes picked in the widgets.
class_column = 'Class'

def Venn_diagram(selected_columns, data=None):
    """Draw a Venn diagram of which rows have a value in each selected column.

    One set is built per column, containing the index labels of the rows where
    that column is not missing; the diagram shows the size of every overlap.

    Args:
        selected_columns: Between 2 and 6 column names to compare.
        data: DataFrame to read the columns from. Defaults to the module-level
            ``df_final`` when omitted.

    Raises:
        ValueError: If the number of columns is outside the 2-6 range for
            which the ``venn`` module provides a drawing function.
    """
    selected_columns = list(selected_columns)
    n_sets = len(selected_columns)
    if not 2 <= n_sets <= 6:
        raise ValueError(
            f"Venn_diagram supports between 2 and 6 columns, got {n_sets}."
        )
    if data is None:
        data = df_final

    sets = [set(data.index[data[col].notna()]) for col in selected_columns]
    labels = venn.get_labels(sets, fill=['number'])

    # The venn module has one drawing function per set count (venn2 ... venn6);
    # the label keys and the names list must match that count, so pick the
    # function for the number of columns actually selected.
    draw = getattr(venn, f"venn{n_sets}")
    fig, ax = draw(labels, names=selected_columns)
    for text in ax.texts:
        text.set_fontsize(8)
    plt.title(f"Venn Diagram for {n_sets} variables")
    plt.show()

def _plot_correlation_dendrogram(strength, labels, title):
    """Cluster variables by ``1 - strength`` with Ward linkage and draw the dendrogram."""
    distance = 1.0 - strength
    # squareform() rejects a non-zero diagonal; a variable is at distance 0 from itself
    # even when its self-correlation came out as NaN (e.g. a constant column).
    np.fill_diagonal(distance, 0.0)
    links = sch.linkage(ssd.squareform(distance), method='ward')

    plt.figure(figsize=(8, 6))
    sch.dendrogram(links, labels=labels, leaf_rotation=45, leaf_font_size=10)
    plt.title(title)
    plt.show()


def plot_dendrogram(correlation_matrix):
    """Draw two dendrograms: one from the positive and one from the negative correlations.

    For the positive dendrogram only correlations > 0 count (everything else is
    treated as 0); for the negative dendrogram only the absolute value of
    correlations < 0 counts. In both cases the distance between two variables
    is ``1 - strength``.

    Args:
        correlation_matrix: Square, symmetric DataFrame of pairwise
            correlations whose columns are the variable names. It is not modified.

    Raises:
        ValueError: If the matrix has fewer than two variables.
    """
    labels = list(correlation_matrix.columns)
    if len(labels) < 2:
        raise ValueError("plot_dendrogram needs a correlation matrix with at least two variables.")

    # Work on a private float copy so the caller's frame is never written to
    # (writing through DataFrame.values fails when pandas hands out a read-only array).
    corr = correlation_matrix.to_numpy(dtype=float, copy=True)
    corr[np.isnan(corr)] = 0.0

    positive_strength = np.where(corr > 0, corr, 0.0)
    negative_strength = np.where(corr < 0, -corr, 0.0)

    _plot_correlation_dendrogram(positive_strength, labels, 'Dendrogram for Positive Correlation')
    _plot_correlation_dendrogram(negative_strength, labels, 'Dendrogram for Negative Correlation')

def correlation_matrix_all(selected_classes, selected_columns):
    """Show the Kendall correlation heatmap, Venn diagram and dendrograms for a selection.

    Rows of ``df_final`` are kept when their ``class_column`` value is one of
    ``selected_classes`` and none of the selected numeric columns is missing.
    The Kendall correlation of those rows is drawn as a heatmap, the rows are
    displayed, a Venn diagram is drawn when 2-6 columns are selected, and the
    correlation matrix is handed to ``plot_dendrogram``.

    Args:
        selected_classes: Class labels to keep.
        selected_columns: Column names to correlate; non-numeric names are ignored.

    Returns:
        None. A message is printed and nothing is plotted when the selection
        cannot produce a correlation matrix.
    """
    # Validate the list that is actually used: only numeric columns survive,
    # and the dendrogram needs at least two of them.
    numeric_available = df_final.select_dtypes(include=[np.number]).columns.tolist()
    selected_columns = [col for col in selected_columns if col in numeric_available]

    if len(selected_columns) == 0:
        print("Por favor selecciona al menos una variable numérica.")
        return

    if len(selected_columns) < 2:
        print("Selecciona al menos dos variables para generar el dendrograma.")
        return

    if len(selected_classes) == 0:
        print("Por favor selecciona al menos una clase.")
        return

    in_selected_classes = df_final[class_column].isin(selected_classes)
    complete_rows = (
        df_final.loc[in_selected_classes, ['Nombre'] + selected_columns]
        .dropna(subset=selected_columns)
    )
    num_objects = len(complete_rows)

    if num_objects == 0:
        print("No hay objetos con valores completos para las variables seleccionadas.")
        return

    if num_objects < 2:
        # A single row yields an all-NaN correlation matrix, which cannot be clustered.
        print("Se necesitan al menos dos objetos con valores completos para calcular la correlación.")
        return

    kendall_corr = complete_rows[selected_columns].corr(method='kendall')

    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(kendall_corr, annot=True, cmap="coolwarm", cbar=True, center=0, ax=ax)
    ax.set_title(
        f"Matriz de Correlación (Kendall) - {', '.join(map(str, selected_classes))} \n"
        f"({num_objects} objetos con valores completos)"
    )

    plt.tight_layout()
    plt.show()

    print(f"Objetos con valores completos para los parámetros seleccionados ({num_objects} objetos):")
    display(complete_rows)

    # NOTE: Venn_diagram is called with the column names only, so which rows it
    # counts is decided inside Venn_diagram, not by the class filter applied here.
    if 2 <= len(selected_columns) <= 6:
        Venn_diagram(selected_columns)
    else:
        print("El diagrama de Venn solo es compatible con entre 2 y 6 variables.")

    plot_dendrogram(kendall_corr)

# Remove missing entries so the selectors only list real class labels and column names.
unique_classes = list(filter(pd.notna, unique_classes))
numeric_columns = list(filter(pd.notnull, numeric_columns))

# Every class is selected at start-up, together with the first eight numeric columns.
interact(
    correlation_matrix_all,
    selected_classes=widgets.SelectMultiple(
        description='Classes:',
        options=unique_classes,
        value=unique_classes,
        disabled=False,
    ),
    selected_columns=widgets.SelectMultiple(
        description='Parameters:',
        options=numeric_columns,
        value=numeric_columns[:8],
        disabled=False,
    ),
)

interactive(children=(SelectMultiple(description='Classes:', index=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, …

<function __main__.correlation_matrix_all(selected_classes, selected_columns)>

# List all systems that have missing values in the chosen column(s) while having known values in the other chosen column(s).

In [10]:
# Display df_final for reference before querying its missing values.
df_final

Unnamed: 0,Nombre,Spectype,Mo,Period,Eccentricity,Spin_period,Distance,Class,SpType,Mean_Mass,Teff,N_H,Max_Soft_Flux,Min_Soft_Flux,Max_Hard_Flux,Min_Hard_Flux,Mean_Soft_Flux,Mean_Hard_Flux,Hardness
0,IGR J00370+6122,BN0.7 Ib,22.0,15.66490,0.4800,674.0000,3401.0,sg,BN0.5II-III / BN0.7Ib,,15411.1,11.820,95.5227,0.1253,18.420,8.008,47.82400,13.2140,0.276305
1,gam Cas,B0.5IVpe,13.0,203.37100,0.2600,,,Be,B0.5IVpe,,,,274.0580,157.7170,83.440,83.440,215.88750,83.4400,0.386498
2,2S 0114+650,B1Iae,16.0,11.59830,0.1800,10008.0000,4475.0,sg,B1Iae,,14131.5,,130.8930,101.1000,155.400,155.400,115.99650,155.4000,1.339696
3,IGR J01363+6610,B1Ve,12.5,159.00000,,,5816.0,Be,B1Ve,,,,0.1174,0.1174,,,0.11740,,
4,RX J0146.9+6121,B1IIIe,9.6,330.00000,,1407.4000,2751.0,Be,B1III-Ve,,,3.908,105.5000,25.4422,,,65.47110,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94,GRO J2058+42,O9.5-B0IV-Ve,18.0,55.00000,,195.2500,8861.0,Be,O9.5-B0IV-Ve,,31462.3,,167.9000,167.9000,,,167.90000,,
95,SAX J2103.5+4545,B0Ve,17.5,12.66536,0.4055,358.6100,6218.0,Be,B0Ve,,29372.9,28.000,111.6000,0.2226,97.955,68.150,55.91130,83.0525,1.485433
96,Cep X-4,B1-B2Ve,10.8,20.85000,,65.3508,7446.0,Be,B1-B2Ve,,,7.115,470.1000,0.6595,,,235.37975,,
97,1H 2202+501,Be,,,,,1116.0,Be,B3e,,,,,,,,,,


In [11]:
import pandas as pd
import numpy as np
from ipywidgets import interact, widgets

def correlation_matrix_all(selected_classes, selected_columns_nan, selected_columns_non_nan):
    """Display the systems that are missing some values while having others.

    A row of ``df_final`` is shown when its ``class_column`` value is one of
    ``selected_classes``, every column in ``selected_columns_nan`` is missing,
    and every column in ``selected_columns_non_nan`` is present. An empty
    column list imposes no condition.

    NOTE(review): this definition rebinds the name ``correlation_matrix_all``,
    which an earlier cell of this notebook defines with a different signature
    and purpose; the name is kept here so existing references keep working.

    Args:
        selected_classes: Class labels to keep.
        selected_columns_nan: Columns that must all be missing; non-numeric names are ignored.
        selected_columns_non_nan: Columns that must all be known; non-numeric names are ignored.

    Returns:
        None. The matching rows (or a message) are shown as cell output.
    """
    numeric_available = df_final.select_dtypes(include=[np.number]).columns.tolist()
    nan_columns = [col for col in selected_columns_nan if col in numeric_available]
    known_columns = [col for col in selected_columns_non_nan if col in numeric_available]

    if not nan_columns and not known_columns:
        print("Por favor selecciona al menos una columna con valores nulos o valores conocidos.")
        return

    if len(selected_classes) == 0:
        print("Por favor selecciona al menos una clase.")
        return

    in_classes = df_final[df_final[class_column].isin(selected_classes)]

    # Both conditions are evaluated on the same rows and combined as one mask,
    # so each system appears once with its original column names (a self-merge
    # on 'Nombre' would duplicate every column and multiply repeated names).
    all_missing = in_classes[nan_columns].isnull().all(axis=1)
    all_known = in_classes[known_columns].notnull().all(axis=1)
    matching = in_classes[all_missing & all_known]

    if matching.empty:
        print("No hay sistemas que cumplan ambas condiciones (valores nulos en algunas columnas y completos en otras).")
    else:
        print("Sistemas que tienen valores nulos en las columnas seleccionadas y valores completos en las otras columnas:")
        display(matching)

# Remove missing entries so the selectors only list real class labels and column names.
unique_classes = list(filter(pd.notna, unique_classes))
numeric_columns = list(filter(pd.notnull, numeric_columns))

# Every class is selected at start-up; both column selectors start empty.
interact(
    correlation_matrix_all,
    selected_classes=widgets.SelectMultiple(
        description='Classes:',
        options=unique_classes,
        value=unique_classes,
        disabled=False,
    ),
    selected_columns_nan=widgets.SelectMultiple(
        description='NaNs:',
        options=numeric_columns,
        value=[],
        disabled=False,
    ),
    selected_columns_non_nan=widgets.SelectMultiple(
        description='no NaNs:',
        options=numeric_columns,
        disabled=False,
    ),
)


interactive(children=(SelectMultiple(description='Classes:', index=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, …

<function __main__.correlation_matrix_all(selected_classes, selected_columns_nan, selected_columns_non_nan)>

# Histogram of the luminosity computed from the Distance column of the Fortin catalog and the Swift/BAT flux in the updated Neumann catalog.

In [None]:
# Store the midpoint of the maximum and minimum BAT flux of each row as a new column.
cat_neuman_2['mean_BAT_flux'] = cat_neuman_2['BAT_max_flux'].add(cat_neuman_2['BAT_min_flux']).div(2)

cat_neuman_2.head()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Keep only the sources present in both catalogs. Each boolean mask is built from,
# and applied to, its own frame: a mask indexed like cat_neuman_2 does not line up
# with the rows of v2023_09_Fortin.
common_names = cat_neuman_2['Name'].isin(v2023_09_Fortin['Main_ID'])
cat_neuman_2_common = cat_neuman_2[common_names]
fortin_common = v2023_09_Fortin[v2023_09_Fortin['Main_ID'].isin(cat_neuman_2['Name'])]

# Pair each mean BAT flux with the distance of the source of the same name.
common_data = pd.merge(cat_neuman_2_common[['Name', 'mean_BAT_flux']],
                       fortin_common[['Main_ID', 'Distance']],
                       left_on='Name', right_on='Main_ID')

parsec_to_cm = 3.086e18
common_data['Distance_cm'] = common_data['Distance'] * parsec_to_cm

# Luminosity of an isotropic source: L = 4 * pi * F * d^2.
# NOTE(review): the result is in erg/s only if mean_BAT_flux is in erg/s/cm^2 with no
# scale factor -- confirm the flux units of the catalog.
pi = np.pi
common_data['Luminosity'] = 4 * pi * common_data['mean_BAT_flux'] * common_data['Distance_cm']**2

# Sources without a flux or a distance have no luminosity, and a log axis can only
# show positive values.
luminosity = common_data['Luminosity'].dropna()
luminosity = luminosity[luminosity > 0]

if luminosity.empty:
    print("No sources with a positive luminosity to plot.")
else:
    # Bin edges evenly spaced in log space, so the ten bins have equal width on the
    # logarithmic x axis.
    lowest, highest = luminosity.min(), luminosity.max()
    bins = 10 if lowest == highest else np.geomspace(lowest, highest, 11)

    plt.figure(figsize=(8, 6))
    plt.hist(luminosity, bins=bins, color='blue', alpha=0.7)
    plt.xscale('log')
    plt.yscale('log')
    plt.title('Histogram of Luminosity (Distance in Parsecs)')
    plt.xlabel('Luminosity [erg/s] for BAT/Swift Fluxes')
    plt.ylabel('Frequency')
    # NOTE(review): 10e40 is 1e41 and 10e60 is 1e61 -- confirm this is the intended range.
    plt.xlim(10e40,10e60)
    plt.grid(True, which="both", linestyle="--", alpha=0.7)
    plt.show()


In [None]:
# Check whether the string 'B0 IIIne' occurs in the SpType column of cat_neuman.
'B0 IIIne' in set(cat_neuman['SpType'])