In [None]:
# Mount Google Drive at /content/drive so later cells can read files stored there.
# The google.colab module is only available inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import OPTICS
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [None]:
# Load the dataset CSV from the mounted Drive folder.
# NOTE(review): the folder name in this path looks mis-encoded — confirm it
# matches the actual folder name on Drive.
ruta_csv = '/content/drive/MyDrive/Rayos C칩smicos/Meiga/Tesis_OPTICS.csv'
df = pd.read_csv(ruta_csv)

In [None]:
# Shuffle every row once. A fixed seed makes the permutation, and therefore the
# row order fed to the scaling/clustering cells, reproducible across
# "Restart & Run All".
SEED = 42
# Optional row cap for quick experiments. None keeps the full shuffled dataset,
# which is what this cell effectively used before (the head(1000) result was
# immediately overwritten by the full copy).
N_ROWS_TEST = None

df_mezclado = df.sample(frac=1, random_state=SEED).reset_index(drop=True)

# .copy() so later edits to df_test never write back into df_mezclado.
if N_ROWS_TEST is None:
    df_test = df_mezclado.copy()
else:
    df_test = df_mezclado.head(N_ROWS_TEST).copy()

In [None]:
from scipy.stats import zscore

# Columns used for outlier detection and, in later cells, as the clustering features.
FEATURE_COLS = ['Peak', 'SPC-24', 'Charge']

# A row is discarded when |z| exceeds this value in ANY feature column.
# NOTE(review): the previous comment said "Z > 3" while the code applied 1.
# The value the code actually used (1) is kept so results do not change;
# confirm which cut is intended.
Z_THRESHOLD = 1

# Absolute z-score of each feature (zscore standardizes along axis 0, i.e. per column).
Z = np.abs(zscore(df_test[FEATURE_COLS]))

# Mask: a row is an outlier if at least one column exceeds the threshold.
outlier_mask = (Z > Z_THRESHOLD).any(axis=1)

# Cleaned frame with a rebuilt 0..n-1 index, plus the matching feature sub-frame.
df_clean = df_test[~outlier_mask].reset_index(drop=True)
features = df_clean[FEATURE_COLS]

# Percentage of rows removed by the cut.
porcentaje_perdida = 100 * outlier_mask.sum() / len(df_test)
print(f"Porcentaje de pérdida por outliers: {porcentaje_perdida:.2f}%")

In [None]:
# Column means of the cleaned data, printed one per line as "<label> <mean>".
resumen_medias = (
    ('Promedio deltaTime', 'deltaTime'),
    ('Promedio Pos Pico:', 'Peak_Position'),
    ('Promedio Pico:', 'Peak'),
    ('Promedio Carga:', 'Charge'),
)
for etiqueta, columna in resumen_medias:
    print(etiqueta, df_clean[columna].mean())

PCA 💢

In [None]:
# Standardize the features, then rotate them onto their three principal components.
X_scaled = StandardScaler().fit_transform(features)
pca = PCA(n_components=3)
X_pca = pca.fit_transform(X_scaled)

# One panel per pair of components; the tuples are 0-based column indices into X_pca.
pares_pca = [(0, 1), (0, 2), (1, 2)]
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
for ax, (i, j) in zip(axes, pares_pca):
    ax.scatter(X_pca[:, i], X_pca[:, j], s=20, alpha=0.6)
    ax.set_xlabel(f"PCA{i + 1}")
    ax.set_ylabel(f"PCA{j + 1}")
    ax.set_title(f"Proyección PCA{i + 1} vs PCA{j + 1}")

# No legend: the points carry no class labels at this stage, so there are no
# handles to show (and no `handles` variable exists yet on a fresh kernel).
plt.tight_layout()
plt.show()

In [None]:
from sklearn.cluster import OPTICS
import matplotlib.pyplot as plt

# Cluster the standardized features with OPTICS. The clustering runs on
# X_scaled; the PCA scores are only used below to draw the result.
optics = OPTICS(min_samples=100, xi=0.05, max_eps=0.5, min_cluster_size=100)
labels = optics.fit_predict(X_scaled)

# --- Color configuration ---
# One shared normalization spanning the full label range, applied explicitly to
# every panel so the same label always maps to the same tab10 color.
unique_labels = np.unique(labels)
cmap = plt.cm.tab10
norm = plt.Normalize(vmin=unique_labels.min(), vmax=unique_labels.max())

# --- Figure with the three PCA projections, colored by cluster label ---
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
for ax, (i, j) in zip(axes, [(0, 1), (0, 2), (1, 2)]):
    ax.scatter(X_pca[:, i], X_pca[:, j], c=labels, cmap=cmap, norm=norm,
               s=10, alpha=0.6)
    ax.set_xlabel(f"PCA{i + 1}")
    ax.set_ylabel(f"PCA{j + 1}")
    ax.set_title(f"OPTICS: PCA{i + 1} vs PCA{j + 1}")

# --- Automatic legend ---
# Built from the first panel's scatter; all panels share labels, cmap and norm,
# so a single figure-level legend describes every panel.
legend1 = fig.legend(*axes[0].collections[0].legend_elements(),
                     title="Clusters", loc="upper right")

plt.tight_layout()
plt.show()

In [None]:
def Charge_Histogram_All(df, cluster_col='cluster', bin_width=7):
    """
    Plot the distribution of the 'Charge' column on a logarithmic y-axis:
    one curve for all rows plus one curve per cluster, all sharing the same bins.

    Parameters:
    - df: DataFrame containing a 'Charge' column and the column named by
      `cluster_col`.
    - cluster_col: name of the column holding each row's cluster label.
    - bin_width: width of every histogram bin, in the units of 'Charge';
      must be positive.

    Returns:
    - None. The figure is displayed with plt.show().

    Raises:
    - ValueError: if `bin_width` is not a positive number.
    - KeyError: if 'Charge' or `cluster_col` is not a column of `df`.
    """
    # Validate before creating the figure so a bad call leaves no empty plot behind.
    # `not (x > 0)` also rejects NaN.
    if not bin_width > 0:
        raise ValueError(f"bin_width must be positive, got {bin_width!r}")
    missing = [col for col in ('Charge', cluster_col) if col not in df.columns]
    if missing:
        raise KeyError(f"df is missing required column(s): {missing}")

    charge = df['Charge']

    # --- bins shared by every curve ---
    min_val, max_val = charge.min(), charge.max()
    bins = np.arange(min_val, max_val + bin_width, bin_width)
    if bins.size < 2:
        # All charges are equal: arange yields a single edge, which defines no
        # bin. Close it so the data falls into one bin of width bin_width.
        bins = np.array([min_val, min_val + bin_width])
    # The edges never change, so the centers are computed once for all curves.
    bin_centers = (bins[:-1] + bins[1:]) / 2

    plt.figure(figsize=(15, 8))

    # --- global histogram ---
    counts, _ = np.histogram(charge, bins=bins)
    plt.semilogy(bin_centers, counts, color='black', lw=3, alpha=0.3,
                 label="Total", zorder=5)

    # --- one histogram per cluster, in sorted label order ---
    for c in sorted(df[cluster_col].unique()):
        subset = charge[df[cluster_col] == c].to_numpy()
        counts, _ = np.histogram(subset, bins=bins)
        plt.semilogy(bin_centers, counts, label=f"Cluster {c}", alpha=0.8, zorder=4)

    plt.ylabel("# of Counts", fontsize=20)
    plt.xlabel("ADC Values", fontsize=20)
    plt.title("Integrated Charge Histogram (Global + Clusters)", fontsize=20)
    plt.grid(True)
    plt.legend()
    plt.show()

In [None]:
# Store each row's OPTICS label in a 'Clusters' column. `labels` was predicted
# from X_scaled, which was built from df_clean's feature columns, so the two
# are row-aligned.
df_clean['Clusters'] = labels

In [None]:
# Charge histogram of the cleaned data: total curve plus one curve per OPTICS
# cluster, using 10-unit-wide bins.
Charge_Histogram_All(df=df_clean, bin_width=10, cluster_col='Clusters')