In [48]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pickle

In [49]:
def load_data(file_path):
    """Read the CSV at ``file_path`` and return it as a pandas DataFrame."""
    return pd.read_csv(file_path)


In [50]:
def clean_data(data):
    """Return a copy of ``data`` without any row that contains a missing value.

    The input frame is left untouched.
    """
    return data.dropna()

In [51]:
def analyze_data(data):
    """Draw a seaborn pair plot of the eight feature columns, colored by 'Class'.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    feature_columns = [
        'Mean_Integrated', 'SD', 'EK', 'Skewness',
        'Mean_DMSNR_Curve', 'SD_DMSNR_Curve', 'EK_DMSNR_Curve', 'Skewness_DMSNR_Curve',
    ]
    sns.pairplot(data, hue='Class', vars=feature_columns)
    plt.show()

In [52]:
def split_data(data):
    """Split ``data`` into train, validation and test partitions.

    The 'Class' column is the target; every other column is a feature.
    80% of the rows go to training and the remaining 20% is divided evenly
    between validation and test. Both splits use ``random_state=42``.

    Returns:
        (X_train, X_val, X_test, y_train, y_val, y_test)
    """
    features = data.drop(columns='Class')
    target = data['Class']

    # First cut: hold out 20% of the rows.
    X_train, X_holdout, y_train, y_holdout = train_test_split(
        features, target, test_size=0.2, random_state=42
    )
    # Second cut: halve the hold-out into validation and test.
    X_val, X_test, y_val, y_test = train_test_split(
        X_holdout, y_holdout, test_size=0.5, random_state=42
    )
    return X_train, X_val, X_test, y_train, y_val, y_test

In [53]:
def select_features(data):
    """Return only the 'EK' and 'Skewness' columns of ``data``, in that order."""
    return data[['EK', 'Skewness']]

In [54]:
def normalize_data(X_train, X_val, X_test, return_scaler=False):
    """Standardize the three partitions with a scaler fitted on the training set.

    The ``StandardScaler`` is fitted on ``X_train`` only and then applied
    unchanged to ``X_val`` and ``X_test``, so no statistics from the held-out
    partitions influence the transform.

    Args:
        X_train: Training features; used to fit the scaler.
        X_val: Validation features.
        X_test: Test features.
        return_scaler: When True, the fitted scaler is appended to the returned
            tuple so it can be persisted and reused at inference time.
            Defaults to False, which keeps the original three-element result.

    Returns:
        ``(X_train_scaled, X_val_scaled, X_test_scaled)``, or
        ``(X_train_scaled, X_val_scaled, X_test_scaled, scaler)`` when
        ``return_scaler`` is True.
    """
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)
    X_test_scaled = scaler.transform(X_test)
    if return_scaler:
        return X_train_scaled, X_val_scaled, X_test_scaled, scaler
    return X_train_scaled, X_val_scaled, X_test_scaled

In [55]:
def train_model(X_train, y_train, learning_rate, epochs=10, batch_size=32):
    """Build and fit a small dense binary classifier.

    The network has two ReLU hidden layers (64 and 32 units) followed by a
    single sigmoid output unit. It is compiled with the Adam optimizer,
    binary cross-entropy loss and the accuracy metric, then fitted silently
    (``verbose=0``).

    Args:
        X_train: 2-D training features; ``X_train.shape[1]`` sets the input width.
        y_train: Training targets.
        learning_rate: Learning rate passed to the Adam optimizer.
        epochs: Number of training epochs. Defaults to 10.
        batch_size: Mini-batch size used by ``fit``. Defaults to 32.

    Returns:
        The fitted ``keras.Sequential`` model.
    """
    model = keras.Sequential([
        layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
        layers.Dense(32, activation='relu'),
        layers.Dense(1, activation='sigmoid')
    ])
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)
    return model

In [56]:
def train_final_model(X_train, y_train, X_val, y_val, learning_rates=(0.001, 0.01, 0.1)):
    """Train one model per learning rate and return the best one on validation.

    Each candidate is trained with ``train_model`` and scored with
    ``model.evaluate`` on the validation data. The model with the highest
    validation accuracy is returned; on ties the earliest candidate wins.

    Args:
        X_train: Training features.
        y_train: Training targets.
        X_val: Validation features used for model selection.
        y_val: Validation targets used for model selection.
        learning_rates: Learning rates to try, in order. Defaults to
            ``(0.001, 0.01, 0.1)``.

    Returns:
        The trained model with the highest validation accuracy.

    Raises:
        ValueError: If ``learning_rates`` is empty.
    """
    learning_rates = list(learning_rates)
    if not learning_rates:
        raise ValueError("learning_rates must contain at least one value")

    best_accuracy = float('-inf')
    best_model = None
    for lr in learning_rates:
        model = train_model(X_train, y_train, lr)
        _, accuracy = model.evaluate(X_val, y_val, verbose=0)
        # Always keep the first candidate: a run where every accuracy is 0.0
        # (or NaN) must still return a model rather than None.
        if best_model is None or accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model = model
    return best_model

In [57]:
def evaluate_model(model, X_test, y_test):
    """Evaluate ``model`` on the test data and print its accuracy.

    ``model.evaluate`` must return a ``(loss, accuracy)`` pair; only the
    accuracy is reported. Nothing is returned.
    """
    _test_loss, test_accuracy = model.evaluate(X_test, y_test)
    print("Accuracy:", test_accuracy)

In [58]:
def save_model(model, scaler, model_file, scaler_file):
    """Persist the trained model and its scaler to disk.

    The model is written through its own ``save`` method to ``model_file``.
    The scaler is pickled to ``scaler_file`` with a '.pkl' suffix appended.
    """
    model.save(model_file)
    pickle_path = scaler_file + '.pkl'
    with open(pickle_path, 'wb') as handle:
        pickle.dump(scaler, handle)


In [59]:
# Load the raw dataset into a DataFrame.
# NOTE(review): the path looks like a Colab Google Drive mount — presumably
# Drive must be mounted before this cell runs; confirm for other environments.
file_path = '/content/drive/MyDrive/Archivos-Curso/Pulsar.csv'
data = load_data(file_path)


In [60]:
# Drop every row with a missing value, then write the cleaned table to a CSV
# in the same folder as the raw file (the DataFrame index is not written).
cleaned_data = clean_data(data)
cleaned_data.to_csv('/content/drive/MyDrive/Archivos-Curso/Pulsar_cleaned.csv', index=False)


In [61]:
# Partition the cleaned data into train / validation / test features and targets.
X_train, X_val, X_test, y_train, y_val, y_test = split_data(cleaned_data)


In [62]:
# Apply the same column selection to each partition so all three share one feature set.
X_train_selected = select_features(X_train)
X_val_selected = select_features(X_val)
X_test_selected = select_features(X_test)


In [63]:
# Normalization / scaling
# The scaler is fitted on the training features only; validation and test are
# transformed with those same fitted statistics. `scaler` is kept at notebook
# level because it is saved together with the model in a later cell.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_selected)
X_val_scaled = scaler.transform(X_val_selected)
X_test_scaled = scaler.transform(X_test_selected)


In [64]:
# Train one candidate per learning rate and keep the one with the best validation accuracy.
best_model = train_final_model(X_train_scaled, y_train, X_val_scaled, y_val)


In [65]:
# Report the selected model's accuracy on the test partition.
evaluate_model(best_model, X_test_scaled, y_test)


Accuracy: 0.9787709712982178


In [66]:
# Persist the selected model and the fitted scaler.
# save_model appends '.pkl' to scaler_file, so the scaler lands in 'scaler.pkl'.
# NOTE(review): the '.h5' extension presumably selects Keras' HDF5 format — confirm
# it is still the desired format for the installed Keras version.
model_file = 'best_model.h5'
scaler_file = 'scaler'
save_model(best_model, scaler, model_file, scaler_file)