# Main_Notebook

In [13]:
# =============================
# Instala pacotes necessários
# =============================
!pip install lightkurve tqdm --quiet

import lightkurve as lk
import pandas as pd
import os
from tqdm import tqdm

# =============================
# List of trusted TICs
# =============================
# Each entry pairs a TIC identifier (kept as a string) with a display name;
# the processing loop below reads the "tic" and "name" keys.
tics = [
    {"tic": "261136679", "name": "TOI-1749"},
    {"tic": "16740101",  "name": "TOI-1233"},
    {"tic": "183985250","name": "TOI-700d"}
]

# Folder where the CSVs are saved (created if it does not exist yet)
output_dir = "csv_lc"
os.makedirs(output_dir, exist_ok=True)

# =============================
# Main loop over every TIC
# =============================
# For each target: download the SPOC light curves, clean each one on its own,
# concatenate them (no stitching) and write one CSV per target into output_dir.
# Errors for one target are reported and the loop moves on to the next one.
for item in tics:
    tic_id = item["tic"]
    name = item["name"]
    print(f"\n==============================")
    print(f"🔭 Processando {name} ({tic_id})...")

    try:
        # Search and download the SPOC light curves
        print("📥 Buscando LightCurves SPOC...")
        lc_collection = lk.search_lightcurve(f"TIC {tic_id}", mission='TESS', author='SPOC').download_all()
        # lightkurve's download_all() gives None for an empty search result; report
        # that explicitly instead of failing later on len(None).
        if lc_collection is None or len(lc_collection) == 0:
            print(f"⚠️ Nenhuma LightCurve SPOC encontrada para {name} ({tic_id}); pulando.")
            continue
        print(f"✅ {len(lc_collection)} LightCurves baixadas.")

        # Per-light-curve DataFrames to concatenate afterwards
        all_data = []

        # Clean every light curve individually
        for i, lc in enumerate(lc_collection):
            print(f"  ➡ Processando LightCurve {i+1}/{len(lc_collection)}...")
            lc_clean = lc.remove_nans().remove_outliers(sigma=5).flatten(window_length=401).normalize()
            df_lc = lc_clean.to_pandas()
            # to_pandas() carries the timestamps in the index; move them into a "time"
            # column so they survive ignore_index=True and to_csv(index=False) below.
            if "time" not in df_lc.columns:
                df_lc = df_lc.rename_axis("time").reset_index()
            all_data.append(df_lc)
            print(f"     ✔ LightCurve {i+1} processada e adicionada.")

        # Concatenate all light curves into a single DataFrame (no stitching)
        df_final = pd.concat(all_data, ignore_index=True)

        # Save the CSV
        csv_filename = os.path.join(output_dir, f"{tic_id}_refinado.csv")
        df_final.to_csv(csv_filename, index=False)
        print(f"💾 CSV salvo com sucesso: '{csv_filename}'")

    except Exception as e:
        print(f"❌ Erro ao processar {tic_id}: {e}")

print("\n🎉 Todos os TICs processados! CSVs prontos para IA.")


KeyboardInterrupt: 

In [None]:
from google.colab import files

# Hands the file at `csv_filename` to Colab's download helper.
# NOTE(review): `csv_filename` is not assigned in this cell. It comes from the
# per-TIC loop of the first cell, so only the last CSV written there is
# downloaded, and this raises NameError if that loop never reached the save step.
files.download(csv_filename)

In [None]:
# =============================
# Instala pacotes necessários
# =============================
!pip install lightkurve tqdm --quiet

import lightkurve as lk
import pandas as pd
import os
from tqdm import tqdm
from google.colab import files, drive


In [None]:
# =============================
# List of trusted TICs
# =============================
# Each entry pairs a TIC identifier (kept as a string) with a display name;
# the processing loop reads the "tic" and "name" keys.
tics = [
    {"tic": "261136679", "name": "TOI-1749"},
    {"tic": "16740101",  "name": "TOI-1233"},
    {"tic": "183985250","name": "TOI-700d"}
]

# Local folder where the CSVs are saved temporarily
local_output_dir = "csv_lc"
os.makedirs(local_output_dir, exist_ok=True)

# Mount Google Drive and make sure the destination folder exists there
drive.mount('/content/drive')
drive_output_dir = "/content/drive/MyDrive/csv_lc"
os.makedirs(drive_output_dir, exist_ok=True)
print("✅ Configuração inicial concluída!")


In [None]:
# =============================
# Main loop over every TIC
# =============================
# For each target: download the SPOC light curves, clean each one on its own,
# concatenate them (no stitching), write the CSV both locally and to Google
# Drive, then plot the concatenated curve. Errors for one target are reported
# and the loop moves on to the next one.
for item in tics:
    tic_id = item["tic"]
    name = item["name"]
    print(f"\n==============================")
    print(f"🔭 Iniciando processamento de {name} ({tic_id})...")

    try:
        # Search and download the SPOC light curves
        print("📥 Buscando LightCurves SPOC...")
        lc_collection = lk.search_lightcurve(f"TIC {tic_id}", mission='TESS', author='SPOC').download_all()
        # lightkurve's download_all() gives None for an empty search result; report
        # that explicitly instead of failing later on len(None).
        if lc_collection is None or len(lc_collection) == 0:
            print(f"⚠️ Nenhuma LightCurve SPOC encontrada para {name} ({tic_id}); pulando.")
            continue
        print(f"✅ {len(lc_collection)} LightCurves baixadas.")

        # Per-light-curve DataFrames to concatenate afterwards
        all_data = []

        # Clean every light curve individually
        for i, lc in enumerate(lc_collection):
            print(f"  ➡ Processando LightCurve {i+1}/{len(lc_collection)}...")
            lc_clean = lc.remove_nans().remove_outliers(sigma=5).flatten(window_length=401).normalize()
            df_lc = lc_clean.to_pandas()
            # to_pandas() carries the timestamps in the index; move them into a "time"
            # column so they survive ignore_index=True and to_csv(index=False) below.
            if "time" not in df_lc.columns:
                df_lc = df_lc.rename_axis("time").reset_index()
            all_data.append(df_lc)
            print(f"     ✔ LightCurve {i+1} processada e adicionada.")

        # Concatenate all light curves into a single DataFrame (no stitching)
        df_final = pd.concat(all_data, ignore_index=True)
        print("🔗 Todas as LightCurves concatenadas com sucesso!")

        # Save the temporary local CSV
        local_csv_path = os.path.join(local_output_dir, f"{tic_id}_refinado.csv")
        df_final.to_csv(local_csv_path, index=False)
        print(f"💾 CSV temporário salvo localmente: '{local_csv_path}'")

        # Save the CSV to Google Drive
        drive_csv_path = os.path.join(drive_output_dir, f"{tic_id}_refinado.csv")
        df_final.to_csv(drive_csv_path, index=False)
        print(f"💾 CSV salvo no Google Drive: '{drive_csv_path}'")

        # Plot the concatenated curve. (Plotting `lc_clean` here would show only the
        # last light curve handled by the inner loop, not the data that was saved.)
        ax = df_final.plot(x="time", y="flux", style=".", markersize=1, legend=False,
                           title=f"{name} ({tic_id})")
        ax.set_ylabel("flux")
        print(f"✅ Processamento de {name} ({tic_id}) concluído!")

    except Exception as e:
        print(f"❌ Erro ao processar {tic_id}: {e}")

print("\n🎉 Todos os TICs processados e CSVs salvos!")


# COISAS DE IA

In [None]:
!pip install tensorflow flask-ngrok plotly pandas tqdm lightkurve --quiet

import pandas as pd
import numpy as np
import os
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
import plotly.graph_objs as go


In [None]:
# Folder holding the processed CSVs
csv_dir = "csv_lc"  # If you saved to Google Drive, use the Drive path instead
all_files = [os.path.join(csv_dir, f) for f in os.listdir(csv_dir) if f.endswith("_refinado.csv")]

# Accumulators shared by all files: one entry per sliding window
X_list = []
y_list = []

# For the sake of the example, artificial training labels are generated (1 = transit, 0 = no transit)
# In a real setting, labels could come from a catalogue of confirmed transits
for file in tqdm(all_files, desc="Carregando CSVs"):
    df = pd.read_csv(file)
    flux = df['flux'].values
    flux = flux.reshape(-1,1)  # column vector, the 2-D layout the scaler is fitted on

    # Normalise (the scaler is re-fitted for every file)
    scaler = MinMaxScaler()
    flux_scaled = scaler.fit_transform(flux)

    # Split into windows of 50 points (time sequence), advancing one sample at a time
    window_size = 50
    for i in range(len(flux_scaled)-window_size):
        X_list.append(flux_scaled[i:i+window_size])
        # Artificial label: 1 if there is a sharp drop in flux (simulating a transit)
        # NOTE(review): the 0.98 threshold is applied to the min-max-scaled flux, i.e.
        # relative to this file's own min/max range, not to the normalised flux level.
        # Presumably almost every window then gets label 1 — confirm the intended rule.
        y_list.append(int(np.min(flux_scaled[i:i+window_size]) < 0.98))

X = np.array(X_list)
y = np.array(y_list)

# NOTE(review): X.shape[1] assumes at least one window was collected — confirm inputs exist.
print(f"✅ Dataset preparado: {X.shape[0]} janelas, cada uma com {X.shape[1]} timesteps.")


In [None]:
# LSTM model: one 64-unit LSTM layer -> dropout -> single sigmoid output (binary label)
model = Sequential()
# The sequence length is taken from the prepared dataset X; one feature per timestep
model.add(LSTM(64, input_shape=(X.shape[1], 1), return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# Train, holding out 20% of the samples for validation
history = model.fit(X, y, epochs=5, batch_size=64, validation_split=0.2)


In [None]:
# Persist the trained model to "lstm_exoplanet_model.h5" (relative to the working directory)
model.save("lstm_exoplanet_model.h5")
print("✅ Modelo LSTM salvo para backend!")


In [None]:
# Lists /content and filters for a file named model_exoplanet_lstm.h5.
# NOTE(review): the cell above saves "lstm_exoplanet_model.h5"; the name checked here
# is the one written by the synthetic-data training cell below — confirm which is intended.
!ls /content | grep model_exoplanet_lstm.h5

In [None]:
# =====================================================
# 🤖 Training and saving the exoplanet LSTM model
# =====================================================
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Generate simulated data (use the real data if the CSVs already exist)
print("📡 Gerando dados de treino fake (exemplo)...")
timesteps = 200
num_samples = 500

# Curves with "planets" and without
# NOTE(review): no random seed is set here, so the generated data differs on every run.
data = []
labels = []
for _ in range(num_samples):
    # Baseline: Gaussian noise around 1.0
    curve = np.random.normal(1.0, 0.01, timesteps)
    if np.random.rand() > 0.5:
        # Positive sample: lower 5 consecutive points by one random depth
        dip_pos = np.random.randint(20, timesteps - 20)
        curve[dip_pos:dip_pos+5] -= np.random.uniform(0.01, 0.05)
        labels.append(1)
    else:
        labels.append(0)
    data.append(curve)

# Shape the curves as (samples, timesteps, 1 feature) for the LSTM input
X = np.array(data).reshape(num_samples, timesteps, 1)
y = np.array(labels)

print(f"✅ Dados gerados: {X.shape}, Labels: {y.shape}")

# Simple LSTM model
model = Sequential([
    LSTM(64, input_shape=(timesteps, 1), return_sequences=False),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.001),
    metrics=['accuracy']
)

# Training (no validation split is passed to fit here)
print("🚀 Treinando o modelo (pode levar ~1 minuto)...")
history = model.fit(X, y, epochs=8, batch_size=32, verbose=1)

# Save the model
model.save("/content/model_exoplanet_lstm.h5")
print("💾 Modelo salvo em: /content/model_exoplanet_lstm.h5")


In [14]:
# Cole esta célula inteira no Google Colab e execute.
# Compatível com Gradio 5.47.2
!pip install -q gradio==5.47.2 torch torchvision scikit-learn matplotlib numpy pandas scipy

import os, io, math, time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, roc_curve, auc
import torch, torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import gradio as gr

# ---------------------------
# Utility: realistic transit simulation (trapezoid + limb darkening + multiples)
# ---------------------------
def trapezoid_profile(t, t0, duration, depth, ingress_frac=0.18):
    """Return a trapezoid-shaped transit profile sampled on ``t``.

    The profile is 1.0 outside the transit, ``1 - depth`` on the flat bottom,
    and ramps linearly between the two over ``ingress_frac * duration`` at
    each edge.

    Args:
        t: NumPy array of sample times.
        t0: Transit centre, in the same units as ``t``.
        duration: Total transit duration (edge to edge).
        depth: Fractional flux drop at the flat bottom.
        ingress_frac: Fraction of ``duration`` taken by each ramp.

    Returns:
        Array shaped like ``t`` holding the relative flux.
    """
    half_width = duration / 2.0
    ramp = ingress_frac * duration
    shape = np.ones_like(t)
    offset = np.abs(t - t0)

    # No sample falls inside the transit window: nothing to carve out.
    if not np.any(offset <= half_width):
        return shape

    flat_edge = half_width - ramp
    on_floor = offset <= flat_edge
    on_ramp = ~on_floor & (offset <= half_width)

    shape[on_floor] = 1.0 - depth
    # Position along the ramp: 0 at the flat bottom, 1 at the outer edge.
    ramp_pos = (offset[on_ramp] - flat_edge) / (ramp if ramp > 0 else 1.0)
    ramp_pos = np.clip(ramp_pos, 0.0, 1.0)
    shape[on_ramp] = 1.0 - depth * (1.0 - ramp_pos)
    return shape

def apply_quadratic_limb_darkening(profile, t, t0, duration, u1=0.25, u2=0.05):
    """Rescale the in-transit part of ``profile`` with a quadratic weighting.

    A weight ``1 - u1*(1-mu) - u2*(1-mu)**2`` (clipped to [0.3, 2.0]) is
    computed per sample, where ``mu = 1 - min(1, |t - t0| / (duration/2))**2``.
    Samples whose profile value is below 1.0 have their dip multiplied by that
    weight; all other samples are returned unchanged.

    Args:
        profile: Array of relative flux values (1.0 = out of transit).
        t: Array of sample times matching ``profile``.
        t0: Transit centre.
        duration: Total transit duration.
        u1: Linear coefficient.
        u2: Quadratic coefficient.

    Returns:
        New array with the weighted profile.
    """
    half_width = duration / 2.0
    scale = half_width if half_width > 0 else 1.0
    phase = (t - t0) / scale
    mu = 1.0 - np.minimum(1.0, np.abs(phase))**2
    edge_term = 1 - mu
    weight = np.clip(1.0 - u1 * edge_term - u2 * edge_term**2, 0.3, 2.0)
    in_transit = profile < 1.0
    return np.where(in_transit, 1.0 - (1.0 - profile) * weight, profile)

def simulate_light_curve(period=3.0, depth=0.01, noise=0.001, cadence_min=30.0, duration_days=None,
                         ingress_frac=0.18, u1=0.25, u2=0.05, multiple_transits=True, seed=None, length=None):
    """Simulate a noisy, median-normalised light curve.

    Two modes:

    * ``length`` given: a fixed-length curve on ``linspace(0, 1, length)`` with
      one box-shaped dip (5% of the samples wide, starting in the first half)
      when ``depth > 0``. Period, cadence and the shape parameters are unused.
    * ``length`` is None: samples every ``cadence_min`` minutes over
      ``duration_days``; each transit is a trapezoid passed through the
      limb-darkening weighting, multiplied into the flux.

    Gaussian noise with standard deviation ``noise`` is added in both modes
    and the result is divided by its median.

    Args:
        period: Spacing between transit centres, in days.
        depth: Fractional transit depth.
        noise: Standard deviation of the added Gaussian noise.
        cadence_min: Sampling interval in minutes.
        duration_days: Time span; defaults to ``max(3*period, 1)`` with
            multiple transits, ``max(period, 1)`` otherwise.
        ingress_frac: Ramp fraction handed to ``trapezoid_profile``.
        u1, u2: Coefficients handed to ``apply_quadratic_limb_darkening``.
        multiple_transits: Repeat the transit every ``period`` when True,
            otherwise place a single transit at the midpoint.
        seed: When not None, seeds NumPy's global random state.
        length: Number of samples for the fixed-length mode.

    Returns:
        Tuple ``(time_axis, flux)`` of equally sized arrays.
    """
    if seed is not None:
        np.random.seed(int(seed))

    # --- fixed-length mode -------------------------------------------------
    if length is not None:
        grid = np.linspace(0, 1, int(length))
        signal = np.ones_like(grid)
        if depth > 0:
            begin = np.random.randint(0, max(1, len(grid)//2))
            width = max(1, int(0.05 * len(grid)))
            signal[begin:begin+width] -= depth
        signal += np.random.normal(0, noise, size=signal.shape)
        return grid, signal / np.median(signal)

    # --- cadence-based mode ------------------------------------------------
    step_days = cadence_min / (24*60.0)
    if duration_days is None:
        duration_days = max(period * (3 if multiple_transits else 1), 1.0)
    t = np.arange(0, duration_days, step_days)
    flux = np.ones_like(t)
    midpoint = duration_days / 2.0
    transit_len = max(0.02 * period, 0.01)

    if multiple_transits:
        # Walk back from the midpoint in whole periods, then step forward
        # until one period past the last sample.
        centers = []
        center = midpoint - period * int((midpoint - t[0]) // period)
        stop = t[-1] + period
        while center < stop:
            centers.append(center)
            center += period
    else:
        centers = [midpoint]

    for center in centers:
        dip = trapezoid_profile(t, center, transit_len, depth, ingress_frac)
        dip = apply_quadratic_limb_darkening(dip, t, center, transit_len, u1, u2)
        flux *= dip

    flux += np.random.normal(0, noise, size=flux.shape)
    return t, flux / np.median(flux)

# ---------------------------
# Dataset generator (unified for MLP and CNN)
# ---------------------------
def generate_dataset_unified(n_samples=800, length=None, period_range=(1.0,10.0), depth_range=(0.002,0.02),
                     noise_range=(0.0005,0.002), cadence_min=30.0, duration_days=5.0,
                     advanced=False, seed=None):
    """Build a labelled set of simulated light curves.

    Each sample is a transit curve (label 1) or a flat noisy curve (label 0,
    depth forced to 0.0) with equal probability. Period, depth and noise are
    drawn uniformly from the given ranges.

    Args:
        n_samples: Number of curves to generate.
        length: When not None, use the fixed-length mode of
            ``simulate_light_curve``; otherwise the cadence-based mode.
        period_range, depth_range, noise_range: ``(low, high)`` bounds for the
            uniform draws.
        cadence_min: Sampling interval in minutes (cadence-based mode).
        duration_days: Time span in days (cadence-based mode).
        advanced: In cadence-based mode, transit samples additionally get
            random ``u1``, ``u2`` and ingress fraction and repeated transits.
        seed: When not None, seeds NumPy's global random state.

    Returns:
        ``(X, y, t_template)``: stacked float32 curves, int64 labels, and the
        time axis of the last generated curve.
    """
    if seed is not None:
        np.random.seed(int(seed))

    curves = []
    labels = []
    t_template = None
    for _ in range(int(n_samples)):
        has_transit = np.random.rand() < 0.5
        period = float(np.random.uniform(*period_range))
        depth = float(np.random.uniform(*depth_range)) if has_transit else 0.0
        noise = float(np.random.uniform(*noise_range))

        # Baseline arguments shared by all three simulation variants.
        sim_args = dict(period=period, depth=depth, noise=noise,
                        cadence_min=cadence_min, duration_days=duration_days,
                        ingress_frac=0.18, u1=0.25, u2=0.05, multiple_transits=False,
                        seed=None)
        if length is not None:
            sim_args["length"] = length
        elif advanced and has_transit:
            # Draw order (u1, u2, ingress) matters for reproducibility under a seed.
            u1 = np.random.uniform(0.0, 0.5)
            u2 = np.random.uniform(0.0, 0.3)
            ingress = np.random.uniform(0.08, 0.25)
            sim_args.update(ingress_frac=ingress, u1=u1, u2=u2, multiple_transits=True)

        t_template, flux = simulate_light_curve(**sim_args)
        flux = flux / np.median(flux)
        curves.append(flux.astype(np.float32))
        labels.append(1 if has_transit else 0)

    X = np.stack(curves)
    y = np.array(labels).astype(np.int64)
    return X, y, t_template

# ---------------------------
# Model definitions
# ---------------------------
class SimpleMLP(nn.Module):
    """Fully connected binary classifier: input_len -> 128 -> 64 -> 1.

    ReLU activations, dropout (p=0.1) after the first hidden layer, and a
    sigmoid on the single output unit.
    """

    def __init__(self, input_len):
        super(SimpleMLP, self).__init__()
        first_hidden, second_hidden = 128, 64
        stack = [
            nn.Linear(input_len, first_hidden),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(first_hidden, second_hidden),
            nn.ReLU(),
            nn.Linear(second_hidden, 1),
            nn.Sigmoid(),
        ]
        # Kept under the attribute name `net` so state_dict keys stay "net.<idx>.*".
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Map a ``(batch, input_len)`` tensor to ``(batch, 1)`` sigmoid outputs."""
        scores = self.net(x)
        return scores

class CNN1D(nn.Module):
    """Two-layer 1-D convolutional classifier with global average pooling.

    Layout: Conv1d -> BatchNorm -> ReLU, twice (``num_filters`` then
    ``2 * num_filters`` channels), adaptive average pooling to length 1,
    dropout, a linear layer to ``num_classes`` and a sigmoid.

    Args:
        in_channels: Channels of the input sequence.
        num_classes: Number of output units.
        num_filters: Channels of the first convolution.
        kernel_size: Kernel width of both convolutions (padding is
            ``kernel_size // 2``).
        dropout: Dropout probability applied before the linear layer.
    """

    def __init__(self, in_channels=1, num_classes=1, num_filters=32, kernel_size=7, dropout=0.3):
        super(CNN1D, self).__init__()
        self.conv1 = nn.Conv1d(in_channels, num_filters, kernel_size, padding=kernel_size//2)
        self.bn1 = nn.BatchNorm1d(num_filters)
        self.conv2 = nn.Conv1d(num_filters, num_filters*2, kernel_size, padding=kernel_size//2)
        self.bn2 = nn.BatchNorm1d(num_filters*2)
        self.pool = nn.AdaptiveAvgPool1d(1)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(num_filters*2, num_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid outputs for a ``(batch, in_channels, length)`` input.

        The result has shape ``(batch,)`` when ``num_classes == 1`` and
        ``(batch, num_classes)`` otherwise.
        """
        x = torch.relu(self.bn1(self.conv1(x)))
        x = torch.relu(self.bn2(self.conv2(x)))
        x = self.pool(x).squeeze(-1)
        x = self.dropout(x)
        # Squeeze only the class axis. A bare squeeze() would also drop the batch
        # axis for a batch of one, giving a 0-d tensor that no longer matches a
        # (1,)-shaped target in the loss.
        return self.sigmoid(self.fc(x)).squeeze(-1)

# ---------------------------
# Plot helpers
# ---------------------------
def plot_history(history):
    """Draw loss and accuracy curves side by side and return the figure.

    Args:
        history: Mapping that may hold the lists "loss", "val_loss", "acc"
            and "val_acc"; missing keys are plotted as empty series.

    Returns:
        The Matplotlib figure with two panels ("Loss" and "Accuracy").
    """
    fig, (loss_ax, acc_ax) = plt.subplots(1,2, figsize=(10,3))
    panels = (
        (loss_ax, "Loss", (("loss", "train loss"), ("val_loss", "val loss"))),
        (acc_ax, "Accuracy", (("acc", "acc"), ("val_acc", "val_acc"))),
    )
    for axis, title, series in panels:
        for key, label in series:
            axis.plot(history.get(key, []), label=label)
        axis.set_title(title)
        axis.legend()
    plt.tight_layout()
    return fig

def plot_confusion_matrix(cm):
    """Render a confusion matrix as an annotated heat map and return the figure.

    Args:
        cm: 2-D array of counts; rows are drawn on the "True" axis and
            columns on the "Predicted" axis.

    Returns:
        The Matplotlib figure.
    """
    fig, ax = plt.subplots(figsize=(4,3))
    ax.imshow(cm, cmap="Blues", interpolation="nearest")
    ax.set_xticks([0,1])
    ax.set_yticks([0,1])
    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")
    # Write each count in the centre of its cell (x = column, y = row).
    for (row, col), count in np.ndenumerate(cm):
        ax.text(col, row, int(count), ha="center", va="center", color="black")
    plt.tight_layout()
    return fig

def plot_roc(fpr, tpr, roc_auc):
    """Plot a ROC curve with the diagonal reference line and return the figure.

    Args:
        fpr: False-positive rates; the curve is skipped when this is empty.
        tpr: True-positive rates matching ``fpr``.
        roc_auc: Area under the curve, shown in the legend label.

    Returns:
        The Matplotlib figure.
    """
    fig, ax = plt.subplots(figsize=(4,3))
    has_curve = len(fpr) > 0
    if has_curve:
        ax.plot(fpr, tpr, label=f"AUC = {roc_auc:.3f}")
    # Diagonal reference line from (0, 0) to (1, 1).
    ax.plot([0,1],[0,1],"--", color="gray")
    ax.set_xlabel("FPR")
    ax.set_ylabel("TPR")
    ax.legend()
    ax.set_title("ROC")
    plt.tight_layout()
    return fig

def plot_light_curve(t, flux, overlay=None):
    """Plot flux against time, optionally with a dashed overlay curve.

    Args:
        t: Time values for the x axis (labelled "Days").
        flux: Flux values for the y axis (labelled "Relative flux").
        overlay: Optional second series sampled on ``t``, drawn dashed.

    Returns:
        The Matplotlib figure.
    """
    fig, ax = plt.subplots(figsize=(7,2.6))
    series = [(flux, dict(lw=0.8))]
    if overlay is not None:
        series.append((overlay, dict(lw=1.0, linestyle="--")))
    for values, line_style in series:
        ax.plot(t, values, **line_style)
    ax.set_xlabel("Days")
    ax.set_ylabel("Relative flux")
    plt.tight_layout()
    return fig

def plot_residuals(t, flux, model):
    """Plot ``flux - model`` against time and return the figure.

    Args:
        t: Time values for the x axis (labelled "Days").
        flux: Observed flux values.
        model: Model flux values subtracted from ``flux``.

    Returns:
        The Matplotlib figure.
    """
    fig, ax = plt.subplots(figsize=(7,2.0))
    ax.plot(t, flux - model, lw=0.7)
    ax.set_xlabel("Days")
    ax.set_ylabel("Residual flux")
    plt.tight_layout()
    return fig

# ---------------------------
# Evaluation utilities
# ---------------------------
def evaluate_model_numpy_preds(preds, y_true, threshold=0.5):
    """Compute classification metrics from predicted scores.

    Args:
        preds: NumPy array of predicted scores.
        y_true: Ground-truth binary labels.
        threshold: Scores at or above this value count as class 1.

    Returns:
        Dict with the keys "preds", "labels", "cm", "precision", "recall",
        "f1", "fpr", "tpr" and "roc_auc". If the ROC computation raises,
        "fpr"/"tpr" are empty arrays and "roc_auc" is 0.0.
    """
    hard_labels = (preds >= threshold).astype(int)
    report = {
        "preds": preds,
        "labels": hard_labels,
        "cm": confusion_matrix(y_true, hard_labels),
        "precision": precision_score(y_true, hard_labels, zero_division=0),
        "recall": recall_score(y_true, hard_labels, zero_division=0),
        "f1": f1_score(y_true, hard_labels, zero_division=0),
    }
    try:
        fpr, tpr, _ = roc_curve(y_true, preds)
        roc_auc = auc(fpr, tpr)
    except Exception:
        # Fall back to an empty curve so callers can still plot.
        fpr, tpr, roc_auc = np.array([]), np.array([]), 0.0
    report["fpr"] = fpr
    report["tpr"] = tpr
    report["roc_auc"] = roc_auc
    return report

# ---------------------------
# Streaming training for MLP (keeps original behaviour)
# ---------------------------
def train_streaming_mlp(X_train, y_train, X_val, y_val, epochs=8, batch_size=32, lr=1e-3, augment=False, advanced_outputs=False):
    """Train a SimpleMLP and stream progress after every epoch.

    This is a generator. Every item is a 6-tuple
    ``(model_path, history_fig, logs_text, metrics_text, confusion_fig, roc_fig)``.
    Per-epoch items carry ``None`` for ``model_path`` and ``roc_fig``; the
    last item carries the saved model path, the history figure, the final
    confusion matrix and the ROC figure.

    Args:
        X_train: 2-D NumPy array of training curves (samples x input length).
        y_train: NumPy array of training labels.
        X_val: 2-D NumPy array of validation curves.
        y_val: NumPy array of validation labels.
        epochs: Number of training epochs.
        batch_size: Mini-batch size.
        lr: Adam learning rate.
        augment: Add Gaussian jitter (sigma 0.001) to each training batch.
        advanced_outputs: Also yield a confusion-matrix figure each epoch.

    Side effects:
        Writes the model's state_dict to
        ``/content/galileo_exoplanet_classifier_mlp.pt``.
    """
    device = ("cuda" if torch.cuda.is_available() else "cpu")
    input_len = X_train.shape[1]
    model = SimpleMLP(input_len).to(device)
    opt = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.BCELoss()
    train_ds = TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train.reshape(-1,1)).float())
    loader = DataLoader(train_ds, batch_size=int(batch_size), shuffle=True)
    # Cast once to float32 so a float64 validation array cannot trip the model.
    val_inputs = torch.from_numpy(X_val).float().to(device)
    # Flat labels keep the element-wise comparisons below from broadcasting.
    y_val = np.asarray(y_val).ravel()

    def _predict_val():
        """Return the model's validation scores as a flat NumPy array."""
        model.eval()
        with torch.no_grad():
            return model(val_inputs).cpu().numpy().ravel()

    history = {"loss":[], "val_loss":[], "acc":[], "val_acc":[]}
    logs = []
    vpreds = None
    fig_hist = None
    for epoch in range(1, int(epochs)+1):
        model.train()
        running_loss = 0.0
        n_correct = 0
        for bx, by in loader:
            bx = bx.to(device)
            by = by.to(device)
            if augment:
                bx = bx + 0.001 * torch.randn_like(bx)
            preds = model(bx)
            loss = loss_fn(preds, by)
            opt.zero_grad()
            loss.backward()
            opt.step()
            running_loss += loss.item() * bx.size(0)
            n_correct += int(((preds >= 0.5).float() == by).sum().item())
        train_loss = running_loss / len(loader.dataset)
        train_acc = n_correct / len(loader.dataset)
        # validation
        vpreds = _predict_val()
        if len(y_val) > 0:
            val_loss = float(np.mean((- (y_val * np.log(np.clip(vpreds,1e-8,1-1e-8)) + (1-y_val) * np.log(np.clip(1-vpreds,1e-8,1-1e-8))))))
        else:
            val_loss = 0.0
        pred_labels = (vpreds >= 0.5).astype(int)
        acc = (pred_labels == y_val).mean() if len(y_val)>0 else 0.0
        # "acc" is the running training accuracy, "val_acc" the validation accuracy,
        # so the two curves in the history plot are actually different series.
        history["loss"].append(train_loss); history["val_loss"].append(val_loss)
        history["acc"].append(train_acc); history["val_acc"].append(acc)
        logs.append(f"Epoch {epoch}/{epochs} — loss: {train_loss:.6f} — val_acc: {acc:.4f}")
        try:
            fig_hist = plot_history(history)
        except Exception:
            fig_hist = None
        logs_text = "\n".join(logs[-20:])
        metrics_text = f"Last epoch acc: {acc:.4f}  train_loss: {train_loss:.6f}  val_loss: {val_loss:.6f}"
        adv1 = None
        if advanced_outputs:
            try:
                cm = confusion_matrix(y_val, pred_labels)
                adv1 = plot_confusion_matrix(cm)
            except Exception:
                adv1 = None
        yield None, fig_hist, logs_text, metrics_text, adv1, None
    # after training, save model
    model_path = "/content/galileo_exoplanet_classifier_mlp.pt"
    torch.save(model.state_dict(), model_path)
    # With zero epochs the loop never ran: evaluate the untrained model so the
    # final report is still produced instead of hitting an undefined name.
    if vpreds is None:
        vpreds = _predict_val()
        fig_hist = plot_history(history)
    # final evaluation
    eval_res = evaluate_model_numpy_preds(vpreds, y_val)
    cm_fig = plot_confusion_matrix(eval_res["cm"])
    roc_fig = plot_roc(eval_res["fpr"], eval_res["tpr"], eval_res["roc_auc"])
    logs.append("Training complete. Model saved to: " + model_path)
    logs_text = "\n".join(logs[-50:])
    metrics_text = f"Precision: {eval_res['precision']:.3f}  Recall: {eval_res['recall']:.3f}  F1: {eval_res['f1']:.3f}  AUC: {eval_res['roc_auc']:.3f}"
    # Second slot is the history plot (as in every per-epoch item); the ROC
    # figure belongs only in the last slot.
    yield model_path, fig_hist, logs_text, metrics_text, cm_fig, roc_fig

# ---------------------------
# Streaming training for CNN (based on second file training)
# ---------------------------
def train_streaming_cnn(n_samples, length, epochs, batch_size, lr, num_filters, kernel_size, dropout, noise, depth, device=None):
    """Generate a synthetic dataset, train a CNN1D on it and stream progress.

    This is a generator. Every item is a 6-tuple
    ``(model_path, history_fig, logs_text, metrics_text, confusion_fig, roc_fig)``.
    Per-epoch items carry ``None`` for ``model_path`` and ``roc_fig``; the
    last item carries the saved model path, the history figure, the final
    confusion matrix and the ROC figure.

    Args:
        n_samples: Number of curves to simulate.
        length: Fixed number of points per curve.
        epochs: Number of training epochs.
        batch_size: Mini-batch size.
        lr: AdamW learning rate (cosine-annealed over the epochs).
        num_filters: Channels of the first convolution.
        kernel_size: Kernel width of the convolutions.
        dropout: Dropout probability.
        noise: Noise level used for every simulated curve.
        depth: Transit depth used for every simulated transit.
        device: Torch device string; defaults to CUDA when available, else CPU.

    Side effects:
        Writes the model's state_dict to
        ``/content/galileo_exoplanet_classifier_cnn.pt``.
    """
    device = device or ("cuda" if torch.cuda.is_available() else "cpu")
    X_np, y_np, _ = generate_dataset_unified(n_samples=n_samples, length=length, noise_range=(noise, noise), depth_range=(depth, depth))
    # Standardise every curve individually (zero mean, unit variance).
    X_np = (X_np - X_np.mean(axis=1, keepdims=True)) / (X_np.std(axis=1, keepdims=True) + 1e-6)
    # First 80% of the samples train the model, the rest validate it.
    split = int(0.8 * len(X_np))
    X_train_np, X_val_np = X_np[:split], X_np[split:]
    y_train_np, y_val_np = y_np[:split], y_np[split:]
    # unsqueeze(1) adds the channel axis expected by Conv1d.
    X_train = torch.tensor(X_train_np, dtype=torch.float32).unsqueeze(1)
    y_train = torch.tensor(y_train_np, dtype=torch.float32)
    X_val = torch.tensor(X_val_np, dtype=torch.float32).unsqueeze(1)
    train_ds = TensorDataset(X_train, y_train)
    loader = DataLoader(train_ds, batch_size=int(batch_size), shuffle=True)
    model = CNN1D(in_channels=1, num_filters=int(num_filters), kernel_size=int(kernel_size), dropout=float(dropout)).to(device)
    criterion = nn.BCELoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
    # T_max must be positive even when zero epochs are requested.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=max(1, int(epochs)))

    def _predict_val():
        """Return the model's validation scores as a flat NumPy array."""
        model.eval()
        with torch.no_grad():
            return model(X_val.to(device)).cpu().numpy().ravel()

    history = {"loss":[], "val_loss":[], "acc":[], "val_acc":[]}
    logs = []
    preds_val = None
    fig_hist = None
    for epoch in range(int(epochs)):
        model.train()
        running_loss = 0.0
        n_correct = 0
        for xb, yb in loader:
            xb = xb.to(device)
            yb = yb.to(device)
            optimizer.zero_grad()
            # reshape(-1) keeps outputs 1-D even for a batch of one, so they
            # always line up with the (batch,)-shaped targets.
            outputs = model(xb).reshape(-1)
            loss = criterion(outputs, yb)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * xb.size(0)
            n_correct += int(((outputs >= 0.5).float() == yb).sum().item())
        scheduler.step()
        train_loss = running_loss / len(loader.dataset)
        train_acc = n_correct / len(loader.dataset)
        # eval on val
        preds_val = _predict_val()
        pred_labels = (preds_val >= 0.5).astype(int)
        acc = (pred_labels == y_val_np).mean() if len(y_val_np)>0 else 0.0
        if len(y_val_np) > 0:
            val_loss = float(np.mean((- (y_val_np * np.log(np.clip(preds_val,1e-8,1-1e-8)) + (1-y_val_np) * np.log(np.clip(1-preds_val,1e-8,1-1e-8))))))
        else:
            val_loss = 0.0
        # "acc" is the running training accuracy, "val_acc" the validation accuracy,
        # so the two curves in the history plot are actually different series.
        history["loss"].append(train_loss); history["val_loss"].append(val_loss)
        history["acc"].append(train_acc); history["val_acc"].append(acc)
        logs.append(f"Epoch {epoch+1}/{epochs} - Loss: {train_loss:.6f} - val_acc: {acc:.4f}")
        try:
            fig_hist = plot_history(history)
        except Exception:
            fig_hist = None
        logs_text = "\n".join(logs[-20:])
        metrics_text = f"Last epoch acc: {acc:.4f}  train_loss: {train_loss:.6f}  val_loss: {val_loss:.6f}"
        try:
            cm = confusion_matrix(y_val_np, pred_labels)
            adv1 = plot_confusion_matrix(cm)
        except Exception:
            adv1 = None
        yield None, fig_hist, logs_text, metrics_text, adv1, None
    model_path = "/content/galileo_exoplanet_classifier_cnn.pt"
    torch.save(model.state_dict(), model_path)
    # With zero epochs the loop never ran: evaluate the untrained model so the
    # final report is still produced instead of hitting an undefined name.
    if preds_val is None:
        preds_val = _predict_val()
        fig_hist = plot_history(history)
    eval_res = evaluate_model_numpy_preds(preds_val, y_val_np)
    cm_fig = plot_confusion_matrix(eval_res["cm"])
    roc_fig = plot_roc(eval_res["fpr"], eval_res["tpr"], eval_res["roc_auc"])
    logs.append("Training complete. Model saved to: " + model_path)
    logs_text = "\n".join(logs[-50:])
    metrics_text = f"Precision: {eval_res['precision']:.3f}  Recall: {eval_res['recall']:.3f}  F1: {eval_res['f1']:.3f}  AUC: {eval_res['roc_auc']:.3f}"
    # Second slot is the history plot (as in every per-epoch item); the ROC
    # figure belongs only in the last slot.
    yield model_path, fig_hist, logs_text, metrics_text, cm_fig, roc_fig

# ---------------------------
# Globals
# ---------------------------
# Module-level state shared between the UI callbacks:
#   "t_template" - time axis returned by the dataset generator (set when training
#                  the MLP and when generating samples)
#   "last_curve" - (t, flux) of the most recently generated light curve
#   "last_Xy"    - (X, y) of the most recently generated sample set
GLOBAL = {"t_template": None, "last_curve": None, "last_Xy": None}

# ---------------------------
# GRadio UI (main) - layout from first code, with model selector
# ---------------------------
# Custom stylesheet handed to gr.Blocks(css=...). The parameter takes raw CSS
# rules, so the string must not be wrapped in <style>...</style>: the wrapper
# is not CSS and would corrupt the selector of the first rule.
css = """
body { background: #08102a !important; color: #dfefff !important; }
.gradio-container { background: linear-gradient(180deg, #07102a 0%, #0b1437 100%) !important; color: #e6f0ff !important; }
.gradio-input, .gradio-output, .gradio-block { background: rgba(255,255,255,0.02) !important; border-radius: 8px; }
h1, h2, h3 { color: #e6f7ff !important; }
"""

# Markdown shown at the top of the app.
intro_md = """
# Galileo AI
Escolha entre MLP (dense) e CNN1D. Ambos lêem curvas simuladas/fornecidas, treinam com streaming, e geram gráficos e matrizes.
"""

with gr.Blocks(css=css, title="Galileo AI ") as demo:
    gr.Markdown(intro_md)

    with gr.Tab("Create / Train"):
        gr.Markdown("Configure model and start training (streaming logs).")
        model_type = gr.Radio(choices=["MLP", "CNN1D"], value="CNN1D", label="Tipo de modelo")
        # Common controls
        epochs = gr.Number(value=8, label="Epochs")
        batch_size = gr.Number(value=32, label="Batch size")
        lr = gr.Number(value=1e-3, label="Learning rate")
        augment = gr.Checkbox(value=True, label="Data augmentation (jitter) — MLP only")
        adv_train = gr.Checkbox(value=False, label="Advanced outputs (show more diagnostics)")
        # CNN-specific options
        cnn_filters = gr.Number(value=32, label="CNN filters (num_filters)")
        cnn_kernel = gr.Number(value=7, label="CNN kernel_size")
        cnn_dropout = gr.Number(value=0.3, label="CNN dropout")
        cnn_length = gr.Number(value=500, label="Fixed input length (for CNN)", precision=0)
        # dataset controls
        samples = gr.Number(value=800, label="Training samples (n_samples)")
        noise_level = gr.Number(value=0.002, label="Noise (for data gen)")
        transit_depth = gr.Number(value=0.01, label="Transit depth (for data gen)")
        train_btn = gr.Button("Start training (streaming)")
        train_model_file = gr.File(label="Saved model (.pt)")
        train_history_plot = gr.Plot()
        train_logs = gr.Textbox(label="Training logs (streamed)", lines=12)
        train_metrics = gr.Textbox(label="Metrics", lines=3)
        train_cm = gr.Plot()
        train_roc = gr.Plot()

        def start_training(model_type, epochs, batch_size, lr, augment, adv_train,
                           cnn_filters, cnn_kernel, cnn_dropout, cnn_length,
                           samples, noise_level, transit_depth):
            """Click handler: run the selected trainer and relay its streamed outputs.

            For "MLP" a dataset is generated here (seed 1), split 80/20 and
            passed to ``train_streaming_mlp``; any other choice delegates to
            ``train_streaming_cnn``, which generates its own data. Every item
            yielded by the trainer is passed through unchanged. If anything
            raises, a single item carrying the error text in the logs slot is
            yielded instead.
            """
            try:
                if model_type != "MLP":
                    stream = train_streaming_cnn(n_samples=int(samples), length=int(cnn_length), epochs=int(epochs),
                                                 batch_size=int(batch_size), lr=float(lr), num_filters=int(cnn_filters),
                                                 kernel_size=int(cnn_kernel), dropout=float(cnn_dropout),
                                                 noise=float(noise_level), depth=float(transit_depth))
                else:
                    X, y, t = generate_dataset_unified(n_samples=int(samples), length=None, noise_range=(noise_level, noise_level),
                                                       depth_range=(transit_depth, transit_depth), advanced=adv_train, seed=1)
                    GLOBAL["t_template"] = t
                    cut = int(0.8 * len(X))
                    X_train, X_val = X[:cut], X[cut:]
                    y_train = y[:cut].astype(np.float32)
                    y_val = y[cut:].astype(np.float32)
                    stream = train_streaming_mlp(X_train, y_train, X_val, y_val,
                                                 epochs=int(epochs), batch_size=int(batch_size), lr=float(lr),
                                                 augment=bool(augment), advanced_outputs=bool(adv_train))
                for update in stream:
                    yield update
            except Exception as e:
                yield (None, None, f"Error during training: {repr(e)}", "", None, None)

        train_btn.click(start_training,
                        inputs=[model_type, epochs, batch_size, lr, augment, adv_train, cnn_filters, cnn_kernel, cnn_dropout, cnn_length, samples, noise_level, transit_depth],
                        outputs=[train_model_file, train_history_plot, train_logs, train_metrics, train_cm, train_roc])

    with gr.Tab("Generate / Download Samples"):
        gr.Markdown("Generate or download synthetic samples for training.")
        samples_n = gr.Number(value=800, label="Number of samples")
        samples_len = gr.Number(value=500, label="Fixed length (for CNN) — leave blank for variable-length", precision=0)
        samples_adv = gr.Checkbox(value=False, label="Advanced generation (limb darkening, multiple transits)")
        gen_samples_btn = gr.Button("Generate & Save .npz")
        samples_file = gr.File(label="Dataset (.npz)")
        samples_status = gr.Textbox(label="Status", lines=2)

        def generate_and_save(n_samples, length, adv):
            """Click handler: generate a sample set and save it as a compressed .npz.

            The arrays are stored under the keys ``X``, ``y`` and ``t`` in
            ``/content/galileo_samples.npz`` and also cached in ``GLOBAL``.

            Args:
                n_samples: Number of curves to generate.
                length: Fixed curve length; ``None`` or ``""`` selects the
                    cadence-based generator mode.
                adv: Passed through as the generator's ``advanced`` flag.

            Returns:
                ``(path, status_message)`` on success, ``(None, error_message)``
                if anything raises.
            """
            try:
                length_val = None
                if length is not None and length != "":
                    length_val = int(length)
                X, y, t = generate_dataset_unified(n_samples=int(n_samples), length=length_val, advanced=adv, seed=2)
                GLOBAL["last_Xy"] = (X,y)
                GLOBAL["t_template"] = t
                # Serialise in memory first, then write the bytes out in one go.
                archive = io.BytesIO()
                np.savez_compressed(archive, X=X, y=y, t=t)
                path = "/content/galileo_samples.npz"
                with open(path, "wb") as out_file:
                    out_file.write(archive.getvalue())
                return path, f"Saved {n_samples} samples to {path}"
            except Exception as e:
                return None, f"Error generating samples: {repr(e)}"
        gen_samples_btn.click(generate_and_save, inputs=[samples_n, samples_len, samples_adv], outputs=[samples_file, samples_status])

    # Tab 'Generate Light Curve': form inputs, trigger button and output slots.
    with gr.Tab("Generate Light Curve"):
        gr.Markdown("Generate a configurable light curve. Use Advanced Outputs for periodogram/residuals.")
        # --- simulation parameters (passed positionally to handle_generate_curve) ---
        per = gr.Number(value=3.0, label="Period (days)")
        depth = gr.Number(value=0.01, label="Depth (fraction)")
        noise = gr.Number(value=0.001, label="Noise (sigma)")
        cadence = gr.Number(value=30.0, label="Cadence (minutes)")
        duration = gr.Number(value=5.0, label="Duration (days)")
        ingress_frac = gr.Number(value=0.18, label="Ingress fraction")
        u1 = gr.Number(value=0.25, label="Limb darkening u1")
        u2 = gr.Number(value=0.05, label="Limb darkening u2")
        multiple = gr.Checkbox(value=True, label="Multiple transits")
        seed_curve = gr.Number(value=7, label="Random seed")
        # When ticked, the handler also fills the two extra plot slots below.
        adv_curve = gr.Checkbox(value=False, label="Advanced outputs (periodogram, residuals)")
        gen_curve_btn = gr.Button("Generate curve")
        # --- outputs: main plot, CSV download, then residuals and periodogram plots ---
        curve_plot = gr.Plot()
        curve_download = gr.File(label="Download curve (.csv)")
        curve_adv1 = gr.Plot()
        curve_adv2 = gr.Plot()

        def handle_generate_curve(per, depth, noise, cadence, duration, ingress_frac, u1, u2, multiple, seed, adv_curve):
            """Simulate one light curve from the form values, cache it, save it as CSV and plot it.

            Parameters
            ----------
            per, depth, noise, cadence, duration, ingress_frac, u1, u2 : number
                Form values; each is coerced with ``float()`` and forwarded to
                ``simulate_light_curve``.
            multiple : bool
                Forwarded as ``multiple_transits``.
            seed : number
                Coerced with ``int()`` and forwarded as ``seed``.
            adv_curve : bool
                When truthy, also build the residuals and approximate-periodogram figures.

            Returns
            -------
            tuple
                ``(figure, csv_path, residuals_figure_or_None, periodogram_figure_or_None)``.
                On failure all four entries are ``None`` and the error is shown to the
                user as a Gradio warning instead of being dropped silently.
            """
            try:
                t, flux = simulate_light_curve(
                    period=float(per), depth=float(depth), noise=float(noise),
                    cadence_min=float(cadence), duration_days=float(duration),
                    ingress_frac=float(ingress_frac), u1=float(u1), u2=float(u2),
                    multiple_transits=bool(multiple), seed=int(seed),
                )
                # Cache the curve so the inference tab can use it without an upload.
                GLOBAL["last_curve"] = (t, flux)
                # Save the curve as CSV for the download slot.
                df = pd.DataFrame({"time": t, "flux": flux})
                path = "/content/galileo_last_curve.csv"
                df.to_csv(path, index=False)
                fig = plot_light_curve(t, flux)
                adv_fig1, adv_fig2 = None, None
                if adv_curve:
                    # Placeholder: the curve is compared with itself, so residuals are trivial.
                    adv_fig1 = plot_residuals(t, flux, flux)
                    fig2 = None
                    try:
                        # FFT-based periodogram on a uniform grid spanning the observed times.
                        grid = np.linspace(t.min(), t.max(), len(t))
                        interp = np.interp(grid, t, flux - np.mean(flux))
                        fft = np.fft.rfft(interp)
                        freqs = np.fft.rfftfreq(len(grid), d=(grid[1]-grid[0]))
                        power = np.abs(fft)**2
                        # Clamp the frequency so the zero-frequency bin does not divide by zero.
                        periods = 1.0 / np.maximum(freqs, 1e-8)
                        fig2, ax2 = plt.subplots(figsize=(6,2.2))
                        # Skip index 0 (the zero-frequency term).
                        ax2.plot(periods[1:], power[1:])
                        ax2.set_xscale("log")
                        ax2.set_xlabel("Period (days)"); ax2.set_ylabel("Power")
                        ax2.set_title("Approx. periodogram")
                        # Lay out this specific figure rather than pyplot's global "current" one.
                        fig2.tight_layout()
                        adv_fig2 = fig2
                    except Exception:
                        # The periodogram is best-effort; release a half-built figure, if any.
                        if fig2 is not None:
                            plt.close(fig2)
                        adv_fig2 = None
                return fig, path, adv_fig1, adv_fig2
            except Exception as e:
                # This tab has no status box, so surface the failure as a toast
                # while keeping the original "all outputs cleared" return value.
                gr.Warning(f"Error generating curve: {repr(e)}")
                return None, None, None, None

        # Wire the button: inputs are listed in the same order as handle_generate_curve's
        # parameters; the four return values map onto plot, download, and the two advanced plots.
        gen_curve_btn.click(handle_generate_curve, inputs=[per, depth, noise, cadence, duration, ingress_frac, u1, u2, multiple, seed_curve, adv_curve],
                            outputs=[curve_plot, curve_download, curve_adv1, curve_adv2])

    # Tab 'Infer & Convert': optional uploads, inference options, trigger button and output slots.
    with gr.Tab("Infer & Convert"):
        gr.Markdown("Run inference and convert depth -> Rp/R★ and estimate radius (km) if star radius provided.")
        # --- optional uploads ---
        upload_model = gr.File(label="Upload model (.pt) (optional)")
        upload_curve = gr.File(label="Upload curve (.csv) (optional)")
        # --- inference options ---
        threshold = gr.Number(value=0.5, label="Threshold (0-1)")
        consider_fp = gr.Checkbox(value=True, label="Consider false positive heuristics")
        star_radius = gr.Number(value=1.0, label="Host star radius (R_sun) (optional)")
        adv_infer = gr.Checkbox(value=False, label="Advanced outputs (periodogram/residuals)")
        infer_btn = gr.Button("Run inference")
        # --- outputs: JSON summary, main plot, then residuals and periodogram plots ---
        infer_json = gr.JSON(label="Inference summary")
        infer_plot = gr.Plot()
        infer_adv1 = gr.Plot()
        infer_adv2 = gr.Plot()
        # Architecture selector; it is the first input passed to handle_infer.
        infer_model_type = gr.Radio(choices=["MLP", "CNN1D"], value="CNN1D", label="Model architecture for inference (if uploading trained model, choose same type)")

        # ---------------------------
        # ⚙️ Inference handler (inputs are cast to float32 before the forward pass)
        # ---------------------------
        def handle_infer(model_type_for_infer, model_file, curve_file, threshold, consider_fp, star_radius, adv_infer):
            """Score a light curve with the selected model and derive simple transit estimates.

            Parameters
            ----------
            model_type_for_infer : str
                ``"MLP"`` builds ``SimpleMLP``; any other value builds ``CNN1D``.
            model_file : uploaded file or None
                Optional model checkpoint. When absent, a default checkpoint under
                ``/content`` is used if one exists for the selected type.
            curve_file : uploaded file or None
                Optional CSV with ``time`` and ``flux`` columns. When absent, the
                curve cached in ``GLOBAL["last_curve"]`` is used.
            threshold : number
                Score at or above which the curve is flagged as an exoplanet.
            consider_fp : bool
                Enables the depth / duration false-positive heuristics.
            star_radius : number or None
                Host-star radius in solar radii; used to convert Rp/R★ to kilometres.
            adv_infer : bool
                When truthy, also build the residuals and approximate-periodogram figures.

            Returns
            -------
            tuple
                ``(summary_dict, figure, residuals_figure_or_None, periodogram_figure_or_None)``.
                On any failure the first entry is ``{"error": ...}`` and the rest are ``None``.
            """
            try:
                # --- load the curve ---
                if curve_file is not None:
                    # Accept either a plain path string or a file wrapper exposing `.name`.
                    curve_path = getattr(curve_file, "name", curve_file)
                    df = pd.read_csv(curve_path)
                    t = df["time"].values; flux = df["flux"].values
                elif GLOBAL.get("last_curve") is not None:
                    t, flux = GLOBAL["last_curve"]
                else:
                    return {"error":"No curve provided. Generate or upload a curve."}, None, None, None
                # The array methods used below (.min, .astype) require ndarrays.
                t = np.asarray(t); flux = np.asarray(flux)

                # --- build the model for the selected architecture ---
                L = len(flux)
                if model_type_for_infer == "MLP":
                    model = SimpleMLP(L)
                else:
                    model = CNN1D(in_channels=1, num_filters=32, kernel_size=7, dropout=0.3)

                # --- load weights ---
                weights_loaded = False
                if model_file is not None:
                    model_path = getattr(model_file, "name", model_file)
                    try:
                        # SECURITY NOTE(review): torch.load unpickles the file, and this one is
                        # user-uploaded. Only accept checkpoints from trusted sources, or
                        # restrict loading to plain state dicts.
                        st = torch.load(model_path, map_location="cpu")
                        if isinstance(st, dict):
                            model.load_state_dict(st)
                        else:
                            # The file held a whole pickled model object rather than a state dict.
                            model = st
                    except Exception as load_err:
                        # Do not fall back to an untrained network: that would report a
                        # meaningless score as if it came from the uploaded model.
                        return {"error": f"Could not load uploaded model: {repr(load_err)}"}, None, None, None
                    weights_loaded = True
                else:
                    # No upload: use the default checkpoint for this architecture, if present.
                    default_path = None
                    if model_type_for_infer == "MLP":
                        default_path = "/content/galileo_exoplanet_classifier_mlp.pt"
                    elif model_type_for_infer == "CNN1D":
                        default_path = "/content/galileo_exoplanet_classifier_cnn.pt"
                    if default_path is not None and os.path.exists(default_path):
                        st = torch.load(default_path, map_location="cpu")
                        model.load_state_dict(st)
                        weights_loaded = True

                # --- forward pass on CPU with float32 input ---
                model.eval()
                with torch.no_grad():
                    if model_type_for_infer == "MLP":
                        # NOTE(review): the MLP path feeds raw flux while the CNN path standardises
                        # it; confirm this matches how each model was trained.
                        x = torch.tensor(flux, dtype=torch.float32).unsqueeze(0)
                        score = float(model(x).cpu().numpy().ravel()[0])
                    else:
                        # Standardise, then add batch and channel axes.
                        x_np = (flux.astype(np.float32) - np.mean(flux)) / (np.std(flux) + 1e-6)
                        x = torch.tensor(x_np, dtype=torch.float32).unsqueeze(0).unsqueeze(0)
                        score = float(model(x).cpu().numpy().ravel()[0])

                # --- simple transit estimates ---
                is_exoplanet = bool(score >= float(threshold))
                # Depth is measured from a baseline of 1.0 down to the minimum flux.
                depth_est = float(max(0.0, 1.0 - np.min(flux)))
                # Samples below half depth, times the first sampling step.
                transit_duration = float(np.sum(flux < (1.0 - depth_est*0.5)) * (t[1]-t[0]) if len(t)>1 else 0.0)
                likely_fp = False; reasons = []
                if consider_fp:
                    if depth_est > 0.03:
                        likely_fp = True; reasons.append("Large depth (>3%) — possible eclipsing binary or false positive.")
                    if (transit_duration / max(1e-8, (t[-1]-t[0]))) > 0.5:
                        likely_fp = True; reasons.append("Transit occupies large fraction of observation — check variables/systematics.")
                rp_over_rs = float(math.sqrt(max(depth_est, 0.0)))
                planet_radius_km = None
                if star_radius is not None and star_radius > 0:
                    R_sun_km = 695700.0
                    planet_radius_km = float(rp_over_rs * star_radius * R_sun_km)

                # Flag a score that did not come from trained weights.
                notes = []
                if not weights_loaded:
                    notes.append("No trained weights were loaded; the score comes from a freshly constructed (untrained) model and is not meaningful.")

                summary = {"score": score, "is_exoplanet": is_exoplanet, "depth_est": depth_est, "rp_over_rs": rp_over_rs,
                           "planet_radius_km": planet_radius_km, "likely_false_positive": likely_fp, "reasons": reasons,
                           "model_weights_loaded": weights_loaded, "warnings": notes}
                fig = plot_light_curve(t, flux)
                adv1, adv2 = None, None
                if adv_infer:
                    # Placeholder: the curve is compared with itself, so residuals are trivial.
                    adv1 = plot_residuals(t, flux, flux)
                    fig2 = None
                    try:
                        # FFT-based periodogram on a uniform grid spanning the observed times.
                        grid = np.linspace(t.min(), t.max(), len(t))
                        interp = np.interp(grid, t, flux - np.mean(flux))
                        fft = np.fft.rfft(interp)
                        freqs = np.fft.rfftfreq(len(grid), d=(grid[1]-grid[0]))
                        power = np.abs(fft)**2
                        # Clamp the frequency so the zero-frequency bin does not divide by zero.
                        periods = 1.0 / np.maximum(freqs, 1e-8)
                        fig2, ax2 = plt.subplots(figsize=(6,2.2))
                        # Skip index 0 (the zero-frequency term).
                        ax2.plot(periods[1:], power[1:])
                        ax2.set_xscale("log")
                        ax2.set_xlabel("Period (days)"); ax2.set_ylabel("Power")
                        ax2.set_title("Approx. periodogram")
                        # Lay out this specific figure rather than pyplot's global "current" one.
                        fig2.tight_layout()
                        adv2 = fig2
                    except Exception:
                        # The periodogram is best-effort; release a half-built figure, if any.
                        if fig2 is not None:
                            plt.close(fig2)
                        adv2 = None
                return summary, fig, adv1, adv2
            except Exception as e:
                return {"error": f"Inference failed: {repr(e)}"}, None, None, None

        # Wire the button: inputs are listed in the same order as handle_infer's parameters;
        # the four return values map onto the JSON summary, main plot and the two advanced plots.
        infer_btn.click(handle_infer, inputs=[infer_model_type, upload_model, upload_curve, threshold, consider_fp, star_radius, adv_infer],
                         outputs=[infer_json, infer_plot, infer_adv1, infer_adv2])

    # Footer disclaimer, declared outside the tabs.
    gr.Markdown("Note: Educational prototype. Use real datasets and tune hyperparameters for production.")

# Start the app. share=True requests a public URL (the recorded cell output shows a
# gradio.live link); that output also notes debug=True is needed to see errors in Colab.
demo.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://6fe917d6d3d62ac564.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
# Diagnostic cell: display the installed Gradio version (the bare last expression is the cell output).
import gradio as gr
gr.__version__
