# LARUN TinyML - Paperspace Gradient Training

**Train the LARUN exoplanet detection model on a free Paperspace Gradient GPU**

Created by: Padmanaban Veeraragavalu (Larun Engineering)

---

## Paperspace Setup:
1. Go to https://console.paperspace.com/gradient
2. **Notebooks** → **Create** → **Free GPU (M4000)**
3. Select **TensorFlow** runtime
4. Upload this notebook
5. Run all cells

**Paperspace Benefits:**
- Free M4000 GPU (6-hour sessions)
- Persistent /storage directory
- Good for experimentation

In [None]:
# Step 1: Check GPU
!nvidia-smi

import tensorflow as tf
print(f"\nTensorFlow: {tf.__version__}")
print(f"GPUs: {tf.config.list_physical_devices('GPU')}")

In [None]:
# Step 2: Install packages
!pip install -q lightkurve astroquery tqdm scikit-learn

import lightkurve as lk
print(f"Lightkurve installed: {lk.__version__}")

In [None]:
# Step 3: Configuration
import os

# Dataset: upper bound on the number of light curves collected per class.
NUM_PLANETS = 100
NUM_NON_PLANETS = 100

# Number of flux samples every light curve is padded or cropped to.
INPUT_SIZE = 1024

# Number of threads used for the parallel downloads.
MAX_WORKERS = 8

# Training hyper-parameters.
EPOCHS = 100
BATCH_SIZE = 32

# Paperspace persistent storage: create the output directory if it is missing.
OUTPUT_DIR = '/storage/larun_output'
os.makedirs(OUTPUT_DIR, exist_ok=True)

print(f"Output: {OUTPUT_DIR} (persistent across sessions)")

In [None]:
# Step 4: Fetch exoplanet hosts
import warnings

import numpy as np
from astroquery.nasa_exoplanet_archive import NasaExoplanetArchive

# Silence library warnings for the rest of the session to keep output readable.
warnings.filterwarnings('ignore')

print("Querying NASA Exoplanet Archive...")

# Select rows whose discovery facility mentions TESS or Kepler.
planets_table = NasaExoplanetArchive.query_criteria(
    table="pscomppars",
    select="hostname,disc_facility",
    where="disc_facility like '%TESS%' or disc_facility like '%Kepler%'"
)

# De-duplicate host names, then sort so the starting order does not depend on
# set iteration order (which varies between Python processes).
planet_hosts = sorted(set(planets_table['hostname'].data.tolist()))

# Shuffle with a fixed seed so the selected hosts are the same on every run,
# matching the fixed random_state used for the train/validation split.
np.random.default_rng(42).shuffle(planet_hosts)
planet_hosts = planet_hosts[:NUM_PLANETS]
print(f"Found {len(planet_hosts)} exoplanet hosts")

In [None]:
# Step 5: Parallel fetch function
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm.notebook import tqdm

def fetch_lightcurve(args):
    """Download one light curve and convert it to a fixed-length sample.

    Args:
        args: A ``(target, label)`` tuple, packed into a single argument so
            the function can be submitted directly to an executor. ``target``
            is the name passed to ``lk.search_lightcurve``; ``label`` is
            copied unchanged into the result.

    Returns:
        A dict with keys ``'flux'`` (float32 array of ``INPUT_SIZE`` samples),
        ``'label'`` and ``'target'``; or ``None`` when the search finds
        nothing, the cleaned light curve has no usable samples, or any step
        fails.
    """
    target, label = args
    try:
        search = lk.search_lightcurve(target, mission=['TESS', 'Kepler'])
        if len(search) == 0:
            return None

        # Only the first search result is downloaded.
        lc = search[0].download(quality_bitmask='default')
        lc = lc.remove_nans().normalize().remove_outliers(sigma=3)
        flux = lc.flux.value

        # An empty curve cannot be padded, and non-finite values would turn
        # the later per-curve standardization into NaN.
        if len(flux) == 0 or not np.all(np.isfinite(flux)):
            return None

        if len(flux) < INPUT_SIZE:
            # Too short: extend the tail with the median flux.
            flux = np.pad(flux, (0, INPUT_SIZE - len(flux)), mode='median')
        else:
            # Long enough: keep the central INPUT_SIZE samples.
            start = (len(flux) - INPUT_SIZE) // 2
            flux = flux[start:start + INPUT_SIZE]

        return {'flux': flux.astype(np.float32), 'label': label, 'target': target}
    except Exception:
        # Best-effort download: a failing target is skipped rather than
        # aborting the batch. Catching Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        return None

# Fetch planets
# Every host is downloaded concurrently and labelled 1; failed downloads
# (None results) are dropped.
planet_data = []
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
    futures = [executor.submit(fetch_lightcurve, (host, 1)) for host in planet_hosts]
    finished = tqdm(as_completed(futures), total=len(futures), desc="Planets")
    samples = (done.result() for done in finished)
    planet_data.extend(sample for sample in samples if sample)

print(f"✓ Got {len(planet_data)} planet light curves")

In [None]:
# Step 6: Fetch non-planets
# Candidate targets are evenly spaced TIC IDs, five times as many as needed
# (presumably to allow for IDs that return no data).
# NOTE(review): these IDs are not checked against the planet-host list, so the
# "non-planet" label (0) is an assumption — confirm.
non_planet_tics = [f"TIC {100000000 + i*100}" for i in range(NUM_NON_PLANETS * 5)]
non_planet_data = []

with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
    futures = [executor.submit(fetch_lightcurve, (t, 0)) for t in non_planet_tics]
    for f in tqdm(as_completed(futures), total=len(futures), desc="Non-planets"):
        if len(non_planet_data) >= NUM_NON_PLANETS:
            # Leaving the `with` block waits for every queued future, so a
            # bare `break` would still run all remaining downloads. Cancel
            # the ones that have not started; running ones finish normally.
            for pending in futures:
                pending.cancel()
            break
        r = f.result()
        if r: non_planet_data.append(r)

# Downloads that were already running may push the count past the quota.
non_planet_data = non_planet_data[:NUM_NON_PLANETS]
print(f"✓ Got {len(non_planet_data)} non-planet light curves")

In [None]:
# Step 7: Prepare data
from sklearn.model_selection import train_test_split

# Fail early with a clear message: with no samples at all the array math below
# raises an obscure axis error, and with a single class the split would succeed
# and silently produce a dataset the classifier cannot learn from.
if not planet_data or not non_planet_data:
    raise RuntimeError(
        f"Need light curves for both classes; got {len(planet_data)} planet "
        f"and {len(non_planet_data)} non-planet samples. Re-run the fetch steps."
    )

all_data = planet_data + non_planet_data
X = np.array([d['flux'] for d in all_data])
y = np.array([d['label'] for d in all_data])

# Standardize each light curve independently to zero mean and unit variance;
# the small epsilon avoids division by zero for a constant curve.
X = (X - X.mean(axis=1, keepdims=True)) / (X.std(axis=1, keepdims=True) + 1e-8)
# Add the trailing channel axis so samples are (INPUT_SIZE, 1).
X = X.reshape(-1, INPUT_SIZE, 1).astype(np.float32)

# Stratified 80/20 split with a fixed seed keeps the class ratio in both parts.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

print(f"Train: {len(X_train)}, Val: {len(X_val)}")

# Persist the prepared arrays so training can be repeated without re-downloading.
np.savez(f'{OUTPUT_DIR}/training_data.npz', X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val)

In [None]:
# Step 8: Build model
from tensorflow import keras
from tensorflow.keras import layers

# 1D convolutional classifier assembled layer by layer.
model = keras.Sequential(name='larun_paperspace')
model.add(keras.Input(shape=(INPUT_SIZE, 1)))

# Two convolution stages, each followed by batch normalization, 4x max
# pooling and dropout.
for filters, kernel_size in ((32, 7), (64, 5)):
    model.add(layers.Conv1D(filters, kernel_size, padding='same', activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling1D(4))
    model.add(layers.Dropout(0.25))

# Final convolution stage, collapsed over the time axis by global averaging.
model.add(layers.Conv1D(128, 3, padding='same', activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.GlobalAveragePooling1D())
model.add(layers.Dropout(0.5))

# Dense head ending in a two-way softmax (labels 0 and 1).
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(2, activation='softmax'))

# Integer labels, hence the sparse categorical cross-entropy loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
# Step 9: Train
# Stop after 15 epochs without improvement and roll back to the best weights.
early_stopping = keras.callbacks.EarlyStopping(patience=15, restore_best_weights=True)
# Halve the learning rate after 7 epochs without improvement.
reduce_lr = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=7)
# Keep only the best model seen so far on persistent storage.
checkpoint = keras.callbacks.ModelCheckpoint(f'{OUTPUT_DIR}/best.h5', save_best_only=True)
callbacks = [early_stopping, reduce_lr, checkpoint]

fit_options = {
    'validation_data': (X_val, y_val),
    'epochs': EPOCHS,
    'batch_size': BATCH_SIZE,
    'callbacks': callbacks,
    'verbose': 1,
}
history = model.fit(X_train, y_train, **fit_options)

In [None]:
# Step 10: Evaluate and plot training history
import matplotlib.pyplot as plt

val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
print(f"Accuracy: {val_acc*100:.2f}%")

# Plot: one panel per metric, training and validation curves side by side.
plt.figure(figsize=(10, 4))
panels = (('accuracy', 'Accuracy'), ('loss', 'Loss'))
for position, (metric, title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[metric], label='Train')
    plt.plot(history.history[f'val_{metric}'], label='Val')
    plt.title(title)
    plt.legend()

# Save before show() so the figure is written to disk while it is still current.
plt.savefig(f'{OUTPUT_DIR}/history.png')
plt.show()

In [None]:
# Step 11: Export TFLite
model.save(f'{OUTPUT_DIR}/larun_model.h5')

# Float32 TFLite model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite = converter.convert()
with open(f'{OUTPUT_DIR}/larun_model.tflite', 'wb') as f:
    f.write(tflite)


def _representative_samples(max_samples=100):
    """Yield single-sample float32 batches of training data for calibration."""
    for sample in X_train[:max_samples]:
        yield [sample[np.newaxis, ...].astype(np.float32)]


# Quantized model. Optimize.DEFAULT on its own only quantizes the weights
# (dynamic range); supplying a representative dataset lets the converter
# calibrate activations as well, so the "int8" artefact is integer-quantized
# (with float input/output and float fallback for unsupported ops).
# A fresh converter is used so the float export above is left untouched.
quant_converter = tf.lite.TFLiteConverter.from_keras_model(model)
quant_converter.optimizations = [tf.lite.Optimize.DEFAULT]
quant_converter.representative_dataset = _representative_samples
tflite_quant = quant_converter.convert()
with open(f'{OUTPUT_DIR}/larun_model_int8.tflite', 'wb') as f:
    f.write(tflite_quant)

print(f"TFLite: {len(tflite)/1024:.1f} KB")
print(f"INT8: {len(tflite_quant)/1024:.1f} KB")

In [None]:
# Step 12: Package for download
!cd {OUTPUT_DIR} && zip -r larun_trained.zip *.h5 *.tflite *.npz *.png

print(f"\n{'='*50}")
print("TRAINING COMPLETE!")
print(f"{'='*50}")
print(f"Accuracy: {val_acc*100:.2f}%")
print(f"\nFiles in: {OUTPUT_DIR}/")
print("Download from /storage/larun_output/ in file browser")