In [1]:
import sys
sys.path.insert(0, '..')

import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.utils.data import DataLoader
from datetime import datetime
from tqdm import tqdm
import umap
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import seaborn as sns

from core.multimodal.dataset2 import VPSMDatasetV2
from core.multimodal.model import ModelV1
from util.parallelzipfile import ParallelZipFile as ZipFile
from core.data.utils import preprocess_spectra, readLRSFits, preprocess_lc, get_vlc

In [2]:
# Metadata column names. The cells visible in this notebook read
# `train_dataset.metadata_cols` rather than this list.
METADATA_COLS = [
    'mean_vmag', 'amplitude', 'period', 'phot_g_mean_mag',
    'e_phot_g_mean_mag', 'lksl_statistic', 'rfr_score', 'phot_bp_mean_mag',
    'e_phot_bp_mean_mag', 'phot_rp_mean_mag', 'e_phot_rp_mean_mag', 'bp_rp',
    'parallax', 'parallax_error', 'parallax_over_error', 'pmra',
    'pmra_error', 'pmdec', 'pmdec_error', 'j_mag',
    'e_j_mag', 'h_mag', 'e_h_mag', 'k_mag',
    'e_k_mag', 'w1_mag', 'e_w1_mag', 'w2_mag',
    'e_w2_mag', 'w3_mag', 'w4_mag', 'j_k',
    'w1_w2', 'w3_w4', 'pm', 'ruwe',
]

# Class labels handed to the dataset constructor through `classes=`.
CLASSES = [
    'CWA', 'CWB', 'DCEP', 'DCEPS',
    'DSCT', 'EA', 'EB', 'EW',
    'HADS', 'M', 'ROT', 'RRAB',
    'RRC', 'RRD', 'RVA', 'SR',
]

In [3]:
# Build the 'train' split of the dataset, passing CLASSES as its class list.
train_dataset = VPSMDatasetV2(split='train', classes=CLASSES)

In [4]:
# Pick a single row of the dataset's table to experiment with.
idx = 0
el = train_dataset.df.iloc[idx]
# Look up the row's 'target' value in the dataset's target2id mapping.
label = train_dataset.target2id[el['target']]

In [13]:
# Crop mode follows the dataset split: 'random' for the training split,
# 'center' for any other split.
if train_dataset.split == 'train':
    crop = 'random'
else:
    crop = 'center'

# Load the photometry for this row's 'name', passing the dataset's
# `v_prefix` and `reader_v` to the loader.
photometry = get_vlc(el['name'], train_dataset.v_prefix, train_dataset.reader_v)

In [14]:
# Read the spectrum file named in this row from the dataset's LAMOST spectra
# directory, forwarding the dataset's `z_corr` setting to the reader.
spectra = readLRSFits(os.path.join(train_dataset.lamost_spec_dir, el['spec_filename']), train_dataset.z_corr)

In [15]:
# Show the dimensions of the loaded photometry.
photometry.shape

In [16]:
# Show the dimensions of the loaded spectrum.
spectra.shape

In [23]:
def add_noise(X, rng=None):
    """Perturb the flux column of an observation array with Gaussian noise.

    Each flux value is shifted by an independent draw from a zero-mean normal
    distribution whose standard deviation is that row's flux error.

    Args:
        X: 2-D array-like whose first three columns are read as
            (time, flux, flux_err). Columns beyond the third are not carried
            into the result.
        rng: Optional ``numpy.random.Generator`` or ``RandomState`` to draw
            from, so results can be reproduced without touching global state.
            When omitted, NumPy's global random state is used, so existing
            ``add_noise(X)`` calls (and ``np.random.seed``) behave as before.

    Returns:
        A new array of shape ``(len(X), 3)`` holding time, noisy flux and the
        unchanged flux_err. Floating-point inputs keep their dtype; other
        dtypes yield float64. ``X`` itself is not modified.

    Raises:
        ValueError: Propagated from the sampler if any flux_err is negative.
    """
    X = np.asarray(X)
    time, flux, flux_err = X[:, 0], X[:, 1], X[:, 2]

    # Sample noise from a normal distribution using flux_err as the scale.
    sampler = np.random if rng is None else rng
    noise = sampler.normal(0, flux_err)

    # Combine time, noisy flux, and flux_err back into a three-column array.
    X_noisy = np.column_stack((time, flux + noise, flux_err))

    # The sampler always returns float64, which would silently upcast a
    # float32 input; cast back so the noisy array matches the original dtype.
    if np.issubdtype(X.dtype, np.floating):
        X_noisy = X_noisy.astype(X.dtype, copy=False)

    return X_noisy

In [24]:
# Apply the same noise model to both modalities.
# NOTE(review): add_noise reads columns 0-2 as (time, flux, flux_err); confirm
# that the array returned by readLRSFits follows that column layout.
noisy_photometry = add_noise(photometry)
noisy_spectra = add_noise(spectra)

In [27]:
# Side-by-side view of the photometry before (left, blue) and after
# (right, red) noise injection; column 0 is plotted against column 1.
fig, (ax_original, ax_noisy) = plt.subplots(1, 2, figsize=(14, 6))

ax_original.plot(photometry[:, 0], photometry[:, 1], '.',
                 label='Original Photometry', color='blue')
ax_original.set(xlabel='Time', ylabel='Flux', title='Original Photometry')
ax_original.legend()

ax_noisy.plot(noisy_photometry[:, 0], noisy_photometry[:, 1], '.',
              label='Noisy Photometry', color='red')
ax_noisy.set(xlabel='Time', ylabel='Flux', title='Noisy Photometry')
ax_noisy.legend()

fig.tight_layout()
plt.show()

In [28]:
# Side-by-side view of the spectrum before (left, blue) and after (right, red)
# noise injection; column 0 is plotted against column 1.
# The x-axis of a spectrum is wavelength, not time as in the photometry figure.
# NOTE(review): assumes column 0 of the readLRSFits output is wavelength —
# confirm, and append the unit to the label once known.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

panels = (
    (spectra, 'Original Spectra', 'blue'),
    (noisy_spectra, 'Noisy Spectra', 'red'),
)
for ax, (spectrum, title, color) in zip(axes, panels):
    ax.plot(spectrum[:, 0], spectrum[:, 1], '.', label=title, color=color)
    ax.set_xlabel('Wavelength')
    ax.set_ylabel('Flux')
    ax.set_title(title)
    ax.legend()

fig.tight_layout()
plt.show()

In [62]:
# Compare dtypes before and after add_noise to see whether the noise step
# changed the array's dtype.
photometry.dtype, noisy_photometry.dtype

In [55]:
# Select the dataset's metadata columns from the row and cast the values to a
# float32 array.
metadata = el[train_dataset.metadata_cols].values.astype(np.float32)

In [56]:
# Confirm the dtype after the float32 cast.
metadata.dtype

In [57]:
# Display the metadata values for the selected row.
metadata

In [63]:
def augment_metadata(metadata, noise_level=0.01, rng=None):
    """Return a copy of ``metadata`` with additive Gaussian noise.

    Args:
        metadata: Numeric array-like of metadata values.
        noise_level: Standard deviation of the zero-mean Gaussian noise added
            independently to every element. Must be non-negative.
        rng: Optional ``numpy.random.Generator`` or ``RandomState`` to draw
            from, so results can be reproduced without touching global state.
            When omitted, NumPy's global random state is used.

    Returns:
        A new array with the same shape as ``metadata``. Floating-point inputs
        keep their dtype; other dtypes follow NumPy's promotion with float64.
        ``metadata`` itself is not modified.

    Raises:
        ValueError: Propagated from the sampler if ``noise_level`` is negative.
    """
    metadata = np.asarray(metadata)
    sampler = np.random if rng is None else rng

    # Generate Gaussian noise, one independent draw per metadata element.
    noise = sampler.normal(0.0, noise_level, size=metadata.shape)

    # Add noise to metadata.
    augmented_metadata = metadata + noise

    # The sampler returns float64, which would upcast a float32 input; cast
    # back so the augmented values keep the dtype of the original metadata.
    if np.issubdtype(metadata.dtype, np.floating):
        augmented_metadata = augmented_metadata.astype(metadata.dtype, copy=False)

    return augmented_metadata

In [64]:
# Perturb the metadata; noise_level=0.01 matches the default in
# augment_metadata's signature and is spelled out here for clarity.
noisy_metadata = augment_metadata(metadata, noise_level=0.01)

In [65]:
# Check the dtype of the augmented metadata.
noisy_metadata.dtype

In [42]:
# Print every original metadata value next to its noisy counterpart.
for i, original_value in enumerate(metadata):
    print(original_value, noisy_metadata[i])