In [None]:
import numpy as np
import pandas as pd
import json
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import requests
from io import StringIO
import joblib

In [None]:
def fetch_exoplanet_data(query="SELECT * FROM ps", timeout=300):
    """Download a table from the NASA Exoplanet Archive TAP service as a DataFrame.

    Parameters
    ----------
    query : str, optional
        Query string sent to the synchronous TAP endpoint. The default
        requests every column of the ``ps`` table.
    timeout : float or tuple, optional
        Passed straight to ``requests.get``. Without a timeout a stalled
        server would block the notebook kernel indefinitely.

    Returns
    -------
    pandas.DataFrame
        The CSV response parsed into a frame.

    Raises
    ------
    requests.HTTPError
        If the service answers with an error status code.
    requests.Timeout
        If the server does not respond within ``timeout``.
    """
    # TAP service URL (synchronous endpoint)
    url = "https://exoplanetarchive.ipac.caltech.edu/TAP/sync"
    params = {
        "query": query,
        "format": "csv",
    }
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()  # Raise an error for bad status codes
    # low_memory=False makes pandas infer each column's dtype from the whole
    # file instead of chunk by chunk, which avoids mixed-type DtypeWarnings and
    # matches how the spectral CSV is read elsewhere in this notebook.
    return pd.read_csv(StringIO(response.text), low_memory=False)

# Pull the archive table once, then keep only the rows in which the planet
# name and all seven numeric descriptors are present.
exoplanet_data = fetch_exoplanet_data()

features = [
    'pl_name',
    'pl_massj', 'pl_radj', 'pl_dens', 'pl_orbper',
    'st_teff', 'st_met', 'st_logg',
]
exoplanet_df = exoplanet_data.loc[:, features].dropna()

  df = pd.read_csv(StringIO(response.text))


In [None]:
# Read the two files stored on the mounted Drive folder.
spectral_df = pd.read_csv(
    "/content/drive/MyDrive/Colab Datasets/MyData_with_transit_spectrum.csv",
    low_memory=False,
)
full_y_gas = np.loadtxt("/content/drive/MyDrive/Colab Datasets/y_dataTrain.txt")

# Alias for the cleaned archive table; the cell that builds `exoplanet_df`
# has to run before this one.
planetary_df = exoplanet_df

In [None]:
def parse_spectrum(x):
    """Turn one raw ``transit_spectrum`` cell into a Python list.

    Parameters
    ----------
    x : object
        A list (returned unchanged), a JSON-encoded string, or anything else
        (for example NaN / None for a missing cell).

    Returns
    -------
    list
        The decoded list, or ``[]`` when the value is missing, is not valid
        JSON, or decodes to something other than a list.
    """
    if isinstance(x, list):
        return x
    if not isinstance(x, str):
        return []
    try:
        parsed = json.loads(x)
    except (ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError. Catching only decoding
        # failures keeps KeyboardInterrupt / SystemExit from being swallowed.
        return []
    # Valid JSON need not be a list ("3.5", "null", "{...}"); callers take
    # len() of the result and concatenate lists, so normalise those to [].
    return parsed if isinstance(parsed, list) else []

# Decode every cell of the column into a list (unparseable cells become []).
raw_spectra = spectral_df['transit_spectrum']
spectral_df['transit_spectrum'] = raw_spectra.apply(parse_spectrum)

def pad_or_truncate(spectrum, length=100):
    """Return `spectrum` cut down to, or zero-padded up to, `length` items."""
    shortfall = length - len(spectrum)
    if shortfall <= 0:
        # Already long enough: keep the leading `length` entries.
        return spectrum[:length]
    # Too short: append zeros on the right.
    return spectrum + [0] * shortfall

# Bring every spectrum to the default fixed length and stack them into one array.
X_spectral = np.array(
    [pad_or_truncate(spectrum) for spectrum in spectral_df['transit_spectrum']]
)

In [None]:
# Numeric model inputs only. This rebinds `features` without 'pl_name'.
features = [
    'pl_massj', 'pl_radj', 'pl_dens', 'pl_orbper',
    'st_teff', 'st_met', 'st_logg',
]
X_planetary = planetary_df.loc[:, features].values

In [None]:
# Trim both feature sets to the size of the smaller one.
# NOTE(review): rows are paired purely by position; nothing here joins the
# spectra to the archive planets on a shared key. Confirm that is intended.
num_samples = min(X_spectral.shape[0], X_planetary.shape[0])
X_spectral = X_spectral[:num_samples]
X_planetary = X_planetary[:num_samples]

In [None]:
# Standardise each planetary/stellar column. The fitted `scaler` is reused
# later to transform new inputs at prediction time.
# NOTE(review): the scaler is fitted on all rows, before the train/test split.
scaler = StandardScaler()
X_planetary = scaler.fit_transform(X=X_planetary)

In [None]:
# Gas target matrix (10003 rows per the original author's note).
# NOTE(review): the same file is already read in an earlier cell.
full_y_gas = np.loadtxt(fname="/content/drive/MyDrive/Colab Datasets/y_dataTrain.txt")


In [None]:
# Width of the encoder bottleneck; the CNN, MLP and fusion models all end in a
# Dense layer of this size.
latent_dim = 5
# Shape of one autoencoder input row: 12 values per sample.
input_shape = (12,)

In [None]:
# Encoder half of the autoencoder: input row -> 8 hidden units -> latent code.
encoder = keras.Sequential(
    [
        layers.Input(shape=input_shape),
        layers.Dense(units=8, activation="relu"),
        layers.Dense(units=latent_dim, activation="relu"),
    ]
)

In [None]:
# Decoder half of the autoencoder: latent code -> 8 hidden units -> row of the
# original width, squashed into (0, 1) by the sigmoid.
decoder = keras.Sequential(
    [
        layers.Input(shape=(latent_dim,)),
        layers.Dense(units=8, activation="relu"),
        layers.Dense(units=input_shape[0], activation="sigmoid"),
    ]
)

In [None]:
# Chain encoder and decoder into one trainable model. The two halves stay
# usable on their own afterwards and share their weights with `autoencoder`.
autoencoder_input = layers.Input(shape=input_shape)
encoded = encoder(autoencoder_input)
decoded = decoder(encoded)
autoencoder = keras.Model(autoencoder_input, decoded)

In [None]:
# Reconstruction objective: the network is trained to reproduce its own input.
autoencoder.compile(loss='mse', optimizer='adam')

# The last 20% of the rows are held out for validation.
autoencoder.fit(
    x=full_y_gas,
    y=full_y_gas,
    batch_size=32,
    epochs=50,
    validation_split=0.2,
)


Epoch 1/50
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.1515 - val_loss: 0.0080
Epoch 2/50
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0070 - val_loss: 0.0056
Epoch 3/50
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.0052 - val_loss: 0.0041
Epoch 4/50
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.0039 - val_loss: 0.0034
Epoch 5/50
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.0032 - val_loss: 0.0014
Epoch 6/50
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 8.3257e-04 - val_loss: 4.0926e-04
Epoch 7/50
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 4.0912e-04 - val_loss: 3.7984e-04
Epoch 8/50
[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 3.7315e-04 - val_loss: 3.6521e-04
Epoch 9/50
[1m2

<keras.src.callbacks.history.History at 0x7873cdb00590>

In [None]:
# The targets are paired with the first `num_samples` feature rows by position,
# so there must be at least that many gas rows available.
if len(full_y_gas) < num_samples:
    raise ValueError(
        f"full_y_gas has {len(full_y_gas)} rows but {num_samples} samples are required"
    )

# Encode only the rows that are kept. Slicing before predict() avoids running
# the encoder over the whole gas table just to discard most of the result.
y_gas_composition = encoder.predict(full_y_gas[:num_samples])

# Train-test split: the three arrays are shuffled together so rows stay paired.
X_train_s, X_test_s, X_train_p, X_test_p, y_train, y_test = train_test_split(
    X_spectral, X_planetary, y_gas_composition, test_size=0.2, random_state=42
)

# Add a trailing channel axis: Conv1D takes (samples, steps, channels).
X_train_s = X_train_s.reshape(-1, 100, 1)
X_test_s = X_test_s.reshape(-1, 100, 1)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


In [None]:
# 1-D CNN that regresses the latent gas code from a 100-point spectrum.
cnn_model = keras.Sequential([
    # Explicit Input layer (as in the encoder/decoder) instead of passing
    # `input_shape=` to the first Conv1D.
    layers.Input(shape=(100, 1)),
    layers.Conv1D(32, kernel_size=3, activation='relu'),
    layers.MaxPooling1D(pool_size=2),
    layers.Conv1D(64, kernel_size=3, activation='relu'),
    layers.GlobalAveragePooling1D(),
    # Linear regression head. With a ReLU here, units that land at zero get no
    # gradient and training can stall, leaving the loss flat across epochs.
    layers.Dense(latent_dim, activation='linear')
])

cnn_model.compile(optimizer='adam', loss='mse', metrics=['mae'])
cnn_model.fit(X_train_s, y_train, validation_data=(X_test_s, y_test), epochs=20, batch_size=32)

Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - loss: 2.6187 - mae: 1.2104 - val_loss: 2.6103 - val_mae: 1.2075
Epoch 2/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - loss: 2.6450 - mae: 1.2125 - val_loss: 2.6103 - val_mae: 1.2075
Epoch 3/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 2.6409 - mae: 1.2156 - val_loss: 2.6103 - val_mae: 1.2075
Epoch 4/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 2.6504 - mae: 1.2144 - val_loss: 2.6103 - val_mae: 1.2075
Epoch 5/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 2.6331 - mae: 1.2096 - val_loss: 2.6103 - val_mae: 1.2075
Epoch 6/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 2.6080 - mae: 1.2083 - val_loss: 2.6103 - val_mae: 1.2075
Epoch 7/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 2.6500 - mae: 1

<keras.src.callbacks.history.History at 0x7873ccc130d0>

In [None]:
# MLP that regresses the latent gas code from the scaled planetary/stellar features.
mlp_model = keras.Sequential([
    # Explicit Input layer (as in the encoder/decoder) instead of passing
    # `input_shape=` to the first Dense layer.
    layers.Input(shape=(len(features),)),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(latent_dim, activation='relu')
])

mlp_model.compile(optimizer='adam', loss='mse', metrics=['mae'])
mlp_model.fit(X_train_p, y_train, validation_data=(X_test_p, y_test), epochs=20, batch_size=32)

Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 1.7818 - mae: 0.9876 - val_loss: 0.8088 - val_mae: 0.6200
Epoch 2/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.7662 - mae: 0.5653 - val_loss: 0.3008 - val_mae: 0.3644
Epoch 3/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.3667 - mae: 0.3836 - val_loss: 0.2274 - val_mae: 0.3173
Epoch 4/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.2633 - mae: 0.3321 - val_loss: 0.1909 - val_mae: 0.2892
Epoch 5/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.2160 - mae: 0.3050 - val_loss: 0.1732 - val_mae: 0.2764
Epoch 6/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.1929 - mae: 0.2892 - val_loss: 0.1613 - val_mae: 0.2668
Epoch 7/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.1753 - mae: 0.280

<keras.src.callbacks.history.History at 0x7873cc9dc050>

In [None]:
# Fusion inputs: the CNN and MLP predictions placed side by side, giving
# 2 * latent_dim columns per sample.
X_train_fusion = np.concatenate((cnn_model.predict(X_train_s), mlp_model.predict(X_train_p)), axis=1)
X_test_fusion = np.concatenate((cnn_model.predict(X_test_s), mlp_model.predict(X_test_p)), axis=1)

fusion_model = keras.Sequential([
    # Explicit Input layer (as in the encoder/decoder) instead of passing
    # `input_shape=` to the first Dense layer.
    layers.Input(shape=(latent_dim * 2,)),
    layers.Dense(32, activation='relu'),
    layers.Dense(16, activation='relu'),
    # Linear regression head. The targets are the encoder's ReLU latent codes,
    # which are not confined to (0, 1), so a sigmoid output cannot reach them.
    layers.Dense(latent_dim, activation='linear')
])

fusion_model.compile(optimizer='adam', loss='mse', metrics=['mae'])
fusion_model.fit(X_train_fusion, y_train, validation_split=0.2, epochs=20, batch_size=32)

[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
Epoch 1/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 1.3985 - mae: 0.9137 - val_loss: 1.1925 - val_mae: 0.8146
Epoch 2/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.1183 - mae: 0.7784 - val_loss: 1.0275 - val_mae: 0.7231
Epoch 3/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1.0087 - mae: 0.7104 - val_loss: 0.9589 - val_mae: 0.6793
Epoch 4/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.9328 - mae: 0.6643 - val_loss: 0.9340 - val_mae: 0.6621
Epoch 5/20
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.9135 - mae:

<keras.src.callbacks.history.History at 0x7873ccc88f10>

In [None]:
# Placeholder inputs for one planet; replace with a measured spectrum and real parameters.
new_spectrum = np.random.rand(1, 100) * 0.01
# Column order must follow `features`, because `scaler` was fitted on those columns:
# pl_massj, pl_radj, pl_dens, pl_orbper, st_teff, st_met, st_logg.
# NOTE(review): the last two values (0.02, 10.3) were originally annotated as
# eccentricity and stellar age, which are not among the fitted columns. Confirm
# the intended inputs.
new_planetary_features = np.array([[90.2, 10.1, 5.5, 100, 1000, 0.02, 10.3]])

# Shape the inputs the way each branch was trained: a channel axis for the CNN,
# standardised columns for the MLP.
new_spectrum_reshaped = new_spectrum[:, :, np.newaxis]
new_planetary_scaled = scaler.transform(new_planetary_features)

cnn_features = cnn_model.predict(new_spectrum_reshaped)
mlp_features = mlp_model.predict(new_planetary_scaled)

# Fuse both branch outputs, predict the latent code, then decode it to 12 values.
final_features = np.concatenate([cnn_features, mlp_features], axis=1)
predicted_gas_latent = fusion_model.predict(final_features)
predicted_gases = decoder.predict(predicted_gas_latent)

# Display predictions. Each decoder output is an independent sigmoid value,
# so the printed figures are not forced to add up to 100.
gas_labels = ["H2O", "CO2", "O2", "N2", "CH4", "N2O", "CO", "O3", "SO2", "NH3", "C2H6", "NO2"]
for gas, percentage in zip(gas_labels, 100 * predicted_gases[0]):
    print(f"{gas}: {percentage:.2f}%")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
H2O: 4.99%
CO2: 65.61%
O2: 28.47%
N2: 31.95%
CH4: 4.53%
N2O: 1.92%
CO: 1.81%
O3: 0.77%
SO2: 1.52%
NH3: 1.61%
C2H6: 0.16%
NO2: 0.13%


In [None]:
# Print the fusion model's layer-by-layer architecture and parameter counts.
fusion_model.summary()

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

# Keras' own metrics on the held-out fusion inputs (loss is MSE, metric is MAE).
loss, mae = fusion_model.evaluate(x=X_test_fusion, y=y_test, verbose=0)
print(f"Test Loss: {loss:.4f}")
print(f"Test Mean Absolute Error: {mae:.4f}")

# Cross-check with scikit-learn on explicit predictions.
y_pred = fusion_model.predict(X_test_fusion)

# Same quantity as the Keras loss above, computed independently.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Squared Error: {mse:.4f}")

# R-squared; a negative value means the model does worse than predicting the
# mean of the targets.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f"R-squared: {r2:.4f}")




Test Loss: 1.6514
Test Mean Absolute Error: 0.9070
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Mean Squared Error: 1.6514
R-squared: -7.8524
