# In [5]:
import requests
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# NASA Exoplanet Archive TAP endpoint (baseline data).
# The embedded query selects pl_name plus the eight columns later listed in
# `numeric_columns` from the `pscomppars` table and asks for JSON output.
NASA_ARCHIVE_URL = "https://exoplanetarchive.ipac.caltech.edu/TAP/sync?query=select+pl_name,pl_rade,pl_bmasse,pl_orbper,pl_eqt,st_teff,st_mass,st_rad,st_met+from+pscomppars&format=json"

# ESA Gaia archive TAP endpoint (stellar data).
# The ADQL query asks for source_id, teff_val and lum_val from
# gaiadr2.gaia_source for every row whose teff_val is not NULL.
# NOTE(review): the query has no row limit (no TOP clause) — confirm the
# service can answer it synchronously within the client's timeout.
# NOTE(review): the query string contains literal spaces; presumably the HTTP
# client percent-encodes them — verify.
GAIA_URL = "https://gea.esac.esa.int/tap-server/tap/sync?REQUEST=doQuery&LANG=ADQL&FORMAT=json&QUERY=SELECT+source_id,teff_val,lum_val FROM gaiadr2.gaia_source WHERE teff_val IS NOT NULL"

# MAST API endpoint used for TESS mission data (new observations).
# NOTE(review): this is the bare endpoint with no query parameters, and it is
# fetched with a plain GET — confirm what request shape the service expects.
TESS_URL = "https://mast.stsci.edu/api/v0/invoke"

# Fetch data safely
def fetch_data(url):
    """Fetch ``url`` with a GET request and return its decoded JSON payload.

    Args:
        url: Address to request. The call uses a 10 second timeout.

    Returns:
        ``payload['data']`` when the decoded JSON is a dict containing a
        ``'data'`` key, otherwise the decoded JSON as-is. An empty list is
        returned (after printing a diagnostic) when the request fails, the
        status code is not 200, or the body is not valid JSON.
    """
    try:
        response = requests.get(url, timeout=10)
    except requests.exceptions.RequestException as e:
        print(f"❌ Request failed for {url}: {e}")
        return []

    if response.status_code != 200:
        print(f"❌ Failed to fetch data from {url}. Status Code: {response.status_code}")
        return []

    try:
        data = response.json()
    except ValueError:
        # ValueError is the common base of every JSON decode error that
        # Response.json() can raise. requests.exceptions.JSONDecodeError only
        # exists in newer requests releases, so naming it here would turn a
        # bad body into an AttributeError on older installs.
        print(f"❌ JSON decoding failed for {url}")
        return []

    if isinstance(data, dict) and 'data' in data:
        return data['data']  # Unwrap the row payload from its envelope
    return data


# Download the three raw datasets, in order: NASA, then Gaia, then TESS.
nasa_data, gaia_data, tess_data = (
    fetch_data(endpoint)
    for endpoint in (NASA_ARCHIVE_URL, GAIA_URL, TESS_URL)
)

# The NASA table is the baseline; nothing downstream works without it.
df = pd.DataFrame(nasa_data)
if df.empty:
    raise ValueError("❌ NASA data fetch failed. Cannot proceed.")

# Feature columns: coerce each one to a numeric dtype (unparseable values
# become NaN) and then discard every row that is missing any of them.
numeric_columns = [
    'pl_rade', 'pl_bmasse', 'pl_orbper', 'pl_eqt',
    'st_teff', 'st_mass', 'st_rad', 'st_met',
]
df = df.assign(**{
    name: pd.to_numeric(df[name], errors='coerce') for name in numeric_columns
})
df = df.dropna(subset=numeric_columns)

# Merge Gaia data onto the planet table (left join on stellar temperature).
# NOTE(review): matching stars by exact temperature equality is a loose key —
# confirm this is the intended join criterion.
if gaia_data:
    gaia_df = pd.DataFrame(gaia_data)
    if not gaia_df.empty and 'teff_val' in gaia_df.columns:
        # Make the join key numeric so it is comparable with the already
        # numeric st_teff column, and drop rows whose key could not be parsed.
        gaia_df['teff_val'] = pd.to_numeric(gaia_df['teff_val'], errors='coerce')
        gaia_df = gaia_df.dropna(subset=['teff_val'])
        # Keep a single Gaia row per temperature: a left join against a
        # non-unique right key would otherwise emit one copy of each planet
        # row per matching Gaia row.
        gaia_df = gaia_df.drop_duplicates(subset='teff_val')
        df = df.merge(gaia_df, left_on='st_teff', right_on='teff_val', how='left')

# Append TESS rows to the planet table.
if tess_data:
    tess_df = pd.DataFrame(tess_data)
    if not tess_df.empty:
        df = pd.concat([df, tess_df], ignore_index=True, sort=False)
        # Appended rows only have values for columns they share with the
        # planet table; any feature column they lack is filled with NaN.
        # Re-apply the numeric coercion and the missing-value filter so the
        # scoring functions and the model never see incomplete rows.
        df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')
        df = df.dropna(subset=numeric_columns)

# 🧠 Habitability Scoring Functions
def mass_score(mass):
    """Return the mass component of the rule-based habitability score.

    The score rises as ``mass * 10`` up to a mass of 2, then falls by 5 per
    unit of mass up to a mass of 5, and is 0 for anything heavier.
    """
    if mass <= 2:
        return mass * 10
    if mass <= 5:
        return 20 - (mass - 2) * 5
    return 0

def star_score(teff):
    """Return the host-star component of the rule-based habitability score.

    Temperatures outside the closed range 5000..7000 score 0. Inside it the
    score is ``(7000 - |teff - 5800|) / 100``, floored at 0, so it peaks at
    70 for ``teff == 5800``.
    """
    if not (5000 <= teff <= 7000):
        return 0
    offset = abs(teff - 5800)
    return max(0, (7000 - offset) / 100)

def stability_score(orbital_period):
    """Return the orbit component of the rule-based habitability score.

    Periods outside the closed range 50..500 score 0. Inside it the score is
    10 minus one point per 50 units of distance from 365, capped at 10.
    """
    if not (50 <= orbital_period <= 500):
        return 0
    deviation = abs(orbital_period - 365)
    return min(10, 10 - deviation / 50)

def atmosphere_potential(mass, radius):
    """Return an atmosphere score chosen from the planet's mass band.

    ``radius`` is accepted but not used by the calculation.

    Bands: mass <= 0.5 scores 0, mass <= 2.5 scores 10, mass > 5 scores 2,
    and everything else scores 5.
    """
    if mass <= 0.5:
        return 0
    if mass <= 2.5:
        return 10
    if mass > 5:
        return 2
    return 5

def terraformability_score(row):
    """Return a 0-100 terraformability score for one planet row.

    ``row`` must provide 'pl_rade', 'pl_bmasse' and 'pl_eqt'. Planets with
    'pl_rade' above 3 score 0 outright. Otherwise four sub-scores (gravity,
    temperature, atmosphere, water) are summed, scaled by 2.5 and capped
    at 100.
    """
    if row['pl_rade'] > 3:
        return 0

    mass = row['pl_bmasse']
    eq_temp = row['pl_eqt']

    # Gravity: full marks at a mass of 1, losing 2 points per unit away.
    gravity = max(0, 10 - abs(mass - 1) * 2)

    # Temperature: only counted inside 230..330, best at 288.
    if 230 <= eq_temp <= 330:
        temperature = max(0, 10 - abs(eq_temp - 288) / 10)
    else:
        temperature = 0

    atmosphere = atmosphere_potential(mass, row['pl_rade'])

    # Water: all-or-nothing on the 273..373 window.
    water = 10 if 273 <= eq_temp <= 373 else 0

    return min(100, (gravity + temperature + atmosphere + water) * 2.5)

def rule_based_score(row):
    """Return the rule-based habitability score (capped at 100) for a row.

    ``row`` must provide 'pl_rade', 'pl_bmasse', 'pl_eqt', 'st_teff' and
    'pl_orbper'. Planets with 'pl_rade' above 3 score 0 outright; otherwise
    the mass, temperature, star and orbit components are added together.
    """
    if row['pl_rade'] > 3:
        return 0

    mass_points = mass_score(row['pl_bmasse'])

    # Temperature: only counted inside 200..400, best at 300.
    eq_temp = row['pl_eqt']
    if 200 <= eq_temp <= 400:
        temp_points = max(0, (300 - abs(eq_temp - 300)) / 3)
    else:
        temp_points = 0

    star_points = star_score(row['st_teff'])
    orbit_points = stability_score(row['pl_orbper'])

    total = mass_points + temp_points + star_points + orbit_points
    return min(100, total)

# Score every row with both hand-written formulas and store the results as
# new columns on the planet table.
df['rule_habitability_score'] = df.apply(rule_based_score, axis=1)
df['terraformability_score'] = df.apply(terraformability_score, axis=1)

# Train the ML model.
# Features are the numeric columns; the target is the rule-based score, which
# rule_based_score itself derives from a subset of those same columns, so the
# forest is learning to approximate that formula.
X = df[numeric_columns]
y = df['rule_habitability_score']
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# `model` is read later by predict_habitability, so it must stay a module
# level name.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Report the held-out error of the model against the rule-based target.
print(f"\n📊 Model trained! Mean Absolute Error: {mean_absolute_error(y_test, y_pred):.2f}")

# 🌍 Habitability Prediction Function
def predict_habitability(planet_data):
    """Return the model's habitability prediction for one planet, capped at 100.

    ``planet_data`` is indexed with the module-level ``numeric_columns`` list
    and the selected values are cast to float before being passed, as a
    one-row frame, to the module-level ``model``. The prediction is rounded
    to two decimals.
    """
    feature_values = planet_data[numeric_columns].astype(float)
    single_row = pd.DataFrame([feature_values], columns=numeric_columns)
    predicted = model.predict(single_row)[0]
    rounded = round(predicted, 2)
    return min(100, rounded)

# 🏆 Main Execution
def main():
    """Run the interactive lookup loop.

    Repeatedly prompts for a planet name, matches it case-insensitively
    against the 'pl_name' column of the module-level ``df``, and prints the
    ML prediction together with the two stored rule-based scores for the
    first matching row. Typing 'exit' (any case) ends the loop.
    """
    while True:
        planet_name = input("\nEnter exoplanet name (or type 'exit' to quit): ").strip()
        wanted = planet_name.lower()

        if wanted == 'exit':
            print("\n👋 Exiting program. Goodbye!")
            break

        name_matches = df['pl_name'].str.lower() == wanted
        candidates = df[name_matches]
        if candidates.empty:
            print("\n❌ Planet not found in the database. Please try again.")
            continue

        # Only the first matching row is reported.
        planet = candidates.iloc[0]
        ml_score = predict_habitability(planet)
        rule_score = planet['rule_habitability_score']
        terra_score = planet['terraformability_score']

        print(f"\n🌍 ML Habitability Prediction for {planet_name}: {ml_score}%")
        print(f"🔬 Rule-Based Habitability Score: {rule_score}%")
        print(f"🌱 Terraformability Score: {terra_score}%\n")


if __name__ == "__main__":
    main()


# --- Recorded notebook cell output (kept for reference; not executable code) ---
# ✅ Successfully fetched data from exoplanetarchive.ipac.caltech.edu
# ✅ Successfully fetched data from gea.esac.esa.int
# ❌ Failed to fetch data from https://mast.stsci.edu/api/v0/invoke. Status Code: 500
#
#
# ValueError: All arrays must be of the same length