In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load tokenizer and model
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")


In [2]:
import requests
import pandas as pd
import time

# NASA API URL
NASA_ARCHIVE_URL = "https://exoplanetarchive.ipac.caltech.edu/TAP/sync?query=select+pl_name+from+pscomppars&format=json"

def fetch_planets(url, retries=3, delay=5):
    """Fetches planet names from NASA API with retry logic."""
    for attempt in range(retries):
        try:
            response = requests.get(url, timeout=30)  # Increased timeout
            response.raise_for_status()  # Raise error for bad responses
            data = response.json()
            df = pd.DataFrame(data)
            return df["pl_name"].tolist()
        except requests.exceptions.Timeout:
            print(f"⚠️ Timeout occurred. Retrying... ({attempt + 1}/{retries})")
        except requests.exceptions.RequestException as e:
            print(f"❌ Request failed: {e}")
            break  # Stop retrying on other errors
        time.sleep(delay)  # Wait before retrying
    return []

# Fetch and display planets
planets = fetch_planets(NASA_ARCHIVE_URL)

if planets:
    print(f"\n🌍 Found {len(planets)} exoplanets in NASA's archive:\n")
    for i, planet in enumerate(planets[:20]):  # Show first 20 planets
        print(f"{i}. {planet}")
else:
    print("\n❌ No planets found. Check API connection.")



🌍 Found 5856 exoplanets in NASA's archive:

0. OGLE-2016-BLG-1227L b
1. Kepler-24 e
2. Kepler-1065 b
3. HD 132406 b
4. TOI-1260 c
5. HD 149143 b
6. HD 99492 b
7. nu Oph c
8. 75 Cet b
9. gam Lib b
10. HD 113337 b
11. HD 28109 c
12. K2-350 b
13. K2-342 b
14. EPIC 212587672 b
15. K2-332 b
16. K2-340 b
17. GJ 486 b
18. TOI-6002 b
19. HIP 12961 b


In [None]:
import requests
import pandas as pd
import numpy as np
import time
import xgboost as xgb
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from transformers import AutoModelForCausalLM, AutoTokenizer

# NASA Exoplanet Archive API
NASA_ARCHIVE_URL = (
    "https://exoplanetarchive.ipac.caltech.edu/TAP/sync?"
    "query=select+pl_name,pl_rade,pl_bmasse,pl_orbper,pl_eqt,st_teff,st_mass,st_rad,st_met+from+pscomppars&format=json"
)

# Load AI Model for Reasoning
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")

# Function to Fetch Data with Retry Logic
def fetch_data(url, retries=3, delay=5):
    for attempt in range(retries):
        try:
            print(f"[Attempt {attempt + 1}] Fetching data from NASA API...")
            response = requests.get(url, timeout=20)
            response.raise_for_status()
            data = response.json()
            if isinstance(data, list) and data:
                return data
            else:
                print("[Warning] Empty or invalid response, retrying...")
        except requests.exceptions.RequestException as e:
            print(f"[Error] Request failed: {e}")
        time.sleep(delay)
    return []

# Fetch Data
nasa_data = fetch_data(NASA_ARCHIVE_URL)
df = pd.DataFrame(nasa_data)
if df.empty:
    raise ValueError("[Fatal] NASA data fetch failed. Cannot proceed.")

# Data Preprocessing
numeric_columns = ['pl_rade', 'pl_bmasse', 'pl_orbper', 'pl_eqt', 'st_teff', 'st_mass', 'st_rad', 'st_met']
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')
df.dropna(subset=numeric_columns, inplace=True)

def mass_score(mass):
    return mass * 10 if mass <= 2 else (20 - (mass - 2) * 5 if mass <= 5 else 0)

def star_score(teff):
    return max(0, (7000 - abs(teff - 5800)) / 100) if 5000 <= teff <= 7000 else 0

def stability_score(orbital_period):
    return min(10, 10 - abs(orbital_period - 365) / 50) if 50 <= orbital_period <= 500 else 0

def atmosphere_potential(mass, radius):
    if mass <= 0.5:
        return 0
    elif mass <= 2.5:
        return 10
    return 2 if mass > 5 else 5

def terraformability_score(row):
    if row['pl_rade'] > 3:
        return 0
    gravity_score = max(0, 10 - abs(row['pl_bmasse'] - 1) * 2)
    temp_score = max(0, 10 - abs(row['pl_eqt'] - 288) / 10) if 230 <= row['pl_eqt'] <= 330 else 0
    atmosphere_score = atmosphere_potential(row['pl_bmasse'], row['pl_rade'])
    water_score = 10 if 273 <= row['pl_eqt'] <= 373 else 0
    return min(100, (gravity_score + temp_score + atmosphere_score + water_score) * 2.5)

def rule_based_score(row):
    if row['pl_rade'] > 3:
        return 0
    mass_component = mass_score(row['pl_bmasse'])
    temp_component = max(0, (300 - abs(row['pl_eqt'] - 300)) / 3) if 200 <= row['pl_eqt'] <= 400 else 0
    star_component = star_score(row['st_teff'])
    orbit_component = stability_score(row['pl_orbper'])
    return min(100, mass_component + temp_component + star_component + orbit_component)

df['rule_habitability_score'] = df.apply(rule_based_score, axis=1)
df['terraformability_score'] = df.apply(terraformability_score, axis=1)

# Train XGBoost Model
X, y = df[numeric_columns], df['rule_habitability_score']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

xgb_model = xgb.XGBRegressor(n_estimators=300, learning_rate=0.05, max_depth=7, early_stopping_rounds=10, eval_metric="mae")
xgb_model.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)

# Model Accuracy
y_pred = xgb_model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f"\n✅ Model trained successfully! MAE: {mae:.4f}\n")

# AI Reasoning Function
def generate_reasoning(planet_data):
    prompt = f"""
    You are an advanced AI specialized in astrophysics and exoplanetary science. 
    Analyze the habitability potential of the exoplanet **{planet_data['pl_name']}** based on the given characteristics:
    
    - **Mass**: {planet_data['pl_bmasse']} Earth masses  
    - **Radius**: {planet_data['pl_rade']} Earth radii  
    - **Equilibrium Temperature**: {planet_data['pl_eqt']} K  
    - **Orbital Period**: {planet_data['pl_orbper']} days  
    - **Host Star Temperature**: {planet_data['st_teff']} K  
    - **Host Star Mass**: {planet_data['st_mass']} Solar masses  
    - **Host Star Radius**: {planet_data['st_rad']} Solar radii  
    - **Stellar Metallicity**: {planet_data['st_met']} [Fe/H]  

    ### **Analyze the Habitability Potential Based On:**
    1. **Atmospheric Retention**  
       - Given the planet's mass and radius, is it likely to retain an atmosphere?  
       - Compare its escape velocity to Earth's.  
    2. **Surface Temperature & Liquid Water**  
       - Does the equilibrium temperature suggest water can exist in liquid form?  
       - Consider greenhouse effects and potential atmospheric composition.  
    3. **Gravitational Stability**  
       - Does the planet's mass and radius allow for stable surface conditions?  
    4. **Star’s Influence & Radiation Levels**  
       - Is the host star’s temperature and radiation output suitable for habitability?  
       - Does the planet receive excessive UV or X-ray radiation?  
    5. **Orbital Stability & Tidal Effects**  
       - Is the planet’s orbital period stable enough to maintain long-term habitability?  
       - Could tidal locking affect climate conditions?  
    6. **Metallicity & Planetary Formation**  
       - Does the host star's metallicity suggest a rocky composition?  
       
    ### **Final Assessment**  
    Based on the above factors, conclude whether **{planet_data['pl_name']}** is potentially habitable or not.  
    Provide a logical, **scientifically backed** explanation.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_length=800, do_sample=True, top_p=0.9)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Main Program
def main():
    while True:
        user_input = input("\n🔍 Enter exoplanet name (or 'exit' to quit): ").strip().lower()
        if user_input == 'exit':
            print("\n🚀 Exiting program. Goodbye!\n")
            break

        planet_data = df[df['pl_name'].str.lower() == user_input]
        if planet_data.empty:
            print("\n❌ Planet not found. Try another name.\n")
            continue

        planet_data = planet_data.iloc[0]
        ml_score = xgb_model.predict(pd.DataFrame([planet_data[numeric_columns]]))[0]
        print(f"\n🌍 **{planet_data['pl_name']} Analysis:**")
        print(f"📊 ML Habitability Prediction: {ml_score:.2f}%")
        print(f"📏 Rule-Based Habitability Score: {planet_data['rule_habitability_score']}%")
        print(f"🛠 Terraformability Score: {planet_data['terraformability_score']}%")
        print("\n🧠 AI Reasoning:")
        print(generate_reasoning(planet_data))
        print("=================================")

main()


[Attempt 1] Fetching data from NASA API...

✅ Model trained successfully! MAE: 0.7823




🔍 Enter exoplanet name (or 'exit' to quit):  Kepler-442 b



🌍 **Kepler-442 b Analysis:**
📊 ML Habitability Prediction: 99.79%
📏 Rule-Based Habitability Score: 100.0%
🛠 Terraformability Score: 56.449999999999996%

🧠 AI Reasoning:

    You are an advanced AI specialized in astrophysics and exoplanetary science. 
    Analyze the habitability potential of the exoplanet **Kepler-442 b** based on the given characteristics:
    
    - **Mass**: 2.36 Earth masses  
    - **Radius**: 1.34 Earth radii  
    - **Equilibrium Temperature**: 241.0 K  
    - **Orbital Period**: 112.3053 days  
    - **Host Star Temperature**: 4402.0 K  
    - **Host Star Mass**: 0.61 Solar masses  
    - **Host Star Radius**: 0.6 Solar radii  
    - **Stellar Metallicity**: -0.37 [Fe/H]  

    ### **Analyze the Habitability Potential Based On:**
    1. **Atmospheric Retention**  
       - Given the planet's mass and radius, is it likely to retain an atmosphere?  
       - Compare its escape velocity to Earth's.  
    2. **Surface Temperature & Liquid Water**  
       - Does 