In [1]:
# Authenticate this session with the Hugging Face Hub.
# presumably needed so the model weights requested later can be downloaded — TODO confirm
from huggingface_hub import login
login()  # renders the login prompt as a widget in the cell output


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
import time

import numpy as np
import pandas as pd
import requests
import torch
import xgboost as xgb
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Install necessary packages (for Google Colab)
!pip install transformers accelerate bitsandbytes xformers

# NASA Exoplanet Archive TAP query, assembled from its parts:
# endpoint, selected columns, source table, and response format.
NASA_ARCHIVE_URL = "".join([
    "https://exoplanetarchive.ipac.caltech.edu/TAP/sync",
    "?query=select+",
    "pl_name,pl_rade,pl_bmasse,pl_orbper,pl_eqt,st_teff,st_mass,st_rad,st_met",
    "+from+pscomppars",
    "&format=json",
])

# Load AI Model for Reasoning (Mistral 7B)
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 4-bit quantization is configured through BitsAndBytesConfig; passing
# load_in_4bit=True straight to from_pretrained is the deprecated spelling.
# The compute dtype is set to float16 to match the torch_dtype used for the
# non-quantized parts of the model.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",  # let accelerate decide where the weights are placed
    quantization_config=quantization_config,
)

print("✅ Mistral 7B Model Loaded Successfully!")

# Function to Fetch Data
def fetch_data(url, retries=3, delay=5):
    """Download a JSON list from ``url``, retrying on failure.

    Args:
        url: Endpoint to query with an HTTP GET (20 second timeout).
        retries: Maximum number of attempts.
        delay: Seconds to wait between two consecutive attempts.

    Returns:
        The decoded JSON payload when it is a non-empty list; otherwise ``[]``
        once every attempt has failed or returned something else.
    """
    for attempt in range(retries):
        try:
            print(f"[Attempt {attempt + 1}] Fetching data from NASA API...")
            response = requests.get(url, timeout=20)
            response.raise_for_status()
            data = response.json()
            if isinstance(data, list) and data:
                return data
            print("[Warning] Empty or invalid response, retrying...")
        except requests.exceptions.RequestException as e:
            print(f"[Error] Request failed: {e}")
        except ValueError as e:
            # A body that is not valid JSON is treated like any other failed
            # attempt instead of aborting the retry loop.
            print(f"[Error] Response was not valid JSON: {e}")
        # Only wait when another attempt will actually follow.
        if attempt < retries - 1:
            time.sleep(delay)
    return []

# Download the catalogue and wrap it in a DataFrame
nasa_data = fetch_data(NASA_ARCHIVE_URL)
df = pd.DataFrame(nasa_data)

# Nothing downstream can work without rows, so stop right here
if df.empty:
    raise ValueError("[Fatal] NASA data fetch failed. Cannot proceed.")

# Lowercase every column header so later lookups use a single spelling
df.columns = df.columns.map(str.lower)

# Normalise planet names: string type, surrounding whitespace removed, lowercase
df['pl_name'] = (
    df['pl_name']
    .astype(str)
    .str.strip()
    .str.lower()
)

# Report how many planets came back (counted before incomplete rows are dropped)
print(f"✅ Total planets fetched: {len(df)}")

# Data Preprocessing: coerce the feature columns to numbers (unparseable
# values become NaN) and discard any row missing one of them
numeric_columns = [
    'pl_rade', 'pl_bmasse', 'pl_orbper', 'pl_eqt',
    'st_teff', 'st_mass', 'st_rad', 'st_met',
]
df[numeric_columns] = df[numeric_columns].apply(
    lambda column: pd.to_numeric(column, errors='coerce')
)
df.dropna(subset=numeric_columns, inplace=True)

def rule_based_score(row):
    """Heuristic habitability score for one planet record.

    ``row`` is indexed by column name and must provide ``pl_rade``,
    ``pl_bmasse``, ``pl_eqt``, ``st_teff`` and ``pl_orbper``.

    Returns 0 for planets with a radius above 3; otherwise the sum of the
    mass, temperature, star and orbit components, capped at 100.
    """
    # Large planets are ruled out before anything else is looked at.
    if row['pl_rade'] > 3:
        return 0

    mass = row['pl_bmasse']
    eq_temp = row['pl_eqt']
    star_temp = row['st_teff']
    period = row['pl_orbper']

    # Mass: flat 10 up to a mass of 2, otherwise 20 minus 5 per unit above 2,
    # floored at 0.
    if mass <= 2:
        mass_component = max(0, 10)
    else:
        mass_component = max(0, 20 - (mass - 2) * 5)

    # Temperature: only counted inside 200..400, peaking at 300.
    temp_component = 0
    if 200 <= eq_temp <= 400:
        temp_component = max(0, (300 - abs(eq_temp - 300)) / 3)

    # Host star: only counted inside 5000..7000, peaking at 5800.
    star_component = 0
    if 5000 <= star_temp <= 7000:
        star_component = max(0, (7000 - abs(star_temp - 5800)) / 100)

    # Orbit: only counted inside 50..500, at most 10, peaking at 365.
    orbit_component = 0
    if 50 <= period <= 500:
        orbit_component = min(10, 10 - abs(period - 365) / 50)

    total = mass_component + temp_component + star_component + orbit_component
    return min(100, total)

def terraformability_score(row):
    """Heuristic terraformability score for one planet record.

    ``row`` is indexed by column name and must provide ``pl_rade``,
    ``pl_bmasse`` and ``pl_eqt``.

    Returns 0 for planets with a radius above 3; otherwise 2.5 times the sum
    of four sub-scores (each at most 10), capped at 100.
    """
    if row['pl_rade'] > 3:
        return 0

    mass = row['pl_bmasse']
    eq_temp = row['pl_eqt']

    # Gravity proxy: loses 2 points per unit of mass away from 1, floored at 0.
    gravity_score = max(0, 10 - abs(mass - 1) * 2)

    # Temperature: only counted inside 230..330, peaking at 288.
    if 230 <= eq_temp <= 330:
        temp_score = max(0, 10 - abs(eq_temp - 288) / 10)
    else:
        temp_score = 0

    # Atmosphere proxy: full marks up to a mass of 2.5, otherwise 2.
    if mass <= 2.5:
        atmosphere_score = 10
    else:
        atmosphere_score = 2

    # Water proxy: full marks when the temperature lies inside 273..373.
    if 273 <= eq_temp <= 373:
        water_score = 10
    else:
        water_score = 0

    combined = gravity_score + temp_score + atmosphere_score + water_score
    return min(100, combined * 2.5)

# Score every planet with both rule-based heuristics
df['rule_habitability_score'] = df.apply(rule_based_score, axis=1)
df['terraformability_score'] = df.apply(terraformability_score, axis=1)

# Train XGBoost Model
X, y = df[numeric_columns], df['rule_habitability_score']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Early stopping gets its own validation split carved out of the training
# data. Stopping on the test split would pick the number of trees using the
# very rows the MAE below is reported on, making that MAE optimistic.
X_fit, X_val, y_fit, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

xgb_model = xgb.XGBRegressor(
    n_estimators=300,
    learning_rate=0.05,
    max_depth=7,
    early_stopping_rounds=10,
    eval_metric="mae",
    random_state=42,  # fixed seed so reruns are comparable
)
xgb_model.fit(X_fit, y_fit, eval_set=[(X_val, y_val)], verbose=False)

# Model Accuracy, measured on rows never seen during fitting or early stopping
y_pred = xgb_model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f"\n✅ Model trained successfully! MAE: {mae:.4f}\n")

# AI Reasoning Function
def generate_reasoning(planet_data, max_new_tokens=300):
    """Ask the language model for a habitability analysis of one planet.

    Args:
        planet_data: Record indexed by column name; ``pl_name``,
            ``pl_bmasse``, ``pl_rade``, ``pl_eqt``, ``pl_orbper`` and
            ``st_teff`` are interpolated into the prompt.
        max_new_tokens: Upper bound on the number of tokens generated after
            the prompt.

    Returns:
        The generated continuation only (the prompt is not echoed back),
        decoded without special tokens and stripped of surrounding whitespace.
    """
    prompt = f"""
    Analyze the habitability of {planet_data['pl_name']} based on its characteristics:
    - Mass: {planet_data['pl_bmasse']} Earth masses
    - Radius: {planet_data['pl_rade']} Earth radii
    - Temperature: {planet_data['pl_eqt']} K
    - Orbital Period: {planet_data['pl_orbper']} days
    - Host Star Temperature: {planet_data['st_teff']} K
    """

    # Place the inputs on the device the model itself reports, rather than on
    # a separately computed global that may not match where the weights are.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        # max_new_tokens budgets the answer alone; max_length would also count
        # the prompt tokens against the limit.
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            top_p=0.9,
        )

    # generate() returns prompt + continuation; keep only the continuation.
    completion = outputs[0][prompt_length:]
    return tokenizer.decode(completion, skip_special_tokens=True).strip()

# Status message printed once the reasoning helper has been defined.
print("🧠 AI Reasoning Ready!")

# Main Program
def main():
    """Interactive lookup loop for exoplanet habitability reports.

    Repeatedly prompts for a planet name. ``exit`` (or end of input / an
    interrupt) leaves the loop, ``list`` prints every known planet name, and
    any other text is matched exactly against the lowercase ``pl_name``
    column. For a match, the ML prediction, both rule-based scores and the
    language-model reasoning are printed.
    """
    while True:
        try:
            user_input = input("\n🔍 Enter exoplanet name ('list' to view all, 'exit' to quit): ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            # No further input is available (closed stdin or an interrupted
            # kernel): leave the loop the same way 'exit' does.
            print("\n🚀 Exiting program. Goodbye!\n")
            break

        if user_input == 'exit':
            print("\n🚀 Exiting program. Goodbye!\n")
            break

        if user_input == 'list':
            print("\n📜 Available Exoplanets:\n")
            print("\n".join(df['pl_name'].unique()))
            continue

        # Search for the planet
        matches = df[df['pl_name'] == user_input]

        if matches.empty:
            print("\n❌ Planet not found. Try another name or type 'list' to see all planets.\n")
            continue

        planet_data = matches.iloc[0]

        # Take the feature columns from the DataFrame slice, not from the row
        # Series: the row mixes the name string with numbers, so slicing the
        # frame keeps each feature column's numeric dtype explicitly.
        features = matches[numeric_columns].iloc[[0]]
        ml_score = xgb_model.predict(features)[0]

        # All three scores are shown with two decimals for consistency.
        print(f"\n🌍 **{planet_data['pl_name'].title()} Analysis:**")
        print(f"📊 ML Habitability Prediction: {ml_score:.2f}%")
        print(f"📏 Rule-Based Habitability Score: {planet_data['rule_habitability_score']:.2f}%")
        print(f"🛠 Terraformability Score: {planet_data['terraformability_score']:.2f}%")
        print("\n🧠 AI Reasoning:")
        print(generate_reasoning(planet_data))
        print("=================================")

# Start the interactive lookup loop; this blocks on input() until the user types 'exit'.
main()


In [None]:
# Debugging aid: show which columns the DataFrame currently has
print("Columns in DataFrame:", df.columns)
print(df.head())  # First few rows, to eyeball the value formats


In [None]:
# Repeat of the column/row inspection from the previous cell
print("Columns in DataFrame:", df.columns)
print(df.head())  # First few rows, to eyeball the value formats


In [None]:
# Re-applies the lowercase normalization already performed in the main cell
df.columns = df.columns.str.lower()  # Convert all column names to lowercase


In [None]:
# Re-runs the numeric coercion and incomplete-row removal from the main cell:
# unparseable values become NaN, then rows with NaN in any feature are dropped.
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')
df.dropna(subset=numeric_columns, inplace=True)


In [None]:
# Force planet names to strings (the main cell additionally strips and lowercases them)
df['pl_name'] = df['pl_name'].astype(str)


In [None]:
# Sanity check: report the current row count and flag an empty DataFrame
print(f"Total planets fetched: {len(df)}")
if df.empty:
    # Only prints a message; execution continues even when there is no data
    print("[ERROR] No planets found. Check API response format.")
