In [1]:
import requests
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_absolute_error
import xgboost as xgb  # Import XGBoost
from transformers import pipeline  # Import Hugging Face for AI reasoning

# Load Mistral AI model for reasoning
ai_model = pipeline("text-generation", model="mistralai/Mistral-7B-v0.1", device="cuda")

# 🌍 NASA Exoplanet Archive API (Baseline Data)
NASA_ARCHIVE_URL = "https://exoplanetarchive.ipac.caltech.edu/TAP/sync?query=select+pl_name,pl_rade,pl_bmasse,pl_orbper,pl_eqt,st_teff,st_mass,st_rad,st_met+from+pscomppars&format=json"

def fetch_data(url):
    try:
        response = requests.get(url, timeout=10)
        if response.status_code == 200:
            return response.json()
        else:
            print(f"❌ Failed to fetch data. Status Code: {response.status_code}")
            return []
    except requests.exceptions.RequestException as e:
        print(f"❌ Request failed: {e}")
        return []

# Fetch and process data
nasa_data = fetch_data(NASA_ARCHIVE_URL)
df = pd.DataFrame(nasa_data)
if df.empty:
    raise ValueError("❌ NASA data fetch failed. Cannot proceed.")

numeric_columns = ['pl_rade', 'pl_bmasse', 'pl_orbper', 'pl_eqt', 'st_teff', 'st_mass', 'st_rad', 'st_met']
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')
df = df.dropna(subset=numeric_columns)  # Remove rows with missing values

# 🧠 Habitability Scoring Functions
def mass_score(mass):
    return mass * 10 if mass <= 2 else (20 - (mass - 2) * 5 if mass <= 5 else 0)

def star_score(teff):
    return max(0, (7000 - abs(teff - 5800)) / 100) if 5000 <= teff <= 7000 else 0

def stability_score(orbital_period):
    return min(10, 10 - abs(orbital_period - 365) / 50) if 50 <= orbital_period <= 500 else 0

def atmosphere_potential(mass, radius):
    if mass <= 0.5:
        return 0
    elif mass <= 2.5:
        return 10
    return 2 if mass > 5 else 5

def terraformability_score(row):
    if row['pl_rade'] > 3:
        return 0
    gravity_score = max(0, 10 - abs(row['pl_bmasse'] - 1) * 2)
    temp_score = max(0, 10 - abs(row['pl_eqt'] - 288) / 10) if 230 <= row['pl_eqt'] <= 330 else 0
    atmosphere_score = atmosphere_potential(row['pl_bmasse'], row['pl_rade'])
    water_score = 10 if 273 <= row['pl_eqt'] <= 373 else 0
    return min(100, (gravity_score + temp_score + atmosphere_score + water_score) * 2.5)

def rule_based_score(row):
    if row['pl_rade'] > 3:
        return 0
    mass_component = mass_score(row['pl_bmasse'])
    temp_component = max(0, (300 - abs(row['pl_eqt'] - 300)) / 3) if 200 <= row['pl_eqt'] <= 400 else 0
    star_component = star_score(row['st_teff'])
    orbit_component = stability_score(row['pl_orbper'])
    return min(100, mass_component + temp_component + star_component + orbit_component)

df['rule_habitability_score'] = df.apply(rule_based_score, axis=1)
df['terraformability_score'] = df.apply(terraformability_score, axis=1)

# ✅ Train XGBoost Model
X = df[numeric_columns]
y = df['rule_habitability_score']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

xgb_model = xgb.XGBRegressor()
param_grid = {'n_estimators': [100, 200, 300], 'learning_rate': [0.01, 0.05, 0.1], 'max_depth': [3, 5, 7]}

grid_search = GridSearchCV(xgb_model, param_grid, cv=3, scoring='neg_mean_absolute_error', n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
print(f"\n✅ Best Parameters: {grid_search.best_params_}")
print(f"📊 Mean Absolute Error: {mean_absolute_error(y_test, y_pred):.2f}")

# 🌍 AI Reasoning Function
def generate_reasoning(planet_data):
    prompt = f"""
    Analyze the habitability of {planet_data['pl_name']} based on its characteristics:
    - Mass: {planet_data['pl_bmasse']} Earth masses
    - Radius: {planet_data['pl_rade']} Earth radii
    - Temperature: {planet_data['pl_eqt']} K
    - Orbital Period: {planet_data['pl_orbper']} days
    - Host Star Temperature: {planet_data['st_teff']} K
    Explain why this planet may or may not be habitable.
    """
    response = ai_model(prompt, max_length=250, do_sample=True)
    return response[0]['generated_text']

# 🌍 Habitability Prediction
def predict_habitability(planet_data):
    planet_features = pd.DataFrame([planet_data[numeric_columns].astype(float)], columns=numeric_columns)
    prediction = best_model.predict(planet_features)[0]
    return round(max(0, min(100, prediction)), 2)

def main():
    while True:
        planet_name = input("\nEnter exoplanet name (or 'exit' to quit): ").strip()
        if planet_name.lower() == 'exit':
            print("\n👋 Exiting program. Goodbye!")
            break

        planet_data = df[df['pl_name'].str.lower() == planet_name.lower()]
        if planet_data.empty:
            print("\n❌ Planet not found in the database. Try again.")
            continue

        planet_data = planet_data.iloc[0]
        ml_score = predict_habitability(planet_data)
        rule_score = planet_data['rule_habitability_score']
        terra_score = planet_data['terraformability_score']
        ai_reasoning = generate_reasoning(planet_data)

        print(f"\n🌍 ML Habitability Prediction for {planet_name}: {ml_score}%")
        print(f"🔬 Rule-Based Habitability Score: {rule_score}%")
        print(f"🌱 Terraformability Score: {terra_score}%")
        print(f"🤖 AI Reasoning: {ai_reasoning}\n")

if __name__ == "__main__":
    main()


OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/mistralai/Mistral-7B-v0.1.
403 Client Error. (Request ID: Root=1-67d1fb0f-4bf2fd6619c55bda7db4698a;9d67c5fc-869f-4e73-8199-4c186684fb36)

Cannot access gated repo for url https://huggingface.co/mistralai/Mistral-7B-v0.1/resolve/main/config.json.
Access to model mistralai/Mistral-7B-v0.1 is restricted and you are not in the authorized list. Visit https://huggingface.co/mistralai/Mistral-7B-v0.1 to ask for access.