In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import pandas as pd
import numpy as np
import xgboost as xgb
import google.generativeai as genai
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

# 🔑 Set up Gemini API key
genai.configure(api_key="AIzaSyAuSce1nyreQQtLOl61a_Mfo0eWlej62HU")

# 🔥 Load Gemini Model
gemini_model = genai.GenerativeModel("gemini-2.0-flash")

# ✅ Load Exoplanet Data
CSV_FILE_PATH = "exoplanet_scores_Final.csv"

try:
    df = pd.read_csv(CSV_FILE_PATH)
    print(f"\n✅ Successfully loaded data from {CSV_FILE_PATH}")
except Exception as e:
    raise ValueError(f"\n❌ Failed to load CSV file: {e}")

# ✅ Convert necessary columns to numeric (ignoring errors)
numeric_columns = [
    'pl_rade', 'pl_bmasse', 'pl_orbper', 'pl_eqt', 'st_teff', 'st_mass', 'st_rad', 'st_met',
    'pl_eqt_normalized', 'surface_gravity_normalized', 'Habitability Score', 'Terraformability Score',
    'st_activity', 'pl_atmos', 'pl_surf_temp', 'pl_escape_vel', 'pl_radiation_flux', 'ESI', 'pl_water_probability'
]

df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')
df.dropna(subset=numeric_columns, inplace=True)

# ✅ Habitability Score Calculation (Improved)
def rule_based_score(row):
    if row['pl_rade'] > 5:  # Adjusted limit for potentially habitable planets
        return 0
    mass_component = max(0, min(30, row['pl_bmasse'] * 5))  
    temp_component = max(0, min(40, (300 - abs(row['pl_eqt'] - 300)) / 2))  
    atmosphere_component = max(0, min(30, (row['st_met'] + 0.5) * 15))  # Star's metallicity contribution
    return min(100, mass_component + temp_component + atmosphere_component)

# ✅ Terraformability Score Calculation (Improved)
def terraformability_score(row):
    if row['pl_eqt'] < 180 or row['pl_eqt'] > 420:
        return 10  # Allow slight terraformability instead of 0
    
    atmosphere_factor = max(0, min(35, (row['st_met'] + 0.5) * 18))  
    gravity_factor = max(0, min(35, 9.8 / (row['pl_bmasse'] ** 0.6)))  
    temp_factor = max(0, min(30, (300 - abs(row['pl_eqt'] - 300)) / 2))  

    return min(100, atmosphere_factor + gravity_factor + temp_factor)

# ✅ Apply Updated Calculations
df['Habitability Score'] = df.apply(rule_based_score, axis=1)
df['Terraformability Score'] = df.apply(terraformability_score, axis=1)

# 🔮 Machine Learning Model Training (Improved)
X = df[numeric_columns].drop(columns=['Habitability Score'])
y = df['Habitability Score']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

xgb_model = xgb.XGBRegressor(n_estimators=300, learning_rate=0.05, max_depth=7)
xgb_model.fit(X_train, y_train)

y_pred = np.clip(xgb_model.predict(X_test), 0, 100)  # Ensure predictions are within [0,100]
mae = mean_absolute_error(y_test, y_pred)
print(f"\n✅ Model trained successfully! MAE: {mae:.4f}\n")

# 🧠 AI-Based Habitability Explanation (Improved)
def generate_reasoning(planet_data, habitability, terraformability):
    prompt = f"""
    You are an astrophysicist analyzing exoplanet habitability.

    ### 🌍 Exoplanet: {planet_data['pl_name']}
    - **Mass**: {planet_data['pl_bmasse']} Earth masses
    - **Radius**: {planet_data['pl_rade']} Earth radii
    - **Surface Temperature**: {planet_data['pl_eqt']} K
    - **Orbital Period**: {planet_data['pl_orbper']} days
    - **Host Star Temperature**: {planet_data['st_teff']} K
    - **Stellar Mass**: {planet_data['st_mass']} Solar masses
    - **Metallicity**: {planet_data['st_met']} [Fe/H]

    ### 🏞️ Habitability Score: {habitability}%
    - **Explanation:** Based on mass, temperature, and atmosphere composition, this planet has a habitability potential of {habitability}%.

    ### 🌏 Terraformability Score: {terraformability}%
    - **Explanation:** This score considers gravity, temperature range, and stellar radiation to estimate the planet's ability to be terraformed.

    **Final Conclusion:** Should this exoplanet be considered for future colonization?
    """
    
    response = gemini_model.generate_content(prompt)
    return response.text

# 🛠️ Main Execution Loop
def main():
    while True:
        user_input = input("\n🔍 Enter exoplanet name (or 'exit' to quit): ").strip().lower()
        if user_input == 'exit':
            print("\n🚀 Exiting program. Goodbye!\n")
            break

        # ✅ Check if the planet exists in the dataset
        planet_data = df[df['pl_name'].str.lower() == user_input]
        if planet_data.empty:
            print("\n❌ Planet not found. Try another name.\n")
            continue

        # ✅ Extract the planet's data
        planet_data = planet_data.iloc[0]

        # ✅ Predict ML-based habitability score
        ml_score = float(np.clip(xgb_model.predict(pd.DataFrame([planet_data[numeric_columns].drop(['Habitability Score'])]))[0], 0, 100))

        # ✅ Compute Terraformability Score
        terraformability = terraformability_score(planet_data)

        print(f"\n🌍 **{planet_data['pl_name']} Analysis:**")
        print(f"📊 ML Habitability Prediction: {ml_score:.2f}%")
        print(f"📏 Rule-Based Habitability Score: {planet_data['Habitability Score']}%")
        print(f"🌏 Terraformability Score: {terraformability}%")

        # ✅ AI Reasoning with Gemini
        print("\n🧠 AI Reasoning:") 
        print(generate_reasoning(planet_data, ml_score, terraformability))
        print("=================================")

main()



✅ Successfully loaded data from exoplanet_scores_Final.csv

✅ Model trained successfully! MAE: 0.2428




🔍 Enter exoplanet name (or 'exit' to quit):  TRAPPIST-1e



❌ Planet not found. Try another name.




🔍 Enter exoplanet name (or 'exit' to quit):  



❌ Planet not found. Try another name.



In [2]:
import pandas as pd

df = pd.read_csv("exoplanet_scores_Final.csv")
df

Unnamed: 0,pl_name,pl_rade,pl_bmasse,pl_orbper,pl_eqt,st_teff,st_mass,st_rad,st_met,pl_eqt_normalized,...,surface_gravity_normalized,Habitability Score,Terraformability Score,st_activity,pl_atmos,pl_surf_temp,pl_escape_vel,pl_radiation_flux,ESI,pl_water_probability
0,OGLE-2016-BLG-1227L b,13.900,250.00000,,912.530715,5412.39249,0.10,,0.014263,0.000000,...,-0.153474,42.577623,73.508386,0.646902,0.999960,1003.780144,47.498580,0.000000,0.001789,0.000000
1,Kepler-24 e,2.780,8.15000,18.998355,792.000000,5897.00000,1.05,1.29,-0.071000,-0.302064,...,-0.167278,44.618633,53.581554,0.525750,0.178098,806.105394,19.176725,0.253581,0.015668,0.000000
2,Kepler-1065 b,3.730,13.40000,3.609309,1092.000000,5635.00000,0.94,0.93,-0.010000,0.449771,...,-0.172550,45.053726,53.587472,0.591250,0.075347,1100.227930,21.228339,0.332518,0.003489,0.000000
3,HD 132406 b,12.800,1887.90074,908.000000,912.530715,5766.00000,0.97,1.34,0.129000,0.000000,...,0.436389,42.707095,94.344883,0.558500,1.000000,1003.783786,136.019952,0.018991,0.000982,0.000000
4,TOI-1260 c,2.760,13.20000,7.493134,651.000000,4227.00000,0.68,0.67,-0.100000,-0.655426,...,-0.128165,43.154515,54.718152,0.943250,0.355992,674.175104,24.493477,0.033578,0.028754,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5851,GJ 229 b,3.970,14.93794,579.474950,912.530715,3564.00000,0.51,0.46,0.014263,0.000000,...,-0.173435,42.645897,50.140841,1.000000,0.141986,925.487390,21.725417,0.000441,0.007109,0.000000
5852,GJ 229 A c,2.870,8.58137,121.932680,912.530715,3912.54000,0.51,0.46,0.014263,0.000000,...,-0.168013,42.676041,48.575035,1.000000,0.110219,922.588517,19.366689,0.001809,0.008752,0.000000
5853,Kepler-974 b,1.570,3.09000,4.194497,577.000000,3687.00000,0.52,0.50,0.070000,-0.840878,...,-0.155800,43.545388,48.943411,1.000000,0.277395,593.005696,15.712578,0.015937,0.071172,0.000000
5854,KOI-1843.03,0.610,8.00000,0.176891,1654.000000,3584.00000,0.46,0.45,0.000000,1.858207,...,1.011712,43.259542,67.051135,1.000000,0.741519,1776.647258,40.560014,0.095130,0.000383,0.000000


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5856 entries, 0 to 5855
Data columns (total 22 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   pl_name                     5856 non-null   object 
 1   pl_rade                     5856 non-null   float64
 2   pl_bmasse                   5856 non-null   float64
 3   pl_orbper                   5570 non-null   float64
 4   pl_eqt                      5856 non-null   float64
 5   st_teff                     5856 non-null   float64
 6   st_mass                     5849 non-null   float64
 7   st_rad                      5585 non-null   float64
 8   st_met                      5856 non-null   float64
 9   pl_eqt_normalized           5856 non-null   float64
 10  st_met_normalized           5856 non-null   float64
 11  surface_gravity             5856 non-null   float64
 12  surface_gravity_normalized  5856 non-null   float64
 13  Habitability Score          5856 