<a href="https://colab.research.google.com/github/Kostratana/NASA_project/blob/main/%22NASA_project_4%22.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

14-This code retrieves candidate exoplanet data from the NASA Exoplanet Archive, builds a DataFrame of randomly generated example features (temperature, transits) and habitability labels, tunes a Random Forest classifier with GridSearchCV, and outputs the predicted habitability class for the candidate planets in the test set.

In [None]:
import requests
import pandas as pd
from io import StringIO
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
import numpy as np

# Base URL for NASA Exoplanet Archive API
base_url = "https://exoplanetarchive.ipac.caltech.edu/cgi-bin/nstedAPI/nph-nstedAPI?"

# Define request parameters: rows of the "cumulative" table whose
# koi_disposition matches 'CANDIDATE', ordered by koi_period, returned as CSV
table = "cumulative"
where_clause = "koi_disposition like 'CANDIDATE'"
order_by = "koi_period"
format_type = "csv"

# Formulate the request URL
query_url = f"{base_url}table={table}&where={where_clause}&order={order_by}&format={format_type}"

# Execute the request. The timeout keeps the cell from hanging indefinitely
# if the archive stops responding (requests has no timeout by default).
response = requests.get(query_url, timeout=60)

if response.status_code == 200:
    # Read data into DataFrame
    data = pd.read_csv(StringIO(response.text))

    # Use the 'kepoi_name' column for candidate names; fall back to an empty
    # list when the response does not contain that column
    planet_names_column = 'kepoi_name'
    planet_names = data[planet_names_column].tolist() if planet_names_column in data.columns else []

    if not planet_names:
        print("No candidate planets found. Exiting.")
    else:
        # Example data for training the model.
        # NOTE: features and labels are randomly generated placeholders, one
        # row per candidate (row i corresponds to planet_names[i]); they are
        # not taken from the archive data.
        df = pd.DataFrame({
            'temperature': np.random.uniform(200, 400, size=len(planet_names)),  # Example temperatures
            'transits': np.random.randint(0, 2, size=len(planet_names)),  # Example transit data
            'label': np.random.choice(['habitable', 'not habitable'], size=len(planet_names))
        })

        # Convert class labels to numeric format
        df['label_numeric'] = df['label'].map({'habitable': 1, 'not habitable': 0})

        # Split data into training and testing sets (stratified on the label)
        X = df[['temperature', 'transits']]
        y = df['label_numeric']
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

        # Create a Random Forest model
        rf_model = RandomForestClassifier()

        # Set up hyperparameter tuning with Grid Search
        # (3 * 4 * 3 * 3 = 108 parameter combinations, 5-fold CV each)
        param_grid = {
            'n_estimators': [10, 50, 100],
            'max_depth': [None, 5, 10, 15],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4]
        }
        grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2, scoring='accuracy')

        # Train the model
        grid_search.fit(X_train, y_train)

        # Prediction
        predictions = grid_search.best_estimator_.predict(X_test)

        # Output predictions.
        # train_test_split shuffles the rows, so the i-th prediction does NOT
        # belong to planet_names[i]. X_test keeps df's original row labels
        # (0..n-1, aligned with planet_names), so use them to look up the
        # candidate each prediction was actually made for.
        for row_idx, pred in zip(X_test.index, predictions):
            print(f"Candidate Planet: {planet_names[row_idx]}, Prediction: {'Habitable' if pred == 1 else 'Not Habitable'}")
else:
    print(f"Error executing request: {response.status_code}")


Fitting 5 folds for each of 108 candidates, totalling 540 fits
Candidate Planet: K04419.01, Prediction: Habitable
Candidate Planet: K07859.01, Prediction: Habitable
Candidate Planet: K02879.01, Prediction: Not Habitable
Candidate Planet: K02874.01, Prediction: Habitable
Candidate Planet: K04844.01, Prediction: Not Habitable
Candidate Planet: K04144.01, Prediction: Habitable
Candidate Planet: K04685.01, Prediction: Habitable
Candidate Planet: K02093.03, Prediction: Habitable
Candidate Planet: K04430.01, Prediction: Habitable
Candidate Planet: K07865.01, Prediction: Not Habitable
Candidate Planet: K06635.01, Prediction: Not Habitable
Candidate Planet: K00971.01, Prediction: Habitable
Candidate Planet: K03132.01, Prediction: Habitable
Candidate Planet: K02796.01, Prediction: Habitable
Candidate Planet: K07489.01, Prediction: Habitable
Candidate Planet: K01251.01, Prediction: Habitable
Candidate Planet: K06860.01, Prediction: Habitable
Candidate Planet: K03913.01, Prediction: Habitable
Can

15-This code retrieves candidate exoplanet data from the NASA Exoplanet Archive, creates a Random Forest regression model to predict the probability of water presence on the planets based on two factors, and outputs the predicted probabilities for each candidate planet.

In [None]:
import requests
import pandas as pd
from io import StringIO
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import numpy as np

# Base URL for the NASA Exoplanet Archive API
base_url = "https://exoplanetarchive.ipac.caltech.edu/cgi-bin/nstedAPI/nph-nstedAPI?"

# Define request parameters: rows of the "cumulative" table whose
# koi_disposition matches 'CANDIDATE', ordered by koi_period, returned as CSV
table = "cumulative"
where_clause = "koi_disposition like 'CANDIDATE'"
order_by = "koi_period"
format_type = "csv"

# Build the request URL
query_url = f"{base_url}table={table}&where={where_clause}&order={order_by}&format={format_type}"

# Execute the request. The timeout keeps the cell from hanging indefinitely
# if the archive stops responding (requests has no timeout by default).
response = requests.get(query_url, timeout=60)

if response.status_code == 200:
    # Read the data into a DataFrame
    data = pd.read_csv(StringIO(response.text))

    # Use the 'kepoi_name' column for candidate names; fall back to an empty
    # list when the response does not contain that column
    planet_names_column = 'kepoi_name'
    planet_names = data[planet_names_column].tolist() if planet_names_column in data.columns else []

    if not planet_names:
        print("No candidate planets found. Exiting.")
    else:
        # Example data for training the model.
        # NOTE: factors and target are randomly generated placeholders, one
        # row per candidate (row i corresponds to planet_names[i]); they are
        # not taken from the archive data.
        df = pd.DataFrame({
            'factor_1': np.random.randint(1, 10, size=len(planet_names)),
            'factor_2': np.random.randint(5, 15, size=len(planet_names)),
            'water_probability': np.random.rand(len(planet_names))  # Probability of water presence
        })

        # Split the data into training and test sets
        X = df[['factor_1', 'factor_2']]
        y = df['water_probability']
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Create a Random Forest model for regression
        rf_model = RandomForestRegressor()

        # Train the model
        rf_model.fit(X_train, y_train)

        # Prediction
        predictions = rf_model.predict(X_test)

        # Output predictions.
        # train_test_split shuffles the rows, so the i-th prediction does NOT
        # belong to planet_names[i]. X_test keeps df's original row labels
        # (0..n-1, aligned with planet_names), so use them to look up the
        # candidate each prediction was actually made for.
        for row_idx, pred in zip(X_test.index, predictions):
            print(f"Candidate Planet: {planet_names[row_idx]}, Predicted Water Probability: {pred:.2f}")
else:
    print(f"Error executing request: {response.status_code}")


Candidate Planet: K04419.01, Predicted Water Probability: 0.63
Candidate Planet: K07859.01, Predicted Water Probability: 0.55
Candidate Planet: K02879.01, Predicted Water Probability: 0.38
Candidate Planet: K02874.01, Predicted Water Probability: 0.40
Candidate Planet: K04844.01, Predicted Water Probability: 0.47
Candidate Planet: K04144.01, Predicted Water Probability: 0.44
Candidate Planet: K04685.01, Predicted Water Probability: 0.49
Candidate Planet: K02093.03, Predicted Water Probability: 0.51
Candidate Planet: K04430.01, Predicted Water Probability: 0.42
Candidate Planet: K07865.01, Predicted Water Probability: 0.52
Candidate Planet: K06635.01, Predicted Water Probability: 0.47
Candidate Planet: K00971.01, Predicted Water Probability: 0.63
Candidate Planet: K03132.01, Predicted Water Probability: 0.56
Candidate Planet: K02796.01, Predicted Water Probability: 0.48
Candidate Planet: K07489.01, Predicted Water Probability: 0.45
Candidate Planet: K01251.01, Predicted Water Probabilit