In [1]:
import sys
print(sys.executable)

C:\Users\andre\AppData\Local\Programs\Python\Python312\python.exe


In [2]:
import os
print("DIR: ")  
print(os.getcwd())

DIR: 
C:\Users\andre\Desktop\PIANETI_STELLE


In [3]:
# Importa le librerie necessarie
from astroquery.sdss import SDSS
from astropy.table import Table
import pandas as pd
import matplotlib.pyplot as plt

In [4]:
import pytensor
pytensor.config.cxx = ''  # Imposta il flag per evitare l'uso di compilazioni in C



In [5]:
import exoplanet as xo
import pandas as pd
import numpy as np

In [6]:
# Percorso al file CSV (devi avere un file salvato localmente)
# Prendi il DATASET GIA SCARICATO precedentemente

file_path = "./Data/Nasa_Exoplanet_Archive.csv" 
# Leggi le prime righe per diagnosticare il problema
with open(file_path, 'r', encoding='utf-8') as f:
    for _ in range(24):  # Mostra le prime 5 righe
        print(f.readline().strip())  # Rimuove eventuali spazi bianchi

# This file was produced by the NASA Exoplanet Archive  http://exoplanetarchive.ipac.caltech.edu
# Mon Oct 21 10:20:43 2024
#
# User preference: *
#
# COLUMN pl_name:        Planet Name
# COLUMN pl_refname:     Planetary Parameter Reference
# COLUMN pl_orbper:      Orbital Period [days]
# COLUMN pl_orbsmax:     Orbit Semi-Major Axis [au]
# COLUMN pl_bmasse:      Planet Mass or Mass*sin(i) [Earth Mass]
# COLUMN pl_bmassj:      Planet Mass or Mass*sin(i) [Jupiter Mass]
# COLUMN pl_bmassprov:   Planet Mass or Mass*sin(i) Provenance
# COLUMN pl_orbeccen:    Eccentricity
# COLUMN pl_insol:       Insolation Flux [Earth Flux]
# COLUMN pl_eqt:         Equilibrium Temperature [K]
# COLUMN rastr:          RA [sexagesimal]
# COLUMN ra:             RA [deg]
# COLUMN decstr:         Dec [sexagesimal]
# COLUMN dec:            Dec [deg]
# COLUMN sy_dist:        Distance [pc]
# COLUMN sy_vmag:        V (Johnson) Magnitude
# COLUMN sy_kmag:        Ks (2MASS) Magnitude
# COLUMN sy_gaiamag:     Gaia Magn

Leggenda presa dalla documentazione ufficiale https://exoplanetarchive.ipac.caltech.edu :

- **pl_name**: Nome del Pianeta - Nome del pianeta più comunemente usato nella letteratura.
- **pl_refname**: Riferimento Parametri Planetari - Pubblicazione di riferimento per i parametri planetari.
- **pl_orbper**: Periodo Orbitale [giorni] - Tempo impiegato dal pianeta per completare un'orbita attorno alla sua stella ospite.
- **pl_orbsmax**: Semi-Asse Maggiore [au] - Raggio più lungo di un'orbita ellittica o separazione proiettata per esopianeti scoperti tramite microlensing gravitazionale o imaging diretto.
- **pl_bmasse**: Massa del Pianeta [Masse terrestri] - Quantità di materia del pianeta in unità di masse terrestri.
- **pl_bmassj**: Massa del Pianeta [Masse gioviane] - Quantità di materia del pianeta in unità di masse gioviane.
- **pl_bmassprov**: Provenienza Massa - Provenienza della misurazione della massa: Massa, M*sin(i), ecc.
- **pl_orbeccen**: Eccentricità - Quanto l'orbita del pianeta devia da un cerchio perfetto.
- **pl_insol**: Flusso di Insolazione [Flusso terrestre] - Quantità di energia ricevuta dal pianeta, in unità relative alla Terra.
- **pl_eqt**: Temperatura di Equilibrio [K] - Temperatura di equilibrio modellata per il pianeta.
- **rastr**: Ascensione Retta [sessagesimale] - Ascensione retta del sistema planetario in formato sessagesimale.
- **ra**: Ascensione Retta [gradi] - Ascensione retta in gradi decimali.
- **decstr**: Declinazione [sessagesimale] - Declinazione del sistema planetario in formato sessagesimale.
- **dec**: Declinazione [gradi] - Declinazione in gradi decimali.
- **sy_dist**: Distanza [pc] - Distanza dal sistema planetario in parsec.
- **sy_vmag**: Magnitudine V (Johnson) - Luminosità della stella ospite in magnitudini, misurata nella banda V (Johnson).
- **sy_kmag**: Magnitudine Ks (2MASS) - Luminosità della stella ospite in magnitudini, misurata nella banda K (2MASS).
- **sy_gaiamag**: Magnitudine Gaia - Luminosità della stella misurata nella banda Gaia.

In [7]:
# Prova a leggere il file come un DataFrame
try:
    # Carica il file in un DataFrame
    df = pd.read_csv(file_path, sep='\t', on_bad_lines='skip', encoding='utf-8')
    print("\nDati caricati con successo, rimani sfigato comunque però:")
except pd.errors.ParserError as e:
    print(f"Errore durante il parsing: {e}")
except FileNotFoundError:
    print(f"Errore: Il file '{file_path}' non è stato trovato.")
except Exception as e:
    print(f"Si è verificato un errore inaspettato: {e}")



Dati caricati con successo, rimani sfigato comunque però:


In [11]:
# Carica il file CSV, saltando le righe che iniziano con '#' e ignorando eventuali righe malformate
df = pd.read_csv(file_path, comment='#', sep=',', on_bad_lines='skip', encoding='utf-8')

In [13]:
# Estrarre solo le colonne di interesse
selected_columns = ['pl_name', 'pl_orbper', 'pl_orbsmax', 'pl_bmasse', 
                    'pl_bmassj', 'pl_bmassprov', 'pl_orbeccen', 'pl_insol', 'pl_eqt', 
                    'rastr', 'ra', 'decstr', 'dec', 'sy_dist']
# Filtrare il dataframe per mantenere solo le colonne selezionate
df = df[selected_columns]

In [14]:
df

Unnamed: 0,pl_name,pl_orbper,pl_orbsmax,pl_bmasse,pl_bmassj,pl_bmassprov,pl_orbeccen,pl_insol,pl_eqt,rastr,ra,decstr,dec,sy_dist
0,11 Com b,,1.210,5434.70000,17.100,Msini,,,,12h20m42.91s,185.178779,+17d47m35.71s,17.793252,93.1846
1,11 Com b,326.03000,1.290,6165.60000,19.400,Msini,0.231,,,12h20m42.91s,185.178779,+17d47m35.71s,17.793252,93.1846
2,11 Com b,323.21000,1.178,4914.89849,15.464,Msini,0.238,,,12h20m42.91s,185.178779,+17d47m35.71s,17.793252,93.1846
3,11 UMi b,516.21997,1.530,4684.81420,14.740,Msini,0.080,,,15h17m05.90s,229.274595,+71d49m26.19s,71.823943,125.3210
4,11 UMi b,,1.510,3432.40000,10.800,Msini,,,,15h17m05.90s,229.274595,+71d49m26.19s,71.823943,125.3210
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36457,ups And d,1319.00000,2.570,1255.38000,3.950,Msini,0.269,,,01h36m47.60s,24.198353,+41d24m13.73s,41.403815,13.4054
36458,ups Leo b,385.20000,1.180,162.09249,0.510,Msini,0.320,,,11h36m56.93s,174.237219,-00d49m24.83s,-0.823564,52.5973
36459,xi Aql b,,0.580,642.00000,2.020,Msini,,,,19h54m14.99s,298.562449,+08d27m39.98s,8.461105,56.1858
36460,xi Aql b,136.75000,0.680,890.00000,2.800,Msini,0.000,,,19h54m14.99s,298.562449,+08d27m39.98s,8.461105,56.1858


In [61]:
df_cleaned = df.dropna()

In [62]:
df_cleaned

Unnamed: 0,pl_name,pl_orbper,pl_orbsmax,pl_bmasse,pl_bmassj,pl_bmassprov,pl_orbeccen,pl_insol,pl_eqt,rastr,ra,decstr,dec,sy_dist,target
225,Barnard b,3.153300,0.02294,0.37,0.00116,Msini,0.1600,6.76,400.0,17h57m47.67s,269.448614,+04d44m16.73s,4.737981,1.82655,0
469,EPIC 220674823 b,0.571292,0.01326,8.21,0.02583,Mass,0.0000,4670.00,2300.0,00h52m19.21s,13.080024,+10d47m40.94s,10.794705,244.59000,0
485,EPIC 220674823 c,13.339700,0.10830,8.90,0.02800,Mass,0.1300,70.00,805.0,00h52m19.21s,13.080024,+10d47m40.94s,10.794705,244.59000,0
492,EPIC 229004835 b,16.141132,0.12370,10.40,0.03272,Mass,0.2300,69.70,804.0,12h25m56.65s,186.486041,-01d24m17.00s,-1.404721,121.97100,0
496,EPIC 248847494 b,3650.000000,4.50000,4131.79,13.00000,Mass,0.0000,0.19,183.0,10h37m33.38s,159.389081,+11d50m33.89s,11.842748,551.88600,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36332,YZ Cet c,3.059890,0.02156,1.14,0.00359,Msini,0.0000,4.72,410.0,01h12m31.94s,18.133079,-16d59m46.48s,-16.996243,3.71207,0
36336,YZ Cet d,4.656260,0.02851,1.09,0.00343,Msini,0.0700,2.70,357.0,01h12m31.94s,18.133079,-16d59m46.48s,-16.996243,3.71207,0
36411,pi Men c,6.267900,0.06839,4.82,0.01517,Mass,0.0000,309.00,1170.0,05h37m11.83s,84.299280,-80d27m52.57s,-80.464604,18.27020,0
36422,rho CrB b,39.845800,0.21960,332.10,1.04490,Msini,0.0373,34.70,614.0,16h01m02.42s,240.260064,+33d18m00.67s,33.300186,17.46710,0


In [20]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import precision_score, recall_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

In [22]:
df['target']= 0 #Pianeta 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['target']= 0 #Pianeta


In [42]:
type(df)

pandas.core.frame.DataFrame

In [64]:
# Supponendo che 'target' sia la colonna che vuoi mantenere
df_cleaned = df 
df = df.select_dtypes(include=[np.number])  # Tieni solo le colonne numeriche
df = df.dropna()
df.head()

Unnamed: 0,pl_orbper,pl_orbsmax,pl_bmasse,pl_bmassj,pl_orbeccen,pl_insol,pl_eqt,ra,dec,sy_dist,target
225,3.1533,0.02294,0.37,0.00116,0.16,6.76,400.0,269.448614,4.737981,1.82655,0
469,0.571292,0.01326,8.21,0.02583,0.0,4670.0,2300.0,13.080024,10.794705,244.59,0
485,13.3397,0.1083,8.9,0.028,0.13,70.0,805.0,13.080024,10.794705,244.59,0
492,16.141132,0.1237,10.4,0.03272,0.23,69.7,804.0,186.486041,-1.404721,121.971,0
496,3650.0,4.5,4131.79,13.0,0.0,0.19,183.0,159.389081,11.842748,551.886,0


In [65]:
# Supponiamo che la colonna 'target' sia quella che identifica se è un pianeta o meno
X = df.drop('target', axis=1).to_numpy()    # Feature (tutte le colonne tranne 'target')
y = df['target'].to_numpy()    # Target (pianeta o no)


# Divisione del dataset: 80% training e 20% test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Reshape per adattarsi alla CNN, assumendo che X abbia una sola feature per riga (1D)
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

In [66]:
print(X, type(X))
print(y,type(y))

[[ 3.15330000e+00  2.29400000e-02  3.70000000e-01 ...  2.69448614e+02
   4.73798080e+00  1.82655000e+00]
 [ 5.71292000e-01  1.32600000e-02  8.21000000e+00 ...  1.30800239e+01
   1.07947051e+01  2.44590000e+02]
 [ 1.33397000e+01  1.08300000e-01  8.90000000e+00 ...  1.30800239e+01
   1.07947051e+01  2.44590000e+02]
 ...
 [ 6.26790000e+00  6.83900000e-02  4.82000000e+00 ...  8.42992798e+01
  -8.04646041e+01  1.82702000e+01]
 [ 3.98458000e+01  2.19600000e-01  3.32100000e+02 ...  2.40260064e+02
   3.33001861e+01  1.74671000e+01]
 [ 1.02540000e+02  4.12300000e-01  2.50000000e+01 ...  2.40260064e+02
   3.33001861e+01  1.74671000e+01]] <class 'numpy.ndarray'>
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

In [67]:
# Step 2: Creazione della CNN
def create_cnn_model(input_shape):
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(0.25))
    
    model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(0.25))
    
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    
    model.add(Dense(1, activation='sigmoid'))  # Uscita binaria (pianeta o no)
    
    model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Definiamo il modello CNN
model = create_cnn_model((X_train.shape[1], 1))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [68]:
print(X_train.dtype, type(X_train))
print(y_train.dtype,type(y_train))

float64 <class 'numpy.ndarray'>
int64 <class 'numpy.ndarray'>


In [69]:
print("Forma di X_train:", X_train.shape)
print("Forma di X_test:", X_test.shape)

Forma di X_train: (395, 10, 1)
Forma di X_test: (99, 10, 1)


In [70]:
print("Tipo di X_fold_train:", X_fold_train.dtype)
print("Tipo di X_fold_val:", X_fold_val.dtype)
print("Tipo di y_fold_train:", y_fold_train.dtype)
print("Tipo di y_fold_val:", y_fold_val.dtype)

Tipo di X_fold_train: object
Tipo di X_fold_val: object
Tipo di y_fold_train: int64
Tipo di y_fold_val: int64


In [71]:
X_train_1= X_train.astype(np.float32)

In [72]:
# Addestramento del modello sul set di addestramento
model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)

# Predizione sul set di test
y_pred = (model.predict(X_test) > 0.5).astype(int)

# Calcolo precision e recall
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

# Risultati finali
print(f"Precisione: {precision:.4f}")
print(f"Recall: {recall:.4f}")

Epoch 1/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 54ms/step - accuracy: 0.7055 - loss: 29.1763
Epoch 2/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - accuracy: 1.0000 - loss: 2.5714e-14
Epoch 3/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 45ms/step - accuracy: 1.0000 - loss: 2.1880e-37
Epoch 4/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - accuracy: 1.0000 - loss: 0.0000e+00
Epoch 5/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - accuracy: 1.0000 - loss: 1.0991e-30
Epoch 6/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - accuracy: 1.0000 - loss: 1.9544e-32
Epoch 7/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - accuracy: 1.0000 - loss: 4.7876e-12
Epoch 8/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - accuracy: 1.0000 - loss: 2.3263e-29
Epoch 9/10
[1m13/

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [73]:
# Step 4: Valutazione sul test set
y_pred_test = (model.predict(X_test) > 0.5).astype(int)
precision_test = precision_score(y_test, y_pred_test)
recall_test = recall_score(y_test, y_pred_test)

print(f"Precision sul test set: {precision_test:.4f}")
print(f"Recall sul test set: {recall_test:.4f}")

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Precision sul test set: 0.0000
Recall sul test set: 0.0000


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [74]:
y_pred_test

array([[0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
    

In [75]:
# Step 4: Valutazione sul test set
y_pred_test = (model.predict(X_test) > 0.7).astype(int)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step


In [78]:
from sklearn.metrics import classification_report
# Report di classificazione
print(classification_report(y_test, y_pred_test))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        99

    accuracy                           1.00        99
   macro avg       1.00      1.00      1.00        99
weighted avg       1.00      1.00      1.00        99

