In [2]:
# Import core libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# For astronomical data (may use later in processing)
from astropy.table import Table
import lightkurve as lk

# Notebook-wide plot defaults: seaborn "whitegrid" theme, 10x6 default figure.
# NOTE(review): the figure-size override is applied after sns.set() on purpose,
# since sns.set() rewrites matplotlib rcParams — confirm it does not reset
# figure.figsize on the installed seaborn version.
sns.set(style="whitegrid")
plt.rcParams.update({"figure.figsize": (10, 6)})



In [10]:
# pandas is already imported in the setup cell; repeated here so this cell
# can also be run on its own.
import pandas as pd

# Read the comma-separated table, ignoring '#'-prefixed metadata lines and
# any blanks that follow a delimiter, then trim stray whitespace from the
# column headers in the same chain.
df = pd.read_csv(
    "keplar_dataset.csv", sep=",", comment="#", skipinitialspace=True
).rename(columns=str.strip)

# ---- Dataset Summary ----
print("✅ Dataset successfully loaded and parsed!\n")
print(f"Shape: {df.shape}")
# Only the first 15 column names are listed to keep the output short.
print("Columns:", list(df.columns[:15]), "...")

# Preview of the leading rows.
print("\nFirst 5 rows of dataset:")
print(df.head())

# Column dtypes and non-null counts (DataFrame.info writes to stdout itself).
print("\nDataset Info:")
df.info()

# Null counts, limited to the first 15 columns.
print("\nMissing Values per Column:")
print(df.isna().sum().iloc[:15])

✅ Dataset successfully loaded and parsed!

Shape: (9564, 141)
Columns: ['rowid', 'kepid', 'kepoi_name', 'kepler_name', 'koi_disposition', 'koi_vet_stat', 'koi_vet_date', 'koi_pdisposition', 'koi_score', 'koi_fpflag_nt', 'koi_fpflag_ss', 'koi_fpflag_co', 'koi_fpflag_ec', 'koi_disp_prov', 'koi_comment'] ...

First 5 rows of dataset:
   rowid     kepid kepoi_name   kepler_name koi_disposition koi_vet_stat  \
0      1  10797460  K00752.01  Kepler-227 b       CONFIRMED         Done   
1      2  10797460  K00752.02  Kepler-227 c       CONFIRMED         Done   
2      3  10811496  K00753.01           NaN       CANDIDATE         Done   
3      4  10848459  K00754.01           NaN  FALSE POSITIVE         Done   
4      5  10854555  K00755.01  Kepler-664 b       CONFIRMED         Done   

  koi_vet_date koi_pdisposition  koi_score  koi_fpflag_nt  ...  \
0   2018-08-16        CANDIDATE      1.000              0  ...   
1   2018-08-16        CANDIDATE      0.969              0  ...   
2   2018-08-

In [11]:
# Breakdown of rows by disposition label.
disposition = df["koi_disposition"]
print("Planet dispositions:", disposition.value_counts(), "\n")

# Boolean mask of confirmed planets, reused for the count and the mean radius.
is_confirmed = disposition.eq("CONFIRMED")
print("Number of confirmed exoplanets:", is_confirmed.sum())

# Mean of koi_prad over confirmed rows (pandas skips NaN values by default).
print("Average radius (Earth radii):", df.loc[is_confirmed, "koi_prad"].mean())

# Example filter for small, roughly Earth-flux candidates:
#   - disposition is still CANDIDATE
#   - radius below 2 Earth radii
#   - insolation within [0.5, 2.0] (bounds inclusive)
# Rows with a missing radius or insolation fail the comparisons and drop out.
habitable_candidates = df[
    disposition.eq("CANDIDATE")
    & df["koi_prad"].lt(2.0)
    & df["koi_insol"].between(0.5, 2.0)
]
print("Potential habitable candidates:", len(habitable_candidates))


Planet dispositions: koi_disposition
FALSE POSITIVE    4839
CONFIRMED         2746
CANDIDATE         1979
Name: count, dtype: int64 

Number of confirmed exoplanets: 2746
Average radius (Earth radii): 2.8573724489795915
Potential habitable candidates: 61
