In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import bokeh

In [2]:
# Resolve the directory the notebook is running from. A notebook has no
# `__file__`; the previous `os.path.abspath('__file__')` resolved that literal
# file name against the working directory, so `os.getcwd()` says the same thing
# directly.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
data_dir = os.path.join(parent_dir, 'data')

# Keep the path under its own name so `exoplanets` is only ever bound to the
# DataFrame (a failed read used to leave it bound to the path string).
exoplanets_path = os.path.join(current_dir, 'Exoplanets_Confirmed_db_1504.csv')

try:
    # 'loc_rowid' becomes the index; text following a '#' is not parsed.
    exoplanets = pd.read_csv(exoplanets_path, index_col="loc_rowid", comment="#")
    print("Datasets loaded successfully")

except Exception as e:
    print(f"Error loading datasets: {e}")
    print(f"Current directory: {current_dir}")
    # Report the path that was actually read; the old message pointed at
    # `data_dir`, which this cell never searches.
    print(f"Looking for file: {exoplanets_path}")
    # Re-raise so the following cells do not run without a loaded DataFrame.
    raise

Datasets loaded successfully


In [3]:
# Row counts per solution type, followed by the labels of every loaded column.
soltype_counts = exoplanets["soltype"].value_counts()
print(soltype_counts)

column_labels = exoplanets.columns
print(column_labels)


soltype
Published Confirmed                               18998
Kepler Project Candidate (q1_q17_dr25_sup_koi)     2732
Kepler Project Candidate (q1_q16_koi)              2721
Kepler Project Candidate (q1_q17_dr25_koi)         2715
Kepler Project Candidate (q1_q17_dr24_koi)         2701
Kepler Project Candidate (q1_q12_koi)              2679
Kepler Project Candidate (q1_q8_koi)               2306
Published Candidate                                2190
TESS Project Candidate                             1148
Name: count, dtype: int64
Index(['pl_name', 'hostname', 'sy_snum', 'sy_pnum', 'discoverymethod',
       'disc_year', 'disc_facility', 'disc_telescope', 'disc_instrument',
       'soltype', 'pl_controv_flag', 'pl_refname', 'pl_orbper',
       'pl_orbpererr1', 'pl_orbpererr2', 'pl_orbperlim', 'pl_orbsmax',
       'pl_orbsmaxerr1', 'pl_orbsmaxerr2', 'pl_orbsmaxlim', 'pl_rade',
       'pl_radeerr1', 'pl_radeerr2', 'pl_radelim', 'pl_radj', 'pl_radjerr1',
       'pl_radjerr2', 'pl_radjlim'

In [4]:
# Column-name aliases used by the analysis cells.
name = "pl_name"
detection_method = "discoverymethod"
year = "disc_year"
reference = "pl_refname"
facility = "disc_facility"
instrument = "disc_instrument"
telescope = "disc_telescope"

# Columns removed from the displayed analysis frame (the names suggest
# reference, limit-flag and date fields -- confirm against the archive's
# column documentation).
columns_to_drop = ["soltype", "pl_controv_flag", "pl_refname", "pl_orbperlim", 
                   "pl_orbsmaxlim", "pl_radelim", "pl_radjlim", "pl_masselim", "pl_msinielim", "pl_bmasselim", 
                   "pl_bmassjlim", "pl_bmassprov", "pl_denslim", "pl_orbeccenlim", "pl_insollim", "pl_orbincllim",
                   "st_refname", "st_tefflim", "st_radlim", "st_masslim", "st_metlim", "st_logglim", "sy_refname",
                   "rowupdate", "pl_pubdate", "releasedate"]

# Filter to "Published Confirmed" rows once and derive BOTH frames from that
# selection. `ex` used to be built from every row (candidate solutions
# included), although the cells that use it report confirmed discoveries.
confirmed_rows = exoplanets[exoplanets["soltype"] == "Published Confirmed"]

# `ex`: first confirmed row per planet name, all columns kept.
ex = confirmed_rows.drop_duplicates(subset = name, keep = "first")

# `exoplanets`: first confirmed row per (planet name, detection method) pair,
# without the columns listed above.
# NOTE: this rebinds `exoplanets` to a frame that no longer has "soltype", so
# the cell cannot be re-run without re-running the loading cell first.
exoplanets = confirmed_rows.drop_duplicates(subset = [name, detection_method], keep = "first").drop(columns = columns_to_drop)
display(exoplanets)

Unnamed: 0_level_0,pl_name,hostname,sy_snum,sy_pnum,discoverymethod,disc_year,disc_facility,disc_telescope,disc_instrument,pl_orbper,...,rastr,ra,decstr,dec,sy_dist,sy_disterr1,sy_disterr2,sy_gaiamag,sy_gaiamagerr1,sy_gaiamagerr2
loc_rowid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,11 Com b,11 Com,2,1,Radial Velocity,2007,Xinglong Station,2.16 m Telescope,Coude Echelle Spectrograph,326.030000,...,12h20m42.91s,185.178779,+17d47m35.71s,17.793252,93.1846,1.92380,-1.92380,4.44038,0.003848,-0.003848
4,11 UMi b,11 UMi,1,1,Radial Velocity,2009,Thueringer Landessternwarte Tautenburg,2.0 m Alfred Jensch Telescope,Coude Echelle Spectrograph,516.219970,...,15h17m05.90s,229.274595,+71d49m26.19s,71.823943,125.3210,1.97650,-1.97650,4.56216,0.003903,-0.003903
7,14 And b,14 And,1,1,Radial Velocity,2008,Okayama Astrophysical Observatory,1.88 m Telescope,HIDES Echelle Spectrograph,185.840000,...,23h31m17.80s,352.824150,+39d14m09.01s,39.235837,75.4392,0.71400,-0.71400,4.91781,0.002826,-0.002826
10,14 Her b,14 Her,1,2,Radial Velocity,2002,W. M. Keck Observatory,10 m Keck I Telescope,HIRES Spectrometer,1773.400020,...,16h10m24.50s,242.602101,+43d48m58.90s,43.816362,17.9323,0.00730,-0.00730,6.38300,0.000351,-0.000351
18,16 Cyg B b,16 Cyg B,3,1,Radial Velocity,1996,Multiple Observatories,Multiple Telescopes,Multiple Instruments,798.500000,...,19h41m51.75s,295.465642,+50d31m00.57s,50.516824,21.1397,0.01100,-0.01110,6.06428,0.000603,-0.000603
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38168,ups And b,ups And,2,3,Radial Velocity,1996,Lick Observatory,3.0 m C. Donald Shane Telescope,Hamilton Echelle Spectrograph,4.617136,...,01h36m47.60s,24.198353,+41d24m13.73s,41.403815,13.4054,0.06350,-0.06290,3.98687,0.008937,-0.008937
38175,ups And c,ups And,2,3,Radial Velocity,1999,Multiple Observatories,Multiple Telescopes,Multiple Instruments,241.223000,...,01h36m47.60s,24.198353,+41d24m13.73s,41.403815,13.4054,0.06350,-0.06290,3.98687,0.008937,-0.008937
38181,ups And d,ups And,2,3,Radial Velocity,1999,Multiple Observatories,Multiple Telescopes,Multiple Instruments,1282.410000,...,01h36m47.60s,24.198353,+41d24m13.73s,41.403815,13.4054,0.06350,-0.06290,3.98687,0.008937,-0.008937
38187,ups Leo b,ups Leo,1,1,Radial Velocity,2021,Okayama Astrophysical Observatory,1.88 m Telescope,HIDES Echelle Spectrograph,385.200000,...,11h36m56.93s,174.237219,-00d49m24.83s,-0.823564,52.5973,0.92720,-0.89630,4.03040,0.008513,-0.008513


# Exoplanet Detection Methods Analysis

## Data Overview
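The catalogue has been loaded, restricted to published confirmed planets, and de-duplicated. The sections below break the discoveries down by detection method and by the instruments and facilities used.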

## Detection Methods Summary

In [5]:
# Detection methods analysis
# Tally the rows per detection method (largest first) before printing anything.
method_tally = exoplanets[detection_method].value_counts()
methods_count = method_tally.sort_values(ascending=False)
planet_total = len(exoplanets)

print("=== EXOPLANET DETECTION METHODS SUMMARY ===\n")
print(f"Total number of confirmed exoplanets: {len(exoplanets)}")
print(f"Number of different detection methods: {len(methods_count)}\n")

# Ranked table: position, method name, row count and share of all rows.
print("Detection methods (sorted by number of discoveries):")
print("-" * 50)
for i, (method, count) in enumerate(methods_count.items(), start=1):
    percentage = count / planet_total * 100
    print(f"{i:2d}. {method:<35} {count:>6} ({percentage:5.1f}%)")

# Combined share of the three largest methods.
print(f"\nTop 3 methods account for {(methods_count.head(3).sum() / len(exoplanets) * 100):.1f}% of all discoveries")

=== EXOPLANET DETECTION METHODS SUMMARY ===

Total number of confirmed exoplanets: 5869
Number of different detection methods: 11

Detection methods (sorted by number of discoveries):
--------------------------------------------------
 1. Transit                               4360 ( 74.3%)
 2. Radial Velocity                       1112 ( 18.9%)
 3. Microlensing                           237 (  4.0%)
 4. Imaging                                 83 (  1.4%)
 5. Transit Timing Variations               35 (  0.6%)
 6. Eclipse Timing Variations               17 (  0.3%)
 7. Orbital Brightness Modulation            9 (  0.2%)
 8. Pulsar Timing                            8 (  0.1%)
 9. Astrometry                               5 (  0.1%)
10. Pulsation Timing Variations              2 (  0.0%)
11. Disk Kinematics                          1 (  0.0%)

Top 3 methods account for 97.3% of all discoveries


## Instruments and Facilities Analysis by Detection Method

Now we'll analyze the instruments and facilities used for each detection method:

In [6]:
# 1. TRANSIT METHOD ANALYSIS
# Build every table for the Transit rows first, then print the report.
is_transit = ex[detection_method] == "Transit"
transit_data = ex[is_transit].copy()
transit_instruments = transit_data[instrument].value_counts()
transit_facilities = transit_data[facility].value_counts()
transit_pair_sizes = transit_data.groupby([instrument, facility]).size()
transit_combined = transit_pair_sizes.sort_values(ascending=False)
transit_total = len(transit_data)

print("=" * 60, "1. TRANSIT METHOD", "=" * 60, sep="\n")
print(f"Total Transit discoveries: {len(transit_data)}")

# Ten most frequent instruments, each with its share of the Transit rows.
print("\n--- INSTRUMENTS ---")
print(f"Number of different instruments: {len(transit_instruments)}")
for i, (instr, count) in enumerate(transit_instruments.head(10).items(), start=1):
    percentage = count / transit_total * 100
    print(f"{i:2d}. {instr:<35} {count:>4} ({percentage:4.1f}%)")

# Ten most frequent facilities, same layout.
print("\n--- FACILITIES ---")
print(f"Number of different facilities: {len(transit_facilities)}")
for i, (fac, count) in enumerate(transit_facilities.head(10).items(), start=1):
    percentage = count / transit_total * 100
    print(f"{i:2d}. {fac:<35} {count:>4} ({percentage:4.1f}%)")

# Five most frequent (instrument, facility) pairs.
print("\n--- TOP INSTRUMENT-FACILITY COMBINATIONS ---")
for i, ((instr, fac), count) in enumerate(transit_combined.head(5).items(), start=1):
    print(f"{i}. {instr} @ {fac}: {count}")

print("\n")

1. TRANSIT METHOD
Total Transit discoveries: 4360

--- INSTRUMENTS ---
Number of different instruments: 29
 1. Kepler CCD Array                    3292 (75.5%)
 2. TESS CCD Array                       616 (14.1%)
 3. iKon-L CCD Camera                    153 ( 3.5%)
 4. Apogee 4K CCD Sensor                  95 ( 2.2%)
 5. 2K CCD Sensor                         60 ( 1.4%)
 6. CoRoT CCD Array                       34 ( 0.8%)
 7. Multiple Instruments                  33 ( 0.8%)
 8. NGTS CCD array                        21 ( 0.5%)
 9. FLI ProLine 4K CCD Sensor             10 ( 0.2%)
10. OGLE CCD Array                         8 ( 0.2%)

--- FACILITIES ---
Number of different facilities: 36
 1. Kepler                              2746 (63.0%)
 2. Transiting Exoplanet Survey Satellite (TESS)  616 (14.1%)
 3. K2                                   546 (12.5%)
 4. SuperWASP                            111 ( 2.5%)
 5. HATSouth                              72 ( 1.7%)
 6. HATNet                        

In [7]:
# 2. RADIAL VELOCITY METHOD ANALYSIS
# Build every table for the Radial Velocity rows first, then print the report.
is_radial_velocity = ex[detection_method] == "Radial Velocity"
radial_velocity_data = ex[is_radial_velocity].copy()
rv_instruments = radial_velocity_data[instrument].value_counts()
rv_facilities = radial_velocity_data[facility].value_counts()
rv_pair_sizes = radial_velocity_data.groupby([instrument, facility]).size()
rv_combined = rv_pair_sizes.sort_values(ascending=False)
rv_total = len(radial_velocity_data)

print("=" * 60, "2. RADIAL VELOCITY METHOD", "=" * 60, sep="\n")
print(f"Total Radial Velocity discoveries: {len(radial_velocity_data)}")

# Ten most frequent instruments, each with its share of the Radial Velocity rows.
print("\n--- INSTRUMENTS ---")
print(f"Number of different instruments: {len(rv_instruments)}")
for i, (instr, count) in enumerate(rv_instruments.head(10).items(), start=1):
    percentage = count / rv_total * 100
    print(f"{i:2d}. {instr:<35} {count:>4} ({percentage:4.1f}%)")

# Ten most frequent facilities, same layout.
print("\n--- FACILITIES ---")
print(f"Number of different facilities: {len(rv_facilities)}")
for i, (fac, count) in enumerate(rv_facilities.head(10).items(), start=1):
    percentage = count / rv_total * 100
    print(f"{i:2d}. {fac:<35} {count:>4} ({percentage:4.1f}%)")

# Five most frequent (instrument, facility) pairs.
print("\n--- TOP INSTRUMENT-FACILITY COMBINATIONS ---")
for i, ((instr, fac), count) in enumerate(rv_combined.head(5).items(), start=1):
    print(f"{i}. {instr} @ {fac}: {count}")

print("\n")

2. RADIAL VELOCITY METHOD
Total Radial Velocity discoveries: 1112

--- INSTRUMENTS ---
Number of different instruments: 38
 1. Multiple Instruments                 295 (26.5%)
 2. HARPS Spectrograph                   223 (20.1%)
 3. HIRES Spectrometer                   181 (16.3%)
 4. CORALIE Spectrograph                  52 ( 4.7%)
 5. SOPHIE Spectrograph                   48 ( 4.3%)
 6. HIDES Echelle Spectrograph            36 ( 3.2%)
 7. HARPS-N Spectrograph                  34 ( 3.1%)
 8. UCLES Spectrograph                    34 ( 3.1%)
 9. Hamilton Echelle Spectrograph         34 ( 3.1%)
10. BOES Echelle Spectrograph             26 ( 2.3%)

--- FACILITIES ---
Number of different facilities: 29
 1. La Silla Observatory                 282 (25.4%)
 2. Multiple Observatories               275 (24.7%)
 3. W. M. Keck Observatory               181 (16.3%)
 4. Haute-Provence Observatory            67 ( 6.0%)
 5. Okayama Astrophysical Observatory     36 ( 3.2%)
 6. Lick Observatory       

In [8]:
# 3. MICROLENSING METHOD ANALYSIS
# Build every table for the Microlensing rows first, then print the report.
is_microlensing = ex[detection_method] == "Microlensing"
microlensing_data = ex[is_microlensing].copy()
ml_instruments = microlensing_data[instrument].value_counts()
ml_facilities = microlensing_data[facility].value_counts()
ml_pair_sizes = microlensing_data.groupby([instrument, facility]).size()
ml_combined = ml_pair_sizes.sort_values(ascending=False)
ml_total = len(microlensing_data)

print("=" * 60, "3. MICROLENSING METHOD", "=" * 60, sep="\n")
print(f"Total Microlensing discoveries: {len(microlensing_data)}")

# Every instrument, each with its share of the Microlensing rows.
print("\n--- INSTRUMENTS ---")
print(f"Number of different instruments: {len(ml_instruments)}")
for i, (instr, count) in enumerate(ml_instruments.items(), start=1):
    percentage = count / ml_total * 100
    print(f"{i:2d}. {instr:<35} {count:>4} ({percentage:4.1f}%)")

# Every facility, same layout.
print("\n--- FACILITIES ---")
print(f"Number of different facilities: {len(ml_facilities)}")
for i, (fac, count) in enumerate(ml_facilities.items(), start=1):
    percentage = count / ml_total * 100
    print(f"{i:2d}. {fac:<35} {count:>4} ({percentage:4.1f}%)")

# Every (instrument, facility) pair, most frequent first.
print("\n--- INSTRUMENT-FACILITY COMBINATIONS ---")
for i, ((instr, fac), count) in enumerate(ml_combined.items(), start=1):
    print(f"{i}. {instr} @ {fac}: {count}")

print("\n")

3. MICROLENSING METHOD
Total Microlensing discoveries: 237

--- INSTRUMENTS ---
Number of different instruments: 7
 1. 18k Mosaic CCD camera                103 (43.5%)
 2. OGLE CCD Array                        98 (41.4%)
 3. MOA CCD Array                         31 (13.1%)
 4. Multiple Instruments                   2 ( 0.8%)
 5. Gaia CCD array                         1 ( 0.4%)
 6. Kepler CCD Array                       1 ( 0.4%)
 7. WFCAM                                  1 ( 0.4%)

--- FACILITIES ---
Number of different facilities: 7
 1. KMTNet                               103 (43.5%)
 2. OGLE                                  98 (41.4%)
 3. MOA                                   31 (13.1%)
 4. Multiple Observatories                 2 ( 0.8%)
 5. European Space Agency (ESA) Gaia Satellite    1 ( 0.4%)
 6. K2                                     1 ( 0.4%)
 7. Mauna Kea Observatory                  1 ( 0.4%)

--- INSTRUMENT-FACILITY COMBINATIONS ---
1. 18k Mosaic CCD camera @ KMTNet: 103
2

In [9]:
# 4. DIRECT IMAGING METHOD ANALYSIS
# Build every table for the Imaging rows first, then print the report.
is_imaging = ex[detection_method] == "Imaging"
imaging_data = ex[is_imaging].copy()
img_instruments = imaging_data[instrument].value_counts()
img_facilities = imaging_data[facility].value_counts()
img_pair_sizes = imaging_data.groupby([instrument, facility]).size()
img_combined = img_pair_sizes.sort_values(ascending=False)
img_total = len(imaging_data)

print("=" * 60, "4. DIRECT IMAGING METHOD", "=" * 60, sep="\n")
print(f"Total Direct Imaging discoveries: {len(imaging_data)}")

# Every instrument, each with its share of the Imaging rows.
print("\n--- INSTRUMENTS ---")
print(f"Number of different instruments: {len(img_instruments)}")
for i, (instr, count) in enumerate(img_instruments.items(), start=1):
    percentage = count / img_total * 100
    print(f"{i:2d}. {instr:<35} {count:>4} ({percentage:4.1f}%)")

# Every facility, same layout.
print("\n--- FACILITIES ---")
print(f"Number of different facilities: {len(img_facilities)}")
for i, (fac, count) in enumerate(img_facilities.items(), start=1):
    percentage = count / img_total * 100
    print(f"{i:2d}. {fac:<35} {count:>4} ({percentage:4.1f}%)")

# Every (instrument, facility) pair, most frequent first.
print("\n--- INSTRUMENT-FACILITY COMBINATIONS ---")
for i, ((instr, fac), count) in enumerate(img_combined.items(), start=1):
    print(f"{i}. {instr} @ {fac}: {count}")

print("\n")

4. DIRECT IMAGING METHOD
Total Direct Imaging discoveries: 83

--- INSTRUMENTS ---
Number of different instruments: 31
 1. SPHERE                                11 (13.3%)
 2. Multiple Instruments                  11 (13.3%)
 3. NIRC2 Camera                           8 ( 9.6%)
 4. NACO Camera                            8 ( 9.6%)
 5. Gaia CCD array                         7 ( 8.4%)
 6. NIRI Camera                            5 ( 6.0%)
 7. Gemini Planet Imager                   3 ( 3.6%)
 8. IRAC Infrared Array Camera             3 ( 3.6%)
 9. LMIRCam                                3 ( 3.6%)
10. HiCIAO Camera                          2 ( 2.4%)
11. PHARO Camera                           2 ( 2.4%)
12. WIRCam                                 1 ( 1.2%)
13. SIMON Near-Infrared Spectroimager      1 ( 1.2%)
14. WFPC2 Camera                           1 ( 1.2%)
15. Infrared Camera and Spectrograph (IRCS)    1 ( 1.2%)
16. SpeX                                   1 ( 1.2%)
17. ACS Camera               

In [10]:
# 5. OTHER DETECTION METHODS ANALYSIS
print("=" * 60)
print("5. OTHER DETECTION METHODS")
print("=" * 60)

# Every method except the four listed here gets a compact summary below.
top_methods = ["Transit", "Radial Velocity", "Microlensing", "Imaging"]
# dropna(): a missing method would otherwise be iterated as a float NaN and
# fail on `.upper()` in the loop.
other_methods = [method for method in ex[detection_method].dropna().unique() if method not in top_methods]

for method in other_methods:
    # Each name was taken from the column itself, so this selection always
    # has at least one row (the former `len(...) > 0` guard was always true).
    method_data = ex[ex[detection_method] == method].copy()
    print(f"\n--- {method.upper()} ---")
    print(f"Total discoveries: {len(method_data)}")

    # Instruments: "name (count)" pairs on one line, most frequent first.
    instruments_count = method_data[instrument].value_counts()
    print(f"Instruments ({len(instruments_count)}): ", end="")
    print(", ".join(f"{instr} ({count})" for instr, count in instruments_count.items()))

    # Facilities: same layout.
    facilities_count = method_data[facility].value_counts()
    print(f"Facilities ({len(facilities_count)}): ", end="")
    print(", ".join(f"{fac} ({count})" for fac, count in facilities_count.items()))

print("\n" + "=" * 60)

5. OTHER DETECTION METHODS

--- ECLIPSE TIMING VARIATIONS ---
Total discoveries: 17
Instruments (5): Multiple Instruments (10), VersArray 1300B CCD Camera (3), Kepler CCD Array (2), 6K CCD Mosaic (1), SBIG 6303e CCD Camera (1)
Facilities (6): Multiple Observatories (9), Yunnan Astronomical Observatory (3), Kepler (2), Multiple Facilities (1), Winer Observatory (1), Leoncito Astronomical Complex (1)

--- TRANSIT TIMING VARIATIONS ---
Total discoveries: 35
Instruments (3): Kepler CCD Array (21), Multiple Instruments (10), TESS CCD Array (4)
Facilities (4): Kepler (21), Multiple Observatories (9), Transiting Exoplanet Survey Satellite (TESS) (4), KOINet (1)

--- ASTROMETRY ---
Total discoveries: 5
Instruments (3): Gaia CCD array (3), FORS2 Spectrograph (1), Multiple Instruments (1)
Facilities (3): European Space Agency (ESA) Gaia Satellite (3), Paranal Observatory (1), Very Long Baseline Array (1)

--- DISK KINEMATICS ---
Total discoveries: 1
Instruments (1): 12-m Array (1)
Facilities (1)

In [11]:
# COMPLETE SUMMARY
# Headline totals over every row of `ex`.
total_planets = len(ex)
total_methods = ex[detection_method].nunique(dropna=False)  # a missing method counts as one value
total_instruments = ex[instrument].nunique()  # missing values are not counted
total_facilities = ex[facility].nunique()  # missing values are not counted

print("=" * 80, "COMPLETE EXOPLANET DETECTION ANALYSIS SUMMARY", "=" * 80, sep="\n")

# Overall statistics
print(f"Total confirmed exoplanets analyzed: {total_planets}")
print(f"Total detection methods: {total_methods}")
print(f"Total unique instruments: {total_instruments}")
print(f"Total unique facilities: {total_facilities}")

# Row count and share of all rows for each detection method.
methods_summary = ex[detection_method].value_counts()
print(f"\nDetection method distribution:")
for method, count in methods_summary.items():
    percentage = count / total_planets * 100
    print(f"  {method}: {count} ({percentage:.1f}%)")

# Ten instruments with the most rows.
top_instruments = ex[instrument].value_counts().head(10)
print(f"\nMost productive instruments (top 10):")
for i, (instr, count) in enumerate(top_instruments.items(), start=1):
    print(f"  {i:2d}. {instr}: {count} discoveries")

# Ten facilities with the most rows.
top_facilities = ex[facility].value_counts().head(10)
print(f"\nMost productive facilities (top 10):")
for i, (fac, count) in enumerate(top_facilities.items(), start=1):
    print(f"  {i:2d}. {fac}: {count} discoveries")

print("=" * 80)

COMPLETE EXOPLANET DETECTION ANALYSIS SUMMARY
Total confirmed exoplanets analyzed: 5869
Total detection methods: 11
Total unique instruments: 97
Total unique facilities: 72

Detection method distribution:
  Transit: 4360 (74.3%)
  Radial Velocity: 1112 (18.9%)
  Microlensing: 237 (4.0%)
  Imaging: 83 (1.4%)
  Transit Timing Variations: 35 (0.6%)
  Eclipse Timing Variations: 17 (0.3%)
  Orbital Brightness Modulation: 9 (0.2%)
  Pulsar Timing: 8 (0.1%)
  Astrometry: 5 (0.1%)
  Pulsation Timing Variations: 2 (0.0%)
  Disk Kinematics: 1 (0.0%)

Most productive instruments (top 10):
   1. Kepler CCD Array: 3326 discoveries
   2. TESS CCD Array: 620 discoveries
   3. Multiple Instruments: 364 discoveries
   4. HARPS Spectrograph: 223 discoveries
   5. HIRES Spectrometer: 181 discoveries
   6. iKon-L CCD Camera: 155 discoveries
   7. OGLE CCD Array: 106 discoveries
   8. 18k Mosaic CCD camera: 103 discoveries
   9. Apogee 4K CCD Sensor: 96 discoveries
  10. 2K CCD Sensor: 60 discoveries

Most

---

## Data Visualization and Plotting Section

The following cells contain the plotting and visualization code for the exoplanet detection methods analysis.

In [12]:

# Distinct detection methods present in the filtered frame.
print(exoplanets[detection_method].unique())

# Method names ordered from the most rows to the fewest.
rows_per_method = exoplanets.groupby([detection_method]).size()
method_names = rows_per_method.sort_values(ascending=False).index.tolist()
# print(columns)

# Row count for every (year, method) pair, largest first, flattened into a
# table with a "count" column.
rows_per_year_method = exoplanets.groupby([year,detection_method]).size()
methods = rows_per_year_method.sort_values(ascending=False).reset_index(name="count")
print(methods)

# Show the rows for a single year (2002).
display(methods[methods[year] == 2002])

['Radial Velocity' 'Imaging' 'Eclipse Timing Variations' 'Transit'
 'Transit Timing Variations' 'Astrometry' 'Microlensing' 'Disk Kinematics'
 'Orbital Brightness Modulation' 'Pulsation Timing Variations'
 'Pulsar Timing']
     disc_year            discoverymethod  count
0         2016                    Transit   1432
1         2014                    Transit    798
2         2021                    Transit    447
3         2018                    Transit    241
4         2023                    Transit    226
..         ...                        ...    ...
130       2022                 Astrometry      1
131       2020  Transit Timing Variations      1
132       2023                 Astrometry      1
133       2021  Transit Timing Variations      1
134       2024              Pulsar Timing      1

[135 rows x 3 columns]


Unnamed: 0,disc_year,discoverymethod,count
35,2002,Radial Velocity,28
115,2002,Transit,1


In [13]:
# Number of rows per detection method, largest group first.
rows_by_method = exoplanets.groupby([detection_method]).size()
methods_quant = rows_by_method.sort_values(ascending=False)
display(methods_quant)

discoverymethod
Transit                          4360
Radial Velocity                  1112
Microlensing                      237
Imaging                            83
Transit Timing Variations          35
Eclipse Timing Variations          17
Orbital Brightness Modulation       9
Pulsar Timing                       8
Astrometry                          5
Pulsation Timing Variations         2
Disk Kinematics                     1
dtype: int64

In [14]:
# Wide table: one row per discovery year, one column per detection method.
# (year, method) pairs with no rows are filled with 0.
year_method_table = methods.pivot(index=year, columns=[detection_method], values="count")
methods_by_year = year_method_table.fillna(0)
display(methods_by_year)

discoverymethod,Astrometry,Disk Kinematics,Eclipse Timing Variations,Imaging,Microlensing,Orbital Brightness Modulation,Pulsar Timing,Pulsation Timing Variations,Radial Velocity,Transit,Transit Timing Variations
disc_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1992,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0
1994,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1995,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1996,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0
1997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0
1999,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.0,0.0,0.0
2000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.0,0.0,0.0
2001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0
2002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,28.0,1.0,0.0


In [15]:
from bokeh.io import show
from bokeh.plotting import figure, output_file, save
from bokeh.palettes import Category20
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.transform import stack
# from bokeh.io.export import export_pdf

# # Prepare data for Bokeh
# years = methods_by_year.index.astype(str).tolist()

# # method_names.remove("Radial Velocity")
# # method_names.remove("Transit")
# # Create a ColumnDataSource
# data = {'year': years}
# for method in method_names:
#     data[method] = methods_by_year[method].tolist()

# source = ColumnDataSource(data=data)
# print(data)
# # Create the figure
# p = figure(x_range=years, height=600, width=900, 
#            title="Exoplanet Discoveries by Year and Method",
#            toolbar_location="right", 
#            tools="pan,wheel_zoom,box_zoom,reset,save")

# # Create stacked bars
# palette = Category20[min(20, len(method_names))]  # Category20 has 20 colors

# # This creates the stacked bars in one go
# renderers = p.vbar_stack(method_names, x='year', width=0.8, color=palette[:len(method_names)], 
#                          source=source, legend_label=method_names, name=method_names)

# # Style the plot
# p.x_range.range_padding = 0.1
# p.xgrid.grid_line_color = None
# p.y_range.start = 0
# p.legend.title = "Detection Method"
# p.legend.location = "top_left" 
# p.legend.orientation = "vertical"  # Vertical for better readability
# p.xaxis.axis_label = "Discovery Year"
# p.yaxis.axis_label = "Number of Exoplanets"
# p.xaxis.major_label_orientation = 1.2  # Rotate year labels

# # Customize the title appearance
# p.title.text_font_size = "24px"  # Make the font size bigger
# p.title.align = "center"        # Center the title
# p.title.text_color = "navy"     # Change the title color
# p.title.text_font_style = "bold" # Make the title bold

# # Configure hover tool for stacked bars
# # Add a separate hover tool that uses the renderers
# hover = HoverTool(tooltips=[
#     ("Year", "@year"),
#     ("Method", "$name"),
#     ("Count", "@$name")
# ])
# p.add_tools(hover)

# # Make sure the hover tool applies to all renderers
# hover.renderers = renderers
# for r in renderers:
#     r.glyph.line_color = None
# print([renderer for renderer in renderers])
# output_file(filename = "Exoplanets_Detections.html")
# save(p)
# # export_pdf(p, filename = "Exoplanets_Detections.pdf")
# show(p)

In [16]:
from bokeh.io import show, output_file, save
from bokeh.plotting import figure
from bokeh.palettes import Category20
from bokeh.models import ColumnDataSource, HoverTool, Title
from bokeh.transform import stack
from bokeh.layouts import column
import pandas as pd


# # Prepare data and create the first plot (p)
# years_p = methods_by_year.index.astype(str).tolist()
# method_names_p = method_names
# data_p = {'year': years_p}
# for method in method_names_p:
#     data_p[method] = methods_by_year[method].tolist()
# source_p = ColumnDataSource(data=data_p)

# p = figure(x_range=years_p, height=600, width=900,
#            title="All Exoplanet Discoveries by Year and Method",
#            toolbar_location="right",
#            tools="pan,wheel_zoom,box_zoom,reset,save")

# palette_p = Category20[min(20, len(method_names_p))]
# color_mapping = dict(zip(method_names_p, palette_p)) # Store the color mapping
# renderers_p = p.vbar_stack(method_names_p, x='year', width=0.8, color=palette_p[:len(method_names_p)],
#                          source=source_p, legend_label=method_names_p, name=method_names_p)

# p.x_range.range_padding = 0.1
# p.xgrid.grid_line_color = None
# p.y_range.start = 0
# p.legend.title = "Detection Method"
# p.legend.location = "top_left"
# p.legend.orientation = "vertical"
# p.xaxis.axis_label = "Discovery Year"
# p.yaxis.axis_label = "Number of Exoplanets"
# p.xaxis.major_label_orientation = 1.2

# hover_p = HoverTool(tooltips=[("Year", "@year"), ("Method", "$name"), ("Count", "@$name")])
# p.add_tools(hover_p)
# for r in renderers_p:
#     r.glyph.line_color = None

# p.title.text_font_size = "24px"
# p.title.align = "center"
# p.title.text_color = "navy"
# p.title.text_font_style = "bold"

# # Prepare data and create the second plot (q)
# years_q = methods_by_year.index.astype(str).tolist()
# method_names_q = method_names
# if "Radial Velocity" in method_names_q:
#     method_names_q.remove("Radial Velocity")
# if "Transit" in method_names_q:
#     method_names_q.remove("Transit")

# data_q = {'year': years_q}
# for method in method_names_q:
#     data_q[method] = methods_by_year[method].tolist()
# source_q = ColumnDataSource(data=data_q)

# # Use the color mapping from plot p for consistent colors
# palette_q = [color_mapping[method] for method in method_names_q]

# q = figure(x_range=years_q, height=600, width=900,
#            title="Selected Exoplanet Discoveries by Year and Method",
#            toolbar_location="right",
#            tools="pan,wheel_zoom,box_zoom,reset,save")


# renderers_q = q.vbar_stack(method_names_q, x='year', width=0.8, color=palette_q,
#                          source=source_q, legend_label=method_names_q, name=method_names_q)

# q.x_range.range_padding = 0.1
# q.xgrid.grid_line_color = None
# q.y_range.start = 0
# q.legend.title = "Detection Method"
# q.legend.location = "top_left"
# q.legend.orientation = "vertical"
# q.xaxis.axis_label = "Discovery Year"
# q.yaxis.axis_label = "Number of Exoplanets"
# q.xaxis.major_label_orientation = 1.2

# hover_q = HoverTool(tooltips=[("Year", "@year"), ("Method", "$name"), ("Count", "@$name")])
# q.add_tools(hover_q)
# for r in renderers_q:
#     r.glyph.line_color = None

# q.title.text_font_size = "24px"
# q.title.align = "center"
# q.title.text_color = "navy"
# q.title.text_font_style = "bold"

# # Create a row layout with both plots
# layout = column(p, q)

# # Save the layout to an HTML file
# output_file("Exoplanets_Detections_Combined.html")
# save(layout)

# # Optionally display the layout (if you want to see it in the browser as well)
# show(layout)


In [17]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

#DONT CHANGE - Creation of discoveries plot!!

# Ensure that we do not exceed the number of colors available
# num_methods = len(methods_by_year.columns)
# if num_methods > len(bokeh_colors):
#     raise ValueError(f"Number of methods ({num_methods}) exceeds number of available colors ({len(bokeh_colors)}).")
# color_mapping = dict(zip(methods_by_year.columns, bokeh_colors))


# Function to create the stacked bar plot
def create_stacked_bar_plot(ax, df, title, methods_to_plot, color_mapping, add_legend = False):
    """
    Draw a stacked bar chart on ``ax``: one bar per row of ``df``, one segment per method.

    Args:
        ax (matplotlib.axes.Axes): The Axes object to plot on.
        df (pandas.DataFrame): Counts to plot. Its index supplies the x labels
            (converted to strings) and each method is one column.
        title (str): The title of the plot.
        methods_to_plot (list): Column names of ``df`` to stack, listed bottom to top.
        color_mapping (dict): Dictionary mapping each method to its bar color.
        add_legend (bool): When True, add a "Detection Method" legend in the
            upper-left corner. Defaults to False.

    Raises:
        KeyError: If a method is missing from ``df`` or from ``color_mapping``.
    """
    years = df.index.astype(str)
    # Running top of each stack; the next method's segments start from here.
    bottom = [0] * len(years)
    for method in methods_to_plot:
        # Treat missing counts as zero. A NaN would otherwise propagate into
        # `bottom` and suppress every later method's segment for that year.
        heights = df[method].fillna(0)
        ax.bar(years, heights, bottom=bottom, label=method, color=color_mapping[method])
        bottom = [base + height for base, height in zip(bottom, heights)]

    ax.set_title(title, fontsize=16, color='navy', fontweight='bold', loc='center')
    ax.set_xlabel("Discovery Year", fontsize=12)
    ax.set_ylabel("Number of Exoplanets", fontsize=12)
    ax.tick_params(axis='x', rotation=45)  # Rotate x-axis labels for readability
    if add_legend:
        ax.legend(title="Detection Method", loc='upper left')
    ax.grid(False)  # Remove gridlines for a cleaner look



# --- Panel 1 inputs: every detection method --------------------------------
years_p = list(methods_by_year.index.astype(str))
method_names_p = method_names.copy()
# Request at most 20 colors from the Category20 table (indexed by color count).
# NOTE(review): presumably bokeh.palettes.Category20, imported in an earlier cell - confirm.
n_colors_p = min(20, len(method_names_p))
palette_p = Category20[n_colors_p]
# Method -> color lookup, reused by the second panel so colors stay consistent.
color_mapping = {method: color for method, color in zip(method_names_p, palette_p)}

# --- Panel 2 inputs: same years, minus the two excluded methods -------------
years_q = years_p.copy()
method_names_q = method_names.copy()
for excluded_method in ("Radial Velocity", "Transit"):
    if excluded_method in method_names_q:
        method_names_q.remove(excluded_method)
        print(True)  # reports that the method was present and has been removed

palette_q = [color_mapping[remaining] for remaining in method_names_q]

# --- Figure: two panels stacked in a single column --------------------------
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 12))
ax1, ax2 = axes
fig.subplots_adjust(hspace=0.3)  # vertical spacing between the two panels

# Top panel: all methods, with the legend.
create_stacked_bar_plot(
    ax1,
    methods_by_year,
    "All Exoplanet Discoveries by Year and Method",
    method_names_p,
    color_mapping,
    add_legend=True,
)
# Bottom panel: everything except 'Radial Velocity' and 'Transit', no legend.
create_stacked_bar_plot(
    ax2,
    methods_by_year,
    "Selected Exoplanet Discoveries by Year and Method",
    method_names_q,
    color_mapping,
)

# Write the figure to disk and release it.
pdf_filename = "Exoplanets_Detections_Combined.pdf"
plt.savefig(pdf_filename, bbox_inches='tight', dpi=300)
plt.close(fig)

print("Plots saved to Exoplanets_Detections_Combined.pdf")


True
True
Plots saved to Exoplanets_Detections_Combined.pdf


In [19]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# NO CAMBIAR - ¡Creación del gráfico de descubrimientos!

# Spanish legend labels for the archive's discovery-method names.
# Methods without an entry fall back to their English name at lookup time.
# 'Pulsar Timing' and 'Pulsation Timing Variations' are two distinct methods,
# so each needs its own label; sharing one label would produce two legend
# entries with the same text but different colors.
traduccion_metodos = {
    'Transit': 'Tránsitos',
    'Radial Velocity': 'Velocidad Radial',
    'Microlensing': 'Microlentes Gravitacionales',
    'Imaging': 'Imagen Directa',
    'Transit Timing Variations': 'Variaciones en el Tiempo de Tránsito',
    'Eclipse Timing Variations': 'Variaciones en el Tiempo de Eclipse',
    'Orbital Brightness Modulation': 'Modulación Orbital del Brillo',
    'Pulsar Timing': 'Cronometría de Púlsares',
    'Pulsation Timing Variations': 'Variaciones en el Tiempo de Pulsación',
    'Astrometry': 'Astrometría',
    'Disk Kinematics': 'Disk Kinematics'
}

# Function to create the stacked bar plot (Spanish labels)
def create_stacked_bar_plot(ax, df, title, methods_to_plot, color_mapping, add_legend = False):
    """
    Draw a stacked bar chart on ``ax`` with Spanish axis and legend labels.

    Args:
        ax (matplotlib.axes.Axes): The Axes object to plot on.
        df (pandas.DataFrame): Counts to plot. Its index supplies the x labels
            (converted to strings) and each method is one column.
        title (str): The title of the plot.
        methods_to_plot (list): Column names of ``df`` to stack, listed bottom to top.
        color_mapping (dict): Dictionary mapping each method to its bar color.
        add_legend (bool): When True, add a "Método de Detección" legend in the
            upper-left corner. Defaults to False.

    Raises:
        KeyError: If a method is missing from ``df`` or from ``color_mapping``.
    """
    years = df.index.astype(str)
    # Running top of each stack; the next method's segments start from here.
    bottom = [0] * len(years)
    for method in methods_to_plot:
        # Legend label: the translation from the module-level
        # `traduccion_metodos` table, or the original name when absent.
        nombre_traducido = traduccion_metodos.get(method, method)
        # Treat missing counts as zero. A NaN would otherwise propagate into
        # `bottom` and suppress every later method's segment for that year.
        heights = df[method].fillna(0)
        ax.bar(years, heights, bottom=bottom, label=nombre_traducido, color=color_mapping[method])
        bottom = [base + height for base, height in zip(bottom, heights)]

    ax.set_title(title, fontsize=16, color='navy', fontweight='bold', loc='center')
    ax.set_xlabel("Año de Descubrimiento", fontsize=12)
    ax.set_ylabel("Número de Exoplanetas", fontsize=12)
    ax.tick_params(axis='x', rotation=45)  # Rotate x-axis labels for readability
    if add_legend:
        ax.legend(title="Método de Detección", loc='upper left')
    ax.grid(False)  # Remove gridlines for a cleaner look

# --- Panel 1 inputs: every detection method --------------------------------
years_p = list(methods_by_year.index.astype(str))
method_names_p = method_names.copy()
# Request at most 20 colors from the Category20 table (indexed by color count).
# NOTE(review): presumably bokeh.palettes.Category20, imported in an earlier cell - confirm.
n_colors_p = min(20, len(method_names_p))
palette_p = Category20[n_colors_p]
# Method -> color lookup, reused by the second panel so colors stay consistent.
color_mapping = {method: color for method, color in zip(method_names_p, palette_p)}

# --- Panel 2 inputs: same years, minus the two excluded methods -------------
years_q = years_p.copy()
method_names_q = method_names.copy()
for excluded_method in ("Radial Velocity", "Transit"):
    if excluded_method in method_names_q:
        method_names_q.remove(excluded_method)
        print(True)  # reports that the method was present and has been removed

palette_q = [color_mapping[remaining] for remaining in method_names_q]

# --- Figure: two panels stacked in a single column --------------------------
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 12))
ax1, ax2 = axes
fig.subplots_adjust(hspace=0.3)  # vertical spacing between the two panels

# Top panel: all methods, with the legend.
create_stacked_bar_plot(
    ax1,
    methods_by_year,
    "Todos los Descubrimientos de Exoplanetas por Año y Método",
    method_names_p,
    color_mapping,
    add_legend=True,
)
# Bottom panel: everything except 'Radial Velocity' and 'Transit', no legend.
create_stacked_bar_plot(
    ax2,
    methods_by_year,
    "Descubrimientos Seleccionados de Exoplanetas por Año y Método",
    method_names_q,
    color_mapping,
)

# Write the figure to disk and release it.
pdf_filename = "Descubrimientos_Exoplanetas_Combinado.pdf"
plt.savefig(pdf_filename, bbox_inches='tight', dpi=300)
plt.close(fig)

print("Gráficos guardados en Descubrimientos_Exoplanetas_Combinado.pdf")

True
True
Gráficos guardados en Descubrimientos_Exoplanetas_Combinado.pdf


In [22]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# NO CAMBIAR - ¡Creación del gráfico de descubrimientos!

# Spanish legend labels for the archive's discovery-method names.
# Methods without an entry fall back to their English name at lookup time.
# 'Pulsar Timing' and 'Pulsation Timing Variations' are two distinct methods,
# so each needs its own label; sharing one label would produce two legend
# entries with the same text but different colors.
traduccion_metodos = {
    'Transit': 'Tránsitos',
    'Radial Velocity': 'Velocidad Radial',
    'Microlensing': 'Microlentes Gravitacionales',
    'Imaging': 'Imagen Directa',
    'Transit Timing Variations': 'Variaciones en el Tiempo de Tránsito',
    'Eclipse Timing Variations': 'Variaciones en el Tiempo de Eclipse',
    'Orbital Brightness Modulation': 'Modulación Orbital del Brillo',
    'Pulsar Timing': 'Cronometría de Púlsares',
    'Pulsation Timing Variations': 'Variaciones en el Tiempo de Pulsación',
    'Astrometry': 'Astrometría',
    'Disk Kinematics': 'Disk Kinematics'
}

# Function to create the stacked bar plot (Spanish labels)
def create_stacked_bar_plot(ax, df, title, methods_to_plot, color_mapping, add_legend = False):
    """
    Draw a stacked bar chart on ``ax`` with Spanish axis and legend labels.

    Args:
        ax (matplotlib.axes.Axes): The Axes object to plot on.
        df (pandas.DataFrame): Counts to plot. Its index supplies the x labels
            (converted to strings) and each method is one column.
        title (str): The title of the plot.
        methods_to_plot (list): Column names of ``df`` to stack, listed bottom to top.
        color_mapping (dict): Dictionary mapping each method to its bar color.
        add_legend (bool): When True, add a "Método de Detección" legend in the
            upper-left corner. Defaults to False.

    Raises:
        KeyError: If a method is missing from ``df`` or from ``color_mapping``.
    """
    years = df.index.astype(str)
    # Running top of each stack; the next method's segments start from here.
    bottom = [0] * len(years)
    for method in methods_to_plot:
        # Legend label: the translation from the module-level
        # `traduccion_metodos` table, or the original name when absent.
        nombre_traducido = traduccion_metodos.get(method, method)
        # Treat missing counts as zero. A NaN would otherwise propagate into
        # `bottom` and suppress every later method's segment for that year.
        heights = df[method].fillna(0)
        ax.bar(years, heights, bottom=bottom, label=nombre_traducido, color=color_mapping[method])
        bottom = [base + height for base, height in zip(bottom, heights)]

    ax.set_title(title, fontsize=16, color='navy', fontweight='bold', loc='center')
    ax.set_xlabel("Año de Descubrimiento", fontsize=12)
    ax.set_ylabel("Número de Exoplanetas", fontsize=12)
    ax.tick_params(axis='x', rotation=45)  # Rotate x-axis labels for readability
    if add_legend:
        ax.legend(title="Método de Detección", loc='upper left')
    ax.grid(False)  # Remove gridlines for a cleaner look

# Inputs for the all-methods chart.
years_p = methods_by_year.index.astype(str).tolist()
method_names_p = method_names.copy()
# Request at most 20 colors from the Category20 table (indexed by color count).
# NOTE(review): presumably bokeh.palettes.Category20, imported in an earlier cell - confirm.
palette_p = Category20[min(20, len(method_names_p))]
color_mapping = dict(zip(method_names_p, palette_p))  # method -> color lookup

# Inputs for the "selected methods" chart. That chart is not drawn in this
# cell; these names are still assigned as in the two-panel version of the cell.
years_q = methods_by_year.index.astype(str).tolist()
method_names_q = method_names.copy()
if "Radial Velocity" in method_names_q:
    method_names_q.remove("Radial Velocity")
    print(True)
if "Transit" in method_names_q:
    method_names_q.remove("Transit")
    print(True)

palette_q = [color_mapping[method] for method in method_names_q]

# Single-panel figure on a light grey background.
background_color = '#E8E8E8'
fig, ax1 = plt.subplots(1, 1, figsize=(12, 6))
fig.patch.set_facecolor(background_color)  # figure background
ax1.set_facecolor(background_color)        # axes background
# hspace only spaces vertically stacked subplots, so it is inert with one Axes.
fig.subplots_adjust(hspace=0.3)

# Draw the all-methods chart, with the legend.
create_stacked_bar_plot(ax1, methods_by_year, "Todos los Descubrimientos de Exoplanetas por Año y Método", method_names_p, color_mapping, True)

# NOTE: despite its name, `pdf_filename` holds a PNG path in this cell.
pdf_filename = "Descubrimientos_Exoplanetas.png"
plt.savefig(pdf_filename, bbox_inches='tight', dpi=300, facecolor=background_color)
plt.close(fig)

# Report the file that was actually written; the message previously named
# the combined PDF produced by a different cell.
print(f"Gráficos guardados en {pdf_filename}")

True
True
Gráficos guardados en Descubrimientos_Exoplanetas_Combinado.pdf
Gráficos guardados en Descubrimientos_Exoplanetas_Combinado.pdf
