In [42]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
import matplotlib.pyplot as plt
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import re

In [43]:
base_url = "https://nextspaceflight.com/launches/past/"
last_page = 1


In [52]:


options = Options()
options.add_argument("--headless=new")
browser = webdriver.Chrome(options=options)

launch_data = []
for page in range(1, last_page + 1):
    browser.get(f"{base_url}?page={page}")
    cards = browser.find_elements(By.CLASS_NAME, "mdl-card")
    for card in cards:
        company = card.find_element(By.CLASS_NAME, "mdl-card__title-text").text
        rocket_and_payload = card.find_element(By.CLASS_NAME, "header-style").text
        date_and_location = card.find_element(By.CLASS_NAME, "mdl-card__supporting-text").text
        details_url = card.find_element(By.CLASS_NAME, "mdc-button").get_attribute("href")
        border_color = card.value_of_css_property("border-top-color")

        if "rgba(69, 207, 93, 1)" == border_color:
            successful = "Yes"
        else:
            successful = "No" 

        launch_dict = {
            'company': company,
            'rocket_and_payload': rocket_and_payload,
            'date_and_location': date_and_location,
            'details_url': details_url,
            'successful': successful
        }
        launch_data.append(launch_dict)
browser.quit()

df = pd.DataFrame(launch_data, columns=['company', 'rocket_and_payload', 'date_and_location', 'details_url', 'successful'])

In [None]:
browser = webdriver.Chrome(options=options)
wait = WebDriverWait(browser, 10)

#Aqui se extrae el peso de cada carga para despues poder sumarlo y tener el peso total
def extract_weight(text):
    try:
        match = re.search(r':\s*([\d,]+)\s*kg', text)
        if match:
            weight_str = match.group(1).replace(',', '')
            return int(weight_str)
        else:
            return 0
    except:
        return 0

# Aqui se extrae la altura y diametro en float para despues poder generar el volumen del cohete
def extract_metric_value(text):
   
    try:
        match = re.search(r':\s*([\d.,]+)\s*m', text)
        if match:
            value_str = match.group(1).replace(',', '')
            return float(value_str)
        else:
            return 0.0
    except:
        return 0.0

prices = []
leo_weights = []  
gto_weights = []  
rocket_heights = []
fairing_diameters = []
fairing_heights = []

for index in range(len(df.details_url)):
    print(f"Processing launch {index + 1}/{len(df.details_url)}")
    browser.get(df.details_url[index])

    details_els = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "mdl-card__supporting-text")))
    details_texts = [el.text for el in details_els]

    price = "N/A"
    leo_weight = 0
    gto_weight = 0
    rocket_h = 0.0
    fairing_d = 0.0
    fairing_h = 0.0

    if len(details_texts) > 1:
        parts = details_texts[1].split("\n")
        for part in parts:
            if "Price: " in part:
                price = part.replace("Price: ", "").strip()
            elif "Payload to LEO:" in part:
                leo_weight = extract_weight(part)
            elif "Payload to GTO:" in part:
                gto_weight = extract_weight(part)
            elif "Rocket Height:" in part:  
                rocket_h = extract_metric_value(part)
            elif "Fairing Diameter:" in part:
                fairing_d = extract_metric_value(part)
            elif "Fairing Height:" in part:
                fairing_h = extract_metric_value(part)

   
    prices.append(price)
    leo_weights.append(leo_weight)
    gto_weights.append(gto_weight)
    rocket_heights.append(rocket_h)
    fairing_diameters.append(fairing_d)
    fairing_heights.append(fairing_h)

browser.quit()


df["price"] = prices
df["leo_weight_kg"] = leo_weights
df["gto_weight_kg"] = gto_weights
df["total_payload_kg"] = df["leo_weight_kg"] + df["gto_weight_kg"]
df["rocket_height_m"] = rocket_heights
df["fairing_diameter_m"] = fairing_diameters
df["fairing_height_m"] = fairing_heights

df

Unnamed: 0,company,rocket_and_payload,date_and_location,details_url,successful,price,leo_weight_kg,gto_weight_kg,total_payload_kg,rocket_height,fairing_diameter,fairing_height,rocket_height_m,fairing_diameter_m,fairing_height_m
0,CASC,Long March 7A | Yaogan 45,"Mon Sep 8, 2025 23:00 CLST\nLC-201, Wenchang S...",https://nextspaceflight.com/launches/details/7943,Yes,,12000,7000,19000,Fairing Height: 12.4 m,0,0,58.0,4.2,12.4
1,Chinarocket,Jielong 3 | Geely Constellation Group 05,"Mon Sep 8, 2025 16:48 CLST\nOriental Spaceport...",https://nextspaceflight.com/launches/details/7942,Yes,,1500,0,1500,Fairing Diameter: 3.35 m,0,0,31.0,3.35,0.0
2,SpaceX,Falcon 9 Block 5 | Starlink Group 17-9,"Sat Sep 6, 2025 2:06 PM CLT\nSLC-4E, Vandenber...",https://nextspaceflight.com/launches/details/7935,Yes,$69.75 million,22800,8300,31100,Fairing Height: 13.0 m,0,0,70.0,5.2,13.0
3,CASC,Long March 6A | Yaogan 40 Group 03,"Sat Sep 6, 2025 12:34 PM CLT\nLC-9A, Taiyuan S...",https://nextspaceflight.com/launches/details/7940,Yes,,5000,0,5000,Fairing Height: 5.7 m,0,0,50.0,4.2,5.7
4,SpaceX,Falcon 9 Block 5 | Starlink Group 10-57,"Fri Sep 5, 2025 8:32 AM CLT\nLC-39A, Kennedy S...",https://nextspaceflight.com/launches/details/7934,Yes,$69.75 million,22800,8300,31100,Fairing Height: 13.0 m,0,0,70.0,5.2,13.0
5,Galactic Energy,Ceres 1 | 3 satellites,"Fri Sep 5, 2025 7:39 AM CLT\nSite 95A, Jiuquan...",https://nextspaceflight.com/launches/details/7939,Yes,$4.38 million,400,0,400,Fairing Height: 2.5 m,0,0,19.0,1.4,2.5
6,CASC,Long March 3C/YZ-1 | Shiyan 29,"Thu Sep 4, 2025 10:34 PM CLT\nLC-2, Xichang Sa...",https://nextspaceflight.com/launches/details/7938,Yes,$20.0 million,7500,3500,11000,Fairing Height: 9.78 m,0,0,55.64,4.2,9.78
7,SpaceX,Falcon 9 Block 5 | Starlink Group 10-22,"Wed Sep 3, 2025 7:56 AM CLT\nSLC-40, Cape Cana...",https://nextspaceflight.com/launches/details/7930,Yes,$69.75 million,22800,8300,31100,Fairing Height: 13.0 m,0,0,70.0,5.2,13.0
8,SpaceX,Falcon 9 Block 5 | Starlink Group 17-8,"Tue Sep 2, 2025 11:51 PM CLT\nSLC-4E, Vandenbe...",https://nextspaceflight.com/launches/details/7931,Yes,$69.75 million,22800,8300,31100,Fairing Height: 13.0 m,0,0,70.0,5.2,13.0
9,IAI,Shavit 2 | Ofek-19,"Tue Sep 2, 2025 3:30 PM CLT\nPad 1, Palmachim ...",https://nextspaceflight.com/launches/details/7937,Yes,,350,0,350,Fairing Height: 4.37 m,0,0,22.0,1.35,4.37


In [32]:
df.size
df.shape

(60, 4)

In [17]:
df.to_csv('launches.csv', index=False)

In [33]:
df["rocket"] = df["rocket_and_payload"].str.split("|").str[0]
df["payload"] = df["rocket_and_payload"].str.split("|").str[1]
df["date"] = df["date_and_location"].str.split("GMT-4").str[0]
df["location"] = df["date_and_location"].str.split("GMT-4").str[1]
df.drop(columns=["rocket_and_payload", "date_and_location"], inplace=True)
df

Unnamed: 0,company,details_url,rocket,payload,date,location
0,SpaceX,https://nextspaceflight.com/launches/details/7935,Falcon 9 Block 5,Starlink Group 17-9,"Sat Sep 6, 2025 2:06 PM","\nSLC-4E, Vandenberg SFB, California, USA"
1,CASC,https://nextspaceflight.com/launches/details/7940,Long March 6A,Yaogan 40 Group 03,"Sat Sep 6, 2025 12:34 PM","\nLC-9A, Taiyuan Satellite Launch Center, China"
2,SpaceX,https://nextspaceflight.com/launches/details/7934,Falcon 9 Block 5,Starlink Group 10-57,"Fri Sep 5, 2025 8:32 AM","\nLC-39A, Kennedy Space Center, Florida, USA"
3,Galactic Energy,https://nextspaceflight.com/launches/details/7939,Ceres 1,3 satellites,"Fri Sep 5, 2025 7:39 AM","\nSite 95A, Jiuquan Satellite Launch Center, C..."
4,CASC,https://nextspaceflight.com/launches/details/7938,Long March 3C/YZ-1,Shiyan 29,"Thu Sep 4, 2025 10:34 PM","\nLC-2, Xichang Satellite Launch Center, China"
5,SpaceX,https://nextspaceflight.com/launches/details/7930,Falcon 9 Block 5,Starlink Group 10-22,"Wed Sep 3, 2025 7:56 AM","\nSLC-40, Cape Canaveral SFS, Florida, USA"
6,SpaceX,https://nextspaceflight.com/launches/details/7931,Falcon 9 Block 5,Starlink Group 17-8,"Tue Sep 2, 2025 11:51 PM","\nSLC-4E, Vandenberg SFB, California, USA"
7,IAI,https://nextspaceflight.com/launches/details/7937,Shavit 2,Ofek-19,"Tue Sep 2, 2025 3:30 PM","\nPad 1, Palmachim Airbase, Israel"
8,SpaceX,https://nextspaceflight.com/launches/details/7926,Falcon 9 Block 5,Starlink Group 10-14,"Sun Aug 31, 2025 7:49 AM","\nSLC-40, Cape Canaveral SFS, Florida, USA"
9,SpaceX,https://nextspaceflight.com/launches/details/7917,Falcon 9 Block 5,Starlink Group 17-7,"Sat Aug 30, 2025 12:59 AM","\nSLC-4E, Vandenberg SFB, California, USA"


In [34]:
def clean_date(scraped_date_string):
    months = {"Jan":"01", "Feb":"02", "Mar":"03", "Apr":"04", "May":"05", "Jun":"06",
              "Jul":"07", "Aug":"08", "Sep":"09", "Oct":"10", "Nov":"11", "Dec":"12"}
    parts = scraped_date_string.split(" ")
    month = parts[1]
    day = parts[2].replace(",", "")
    year = parts[3]
    return f"{year}/{months[month]}/{day}"

In [35]:
df["cleaned_date"] = df["date"].apply(clean_date)
df["year"] = df["cleaned_date"].str.split("/").str[0]
df["month"] = df["cleaned_date"].str.split("/").str[1]
df["day"] = df["cleaned_date"].str.split("/").str[2]
df.drop(columns=["date", "cleaned_date"], inplace=True)
df

Unnamed: 0,company,details_url,rocket,payload,location,year,month,day
0,SpaceX,https://nextspaceflight.com/launches/details/7935,Falcon 9 Block 5,Starlink Group 17-9,"\nSLC-4E, Vandenberg SFB, California, USA",2025,9,6
1,CASC,https://nextspaceflight.com/launches/details/7940,Long March 6A,Yaogan 40 Group 03,"\nLC-9A, Taiyuan Satellite Launch Center, China",2025,9,6
2,SpaceX,https://nextspaceflight.com/launches/details/7934,Falcon 9 Block 5,Starlink Group 10-57,"\nLC-39A, Kennedy Space Center, Florida, USA",2025,9,5
3,Galactic Energy,https://nextspaceflight.com/launches/details/7939,Ceres 1,3 satellites,"\nSite 95A, Jiuquan Satellite Launch Center, C...",2025,9,5
4,CASC,https://nextspaceflight.com/launches/details/7938,Long March 3C/YZ-1,Shiyan 29,"\nLC-2, Xichang Satellite Launch Center, China",2025,9,4
5,SpaceX,https://nextspaceflight.com/launches/details/7930,Falcon 9 Block 5,Starlink Group 10-22,"\nSLC-40, Cape Canaveral SFS, Florida, USA",2025,9,3
6,SpaceX,https://nextspaceflight.com/launches/details/7931,Falcon 9 Block 5,Starlink Group 17-8,"\nSLC-4E, Vandenberg SFB, California, USA",2025,9,2
7,IAI,https://nextspaceflight.com/launches/details/7937,Shavit 2,Ofek-19,"\nPad 1, Palmachim Airbase, Israel",2025,9,2
8,SpaceX,https://nextspaceflight.com/launches/details/7926,Falcon 9 Block 5,Starlink Group 10-14,"\nSLC-40, Cape Canaveral SFS, Florida, USA",2025,8,31
9,SpaceX,https://nextspaceflight.com/launches/details/7917,Falcon 9 Block 5,Starlink Group 17-7,"\nSLC-4E, Vandenberg SFB, California, USA",2025,8,30


### Visualización de datos