In [1]:
import os
import requests
import pandas as pd
import matplotlib.pyplot as plt
import streamlit as st
from datetime import datetime
import pyarrow as pa
import pyarrow.parquet as pq
import json
import time
import re

In [2]:
# Configurações
OUTPUT_DIR = 'data'
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [40]:
def fetch_spacex_launches():
    url = "https://api.spacexdata.com/v3/launches"
    response = requests.get(url)
    response.raise_for_status()
    return pd.json_normalize(response.json())

launches_df = fetch_spacex_launches()


# 2. Conversão e padronização de formatos
launches_df['launch_date_utc'] = pd.to_datetime(launches_df['launch_date_utc'], utc=True)
launches_df['year'] = launches_df['launch_date_utc'].dt.year
launches_df['month'] = launches_df['launch_date_utc'].dt.month
launches_df['year_month'] = launches_df['launch_date_utc'].dt.strftime('%Y-%m')


# 4. Filtragens e extrações de colunas relevantes
relevant_cols = ['flight_number', 'rocket.rocket_name', 'launch_date_utc', 'launch_success', 'rocket.rocket_id', 'year', 'month', 'year_month']
launches_df = launches_df[relevant_cols]


launches_df

Unnamed: 0,flight_number,rocket.rocket_name,launch_date_utc,launch_success,rocket.rocket_id,year,month,year_month
0,1,Falcon 1,2006-03-24 22:30:00+00:00,False,falcon1,2006,3,2006-03
1,2,Falcon 1,2007-03-21 01:10:00+00:00,False,falcon1,2007,3,2007-03
2,3,Falcon 1,2008-08-03 03:34:00+00:00,False,falcon1,2008,8,2008-08
3,4,Falcon 1,2008-09-28 23:15:00+00:00,True,falcon1,2008,9,2008-09
4,5,Falcon 1,2009-07-13 03:35:00+00:00,True,falcon1,2009,7,2009-07
...,...,...,...,...,...,...,...,...
106,107,Falcon 9,2020-11-16 00:27:00+00:00,True,falcon9,2020,11,2020-11
107,108,Falcon 9,2020-11-21 17:17:00+00:00,True,falcon9,2020,11,2020-11
108,109,Falcon 9,2020-10-24 15:31:00+00:00,True,falcon9,2020,10,2020-10
109,110,Falcon 9,2020-12-06 16:17:00+00:00,,falcon9,2020,12,2020-12


In [None]:
def fetch_spacex_rockets():
    url = "https://api.spacexdata.com/v3/rockets"
    response = requests.get(url)
    response.raise_for_status()
    return pd.json_normalize(response.json())

rockets_df = fetch_spacex_rockets()

relevant_cols = ['cost_per_launch', 'rocket_id']
rockets_df = rockets_df[relevant_cols]


df_merge = pd.merge(launches_df, rockets_df, left_on="rocket.rocket_id", right_on="rocket_id", how="right")


df_merge

Unnamed: 0,flight_number,rocket.rocket_name,launch_date_utc,launch_success,rocket.rocket_id,year,month,year_month,cost_per_launch,rocket_id
0,1.0,Falcon 1,2006-03-24 22:30:00+00:00,False,falcon1,2006.0,3.0,2006-03,6700000,falcon1
1,2.0,Falcon 1,2007-03-21 01:10:00+00:00,False,falcon1,2007.0,3.0,2007-03,6700000,falcon1
2,3.0,Falcon 1,2008-08-03 03:34:00+00:00,False,falcon1,2008.0,8.0,2008-08,6700000,falcon1
3,4.0,Falcon 1,2008-09-28 23:15:00+00:00,True,falcon1,2008.0,9.0,2008-09,6700000,falcon1
4,5.0,Falcon 1,2009-07-13 03:35:00+00:00,True,falcon1,2009.0,7.0,2009-07,6700000,falcon1
...,...,...,...,...,...,...,...,...,...,...
107,110.0,Falcon 9,2020-12-13 17:30:00+00:00,,falcon9,2020.0,12.0,2020-12,50000000,falcon9
108,55.0,Falcon Heavy,2018-02-06 20:45:00+00:00,True,falconheavy,2018.0,2.0,2018-02,90000000,falconheavy
109,77.0,Falcon Heavy,2019-04-11 22:35:00+00:00,True,falconheavy,2019.0,4.0,2019-04,90000000,falconheavy
110,81.0,Falcon Heavy,2019-06-25 03:30:00+00:00,True,falconheavy,2019.0,6.0,2019-06,90000000,falconheavy
