In [None]:
import pandas as pd
import plotly.express as px

In [None]:
# Never truncate wide DataFrames: render every column in cell output.
pd.options.display.max_columns = None

In [None]:
# Make "plotly_white" the default template for every Plotly Express figure
# created below.
# NOTE(review): "plotly_white" is a white-background theme, not a transparent
# one; a transparent export would need paper/plot background colours set explicitly.
px.defaults.template = "plotly_white"

In [None]:
# Location of the UCS Satellite Database spreadsheet.
# NOTE(review): this is an absolute, machine-specific path; adjust it when
# running the notebook elsewhere.
ucs_database_path = "/Users/adminuser/Downloads/UCS-Satellite-Database-5-1-2022.xls"

# Load the spreadsheet into a DataFrame and display it.
df = pd.read_excel(ucs_database_path)
df

In [None]:
# Inspect the rows whose "Purpose" is exactly "Surveillance".
is_surveillance = df["Purpose"].eq("Surveillance")
df.loc[is_surveillance]

In [None]:
def set_purpose(x):
    """Map a raw "Purpose" value onto one of a few broad categories.

    Matching is case-insensitive and substring based. Rules are tried in the
    order below and the first one that matches wins:

    1. contains both "earth" and "observ"      -> "Earth Observation"
    2. contains "communication" or "radio"     -> "Communications"
    3. contains "space"                        -> "Space Science"
    4. contains "navigation"                   -> "Navigation"
    5. contains "technology"                   -> "Technology R&D"
    6. contains "earth science"                -> "Earth Science"
    7. anything else                           -> "Other"

    Parameters
    ----------
    x : str or missing value
        Raw purpose text. Non-string input (``None``, ``NaN``, numbers) is
        treated as unclassifiable instead of raising ``AttributeError``.

    Returns
    -------
    str
        The name of the broad category.
    """
    # Missing cells arrive as float NaN / None when the caller has not
    # filled them; they have no text to match against.
    if not isinstance(x, str):
        return "Other"

    # Lower-case once instead of once per rule.
    purpose = x.lower()

    if "earth" in purpose and "observ" in purpose:
        return "Earth Observation"
    if "communication" in purpose or "radio" in purpose:
        return "Communications"

    # Single-keyword rules, in priority order.
    keyword_rules = (
        ("space", "Space Science"),
        ("navigation", "Navigation"),
        ("technology", "Technology R&D"),
        ("earth science", "Earth Science"),
    )
    for keyword, category in keyword_rules:
        if keyword in purpose:
            return category

    return "Other"

In [None]:
# Collapse the free-text "Purpose" column into broad categories.
# Missing purposes become "Unknown", which set_purpose() maps to "Other".
purpose_category = df["Purpose"].fillna("Unknown").apply(set_purpose)

# Number of satellites per (launch date, purpose).
# count() ignores rows whose "NORAD Number" is missing, so those rows are
# not included in the totals.
launches_by_date = (
    df.assign(Purpose=purpose_category)
    .groupby(["Date of Launch", "Purpose"])["NORAD Number"]
    .count()
    .rename("Number of Satellites Launched")
    .reset_index()
)

# Yearly totals per purpose, accumulated over time.
# * The value column is selected before resampling so the string "Purpose"
#   column is never passed to sum(); how grouping columns are treated by
#   groupby().resample() differs between pandas versions.
# * The YearEnd offset object is used instead of the "Y" alias, which newer
#   pandas releases deprecate; bins are still labelled with 31 December.
grouped_df = (
    launches_by_date.set_index("Date of Launch")
    .groupby("Purpose")["Number of Satellites Launched"]
    .resample(pd.offsets.YearEnd())
    .sum()
    .groupby(level="Purpose")
    .cumsum()
    .reset_index()
)
grouped_df

In [None]:
# Line chart of cumulative launches over time, one line per purpose,
# drawn on a logarithmic y-axis.
fig = px.line(
    grouped_df,
    x="Date of Launch",
    y="Number of Satellites Launched",
    color="Purpose",
    log_y=True,
)

# Two-line y-axis title: the quantity, then a small note about the scale.
fig.update_yaxes(
    title_text="Number of Satellites Launched<br><sup>(Cumulative; log scale)</sup>"
)

# Export a PNG at twice the default resolution, then render inline.
fig.write_image(
    "satellite_launches_over_time_log_scale.png",
    scale=2,
)
fig.show()

In [None]:
# Line chart of cumulative launches over time, one line per purpose,
# drawn on a linear y-axis.
fig = px.line(
    grouped_df,
    x="Date of Launch",
    y="Number of Satellites Launched",
    color="Purpose",
)

# Two-line y-axis title: the quantity, then a small note that it is cumulative.
fig.update_yaxes(
    title_text="Number of Satellites Launched<br><sup>(Cumulative)</sup>"
)

# Export a PNG at twice the default resolution, then render inline.
fig.write_image(
    "satellite_launches_over_time.png",
    scale=2,
)
fig.show()

In [None]:
# Area chart of cumulative launches over time, one coloured band per purpose.
px.area(
    grouped_df,
    x="Date of Launch",
    y="Number of Satellites Launched",
    color="Purpose",
    title="Satellite Launches by Purpose",
)