In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objs as go

# These might be helpful:
from iso3166 import countries
from datetime import datetime, timedelta

In [2]:
df = pd.read_csv("mission_launches.csv")
df.head()
df.tail()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Organisation,Location,Date,Detail,Rocket_Status,Price,Mission_Status
4319,4319,4319,US Navy,"LC-18A, Cape Canaveral AFS, Florida, USA","Wed Feb 05, 1958 07:33 UTC",Vanguard | Vanguard TV3BU,StatusRetired,,Failure
4320,4320,4320,AMBA,"LC-26A, Cape Canaveral AFS, Florida, USA","Sat Feb 01, 1958 03:48 UTC",Juno I | Explorer 1,StatusRetired,,Success
4321,4321,4321,US Navy,"LC-18A, Cape Canaveral AFS, Florida, USA","Fri Dec 06, 1957 16:44 UTC",Vanguard | Vanguard TV3,StatusRetired,,Failure
4322,4322,4322,RVSN USSR,"Site 1/5, Baikonur Cosmodrome, Kazakhstan","Sun Nov 03, 1957 02:30 UTC",Sputnik 8K71PS | Sputnik-2,StatusRetired,,Success
4323,4323,4323,RVSN USSR,"Site 1/5, Baikonur Cosmodrome, Kazakhstan","Fri Oct 04, 1957 19:28 UTC",Sputnik 8K71PS | Sputnik-1,StatusRetired,,Success


In [3]:
df.dtypes

Unnamed: 0.1       int64
Unnamed: 0         int64
Organisation      object
Location          object
Date              object
Detail            object
Rocket_Status     object
Price             object
Mission_Status    object
dtype: object

In [4]:
df["Date"] = pd.to_datetime(df["Date"], format="%a %b %d, %Y", exact=False)

In [5]:
df["Launch Vehicle"] = df["Detail"].str.split("|", expand=True)[1]

In [6]:
df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Organisation,Location,Date,Detail,Rocket_Status,Price,Mission_Status,Launch Vehicle
0,0,0,SpaceX,"LC-39A, Kennedy Space Center, Florida, USA",2020-08-07,Falcon 9 Block 5 | Starlink V1 L9 & BlackSky,StatusActive,50.0,Success,Starlink V1 L9 & BlackSky
1,1,1,CASC,"Site 9401 (SLS-2), Jiuquan Satellite Launch Ce...",2020-08-06,Long March 2D | Gaofen-9 04 & Q-SAT,StatusActive,29.75,Success,Gaofen-9 04 & Q-SAT
2,2,2,SpaceX,"Pad A, Boca Chica, Texas, USA",2020-08-04,Starship Prototype | 150 Meter Hop,StatusActive,,Success,150 Meter Hop
3,3,3,Roscosmos,"Site 200/39, Baikonur Cosmodrome, Kazakhstan",2020-07-30,Proton-M/Briz-M | Ekspress-80 & Ekspress-103,StatusActive,65.0,Success,Ekspress-80 & Ekspress-103
4,4,4,ULA,"SLC-41, Cape Canaveral AFS, Florida, USA",2020-07-30,Atlas V 541 | Perseverance,StatusActive,145.0,Success,Perseverance


In [7]:
df["Year"] = pd.DatetimeIndex(df["Date"]).year

In [38]:
launches_per_year = df.groupby("Year").count().Detail
launches_per_year.head()


fig = px.line(
    data_frame=launches_per_year,
    template="plotly_dark",
    labels={
        "value": "Launch Count"
    },
    title="Number of Rocket Launches per Year",
    width=1200,
    height=600
)
fig.update_layout(showlegend=False)

fig.show()

In [9]:
military = [
    "Arm??e de l'Air", "IRGC", "RVSN USSR", "US Air Force", "US Navy", "VKS RF"
]
space_agency = [
    "AEB", "ASI", "CNES", "ESA", "ISA", "ISAS", "ISRO", "JAXA", "KARI", "KCST", "NASA", "Roscosmos"
]
state_owned_company = [
    "OKB-586", "CASC", "CASIC", "Kosmotras", "Sandia", "Yuzhmash"
]
private_company = [
    "AMBA", "Arianespace", "Blue Origin", "Boeing", "CECLES", "Douglas", "EER", "Eurockot", "ExPace", "Exos", "General Dynamics", "IAI", "ILS", "Khrunichev", "Land Launch", "Landspace", "Lockheed", "MHI", "MITT", "Martin Marietta", "Northrop", "OneSpace", "RAE", "Rocket Lab", "SRC", "Sea Launch", "SpaceX", "Starsem", "ULA", "UT", "Virgin Orbit", "i-Space"
]

In [10]:
def get_organization_type(org):
    if org in military:
        return 'Military'
    elif org in space_agency:
        return 'Space Agency'
    elif org in state_owned_company:
        return 'State-Owned Company'
    elif org in private_company:
        return 'Private Company'
    else:
        return 'Other'


df['Organisation Type'] = df['Organisation'].apply(get_organization_type)

In [11]:
df["Military"] = np.where(df["Organisation"].isin(military), 1, 0)
df["Space Agency"] = np.where(df["Organisation"].isin(space_agency), 1, 0)
df["State-Owned Company"] = np.where(df["Organisation"].isin(state_owned_company), 1, 0)
df["Private Company"] = np.where(df["Organisation"].isin(private_company), 1, 0)

In [13]:
print(sorted(df["Organisation"].unique()))

['AEB', 'AMBA', 'ASI', 'Arianespace', "Arm??e de l'Air", 'Blue Origin', 'Boeing', 'CASC', 'CASIC', 'CECLES', 'CNES', 'Douglas', 'EER', 'ESA', 'Eurockot', 'ExPace', 'Exos', 'General Dynamics', 'IAI', 'ILS', 'IRGC', 'ISA', 'ISAS', 'ISRO', 'JAXA', 'KARI', 'KCST', 'Khrunichev', 'Kosmotras', 'Land Launch', 'Landspace', 'Lockheed', 'MHI', 'MITT', 'Martin Marietta', 'NASA', 'Northrop', 'OKB-586', 'OneSpace', 'RAE', 'RVSN USSR', 'Rocket Lab', 'Roscosmos', 'SRC', 'Sandia', 'Sea Launch', 'SpaceX', 'Starsem', 'ULA', 'US Air Force', 'US Navy', 'UT', 'VKS RF', 'Virgin Orbit', 'Yuzhmash', 'i-Space']


In [30]:
df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Organisation,Location,Date,Detail,Rocket_Status,Price,Mission_Status,Launch Vehicle,Year,Organisation Type,Military,Space Agency,State-Owned Company,Private Company
0,0,0,SpaceX,"LC-39A, Kennedy Space Center, Florida, USA",2020-08-07,Falcon 9 Block 5 | Starlink V1 L9 & BlackSky,StatusActive,50.0,Success,Starlink V1 L9 & BlackSky,2020,Private Company,0,0,0,1
1,1,1,CASC,"Site 9401 (SLS-2), Jiuquan Satellite Launch Ce...",2020-08-06,Long March 2D | Gaofen-9 04 & Q-SAT,StatusActive,29.75,Success,Gaofen-9 04 & Q-SAT,2020,State-Owned Company,0,0,1,0
2,2,2,SpaceX,"Pad A, Boca Chica, Texas, USA",2020-08-04,Starship Prototype | 150 Meter Hop,StatusActive,,Success,150 Meter Hop,2020,Private Company,0,0,0,1
3,3,3,Roscosmos,"Site 200/39, Baikonur Cosmodrome, Kazakhstan",2020-07-30,Proton-M/Briz-M | Ekspress-80 & Ekspress-103,StatusActive,65.0,Success,Ekspress-80 & Ekspress-103,2020,Space Agency,0,1,0,0
4,4,4,ULA,"SLC-41, Cape Canaveral AFS, Florida, USA",2020-07-30,Atlas V 541 | Perseverance,StatusActive,145.0,Success,Perseverance,2020,Private Company,0,0,0,1


In [16]:
military_pa = df.groupby("Year")["Military"].sum()
space_agency_pa = df.groupby("Year")["Space Agency"].sum()
state_owned_company_pa = df.groupby("Year")["State-Owned Company"].sum()
private_company_pa = df.groupby("Year")["Private Company"].sum()

In [29]:
organisation_type_per_year = pd.DataFrame({
    'Year': military_pa.index,
    'Military': military_pa.values,
    'Space Agency': space_agency_pa.values,
    'State-Owned Company': state_owned_company_pa.values,
    'Private Company': private_company_pa.values
})
organisation_type_per_year.head()

fig = px.line(
    organisation_type_per_year,
    x="Year",
    y=["Military", "Space Agency", "State-Owned Company", "Private Company"],
    template="plotly_dark", 
    labels={
        "variable": "Organisation Type",
        "value": "Launch Count",
    },
    title="Number of Rocket Launches per Year by Organisation Type",
    width=1200,
    height=600,
)


fig.show()


In [None]:
df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Organisation,Location,Date,Detail,Rocket_Status,Price,Mission_Status,Launch Vehicle,Year,Organisation Type,Military,Space Agency,State-Owned Company,Private Company
0,0,0,SpaceX,"LC-39A, Kennedy Space Center, Florida, USA",2020-08-07,Falcon 9 Block 5 | Starlink V1 L9 & BlackSky,StatusActive,50.0,Success,Starlink V1 L9 & BlackSky,2020,Private Company,0,0,0,1
1,1,1,CASC,"Site 9401 (SLS-2), Jiuquan Satellite Launch Ce...",2020-08-06,Long March 2D | Gaofen-9 04 & Q-SAT,StatusActive,29.75,Success,Gaofen-9 04 & Q-SAT,2020,State-Owned Company,0,0,1,0
2,2,2,SpaceX,"Pad A, Boca Chica, Texas, USA",2020-08-04,Starship Prototype | 150 Meter Hop,StatusActive,,Success,150 Meter Hop,2020,Private Company,0,0,0,1
3,3,3,Roscosmos,"Site 200/39, Baikonur Cosmodrome, Kazakhstan",2020-07-30,Proton-M/Briz-M | Ekspress-80 & Ekspress-103,StatusActive,65.0,Success,Ekspress-80 & Ekspress-103,2020,Space Agency,0,1,0,0
4,4,4,ULA,"SLC-41, Cape Canaveral AFS, Florida, USA",2020-07-30,Atlas V 541 | Perseverance,StatusActive,145.0,Success,Perseverance,2020,Private Company,0,0,0,1


In [64]:
# Average cost per year

cost_nan_removed = df.dropna()

# remove commas from column
cost_nan_removed["Price"] = df["Price"].str.replace(",","", regex=True)

# converts column to float
cost_nan_removed["Price"] = cost_nan_removed["Price"].astype(float)

# groups by mean cost per year
average_cost_pa = cost_nan_removed.groupby("Year")["Price"].mean()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [65]:
average_cost_pa.head()

Year
1964     63.230
1965     63.230
1966     59.000
1967    196.625
1968    279.200
Name: Price, dtype: float64

In [66]:
fig = px.line(
    data_frame=average_cost_pa,
    template="plotly_dark",
    labels={
        "value": "Launch Count"
    },
    title="Average cost of Launches Over Time",
    width=1200,
    height=600
)
fig.update_layout(showlegend=False)

fig.show()