In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [2]:
# Milestone dates referenced by the chart annotations further down.

# SpaceX milestones.
spaceX_founding = datetime(year=2002, month=3, day=14)
spaceX_first_launch = datetime(year=2008, month=9, day=28)
spaceX_first_test_landing = datetime(year=2013, month=9, day=29)
spaceX_first_soft_touch_down = datetime(year=2014, month=4, day=8)
spaceX_first_landing = datetime(year=2015, month=12, day=21)

# Bounds of the period the charts label as the "Space Race".
space_race_start = datetime(year=1957, month=8, day=21)
first_moon_landing = datetime(year=1969, month=7, day=20)

In [3]:
# Load the launch records from the JSON file into a DataFrame.
# The codec is spelled with its canonical name ('utf-8') rather than relying on
# Python's lenient normalisation of punctuation in codec names.
launches = pd.read_json('../data/launch-library/launches.json', encoding='utf-8')

In [4]:
# Print a summary of the loaded frame: index range, column names,
# non-null counts and dtypes.
launches.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6767 entries, 0 to 6766
Data columns (total 35 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   id                                  6767 non-null   object 
 1   url                                 6767 non-null   object 
 2   slug                                6767 non-null   object 
 3   flightclub_url                      67 non-null     object 
 4   r_spacex_api_id                     39 non-null     object 
 5   name                                6767 non-null   object 
 6   status                              6767 non-null   object 
 7   last_updated                        6767 non-null   object 
 8   updates                             6767 non-null   object 
 9   net                                 6767 non-null   object 
 10  window_end                          6767 non-null   object 
 11  window_start                        6767 no

In [5]:
# Per-month launch counts ("new*") and running totals ("total*"), split by status.
# Result columns: year_month, new, new_success, new_failure, new_partial, new_tbd,
# total, total_success, total_failure, total_partial, total_tbd.

# Work on an explicit copy so the source frame is never mutated.
monthly_launches = launches[['net', 'status']].copy()

# Truncate every `net` timestamp to the first day of its month.
net_dates = pd.DatetimeIndex(monthly_launches['net'])
monthly_launches['year'] = net_dates.year
monthly_launches['month'] = net_dates.month
monthly_launches['day'] = 1
monthly_launches['year_month'] = pd.to_datetime(monthly_launches[['year', 'month', 'day']])

# One 0/1 indicator column per status abbreviation; 'new' counts every launch.
# Every indicator is an int so all of them aggregate the same way.
status_abbrev = monthly_launches['status'].apply(lambda s: s['abbrev'])
status_columns = {
  'new_success': 'Success',
  'new_failure': 'Failure',
  'new_partial': 'Partial Failure',
  'new_tbd': 'TBD',
}
monthly_launches['new'] = 1
for column, abbrev in status_columns.items():
  monthly_launches[column] = (status_abbrev == abbrev).astype(int)

# Sum the indicators per month (groupby sorts the months ascending).
count_columns = ['new', *status_columns]
launches_per_month = (
  monthly_launches
  .groupby('year_month')[count_columns]
  .sum()
  .reset_index()
)

# Running totals: a vectorised cumulative sum instead of a row-by-row loop.
for column in count_columns:
  launches_per_month[column.replace('new', 'total', 1)] = launches_per_month[column].cumsum()

launches_per_month

Unnamed: 0,year_month,new,new_success,new_failure,new_partial,new_tbd,total,total_success,total_failure,total_partial,total_tbd
0,1957-10-01,1,1,0,0,0,1,1,0,0,0
1,1957-11-01,1,1,0,0,0,2,2,0,0,0
2,1957-12-01,1,0,1,0,0,3,2,1,0,0
3,1958-02-01,2,1,1,0,0,5,3,2,0,0
4,1958-03-01,3,2,1,0,0,8,5,3,0,0
...,...,...,...,...,...,...,...,...,...,...,...
823,2032-12-01,2,0,0,0,2,6763,6010,486,29,228
824,2034-12-01,1,0,0,0,1,6764,6010,486,29,229
825,2038-12-01,1,0,0,0,1,6765,6010,486,29,230
826,2039-12-01,1,0,0,0,1,6766,6010,486,29,231


In [6]:
# Per-year launch counts ("new*") and running totals ("total*"), split by status.
# Result columns: year, new, new_success, new_failure, new_partial, new_tbd,
# total, total_success, total_failure, total_partial, total_tbd.

# Work on an explicit copy so the source frame is never mutated.
yearly_launches = launches[['net', 'status']].copy()

# Truncate every `net` timestamp to January 1st of its year; the 'year' column
# ends up holding that timestamp rather than a plain integer.
yearly_launches['year'] = pd.DatetimeIndex(yearly_launches['net']).year
yearly_launches['month'] = 1
yearly_launches['day'] = 1
yearly_launches['year'] = pd.to_datetime(yearly_launches[['year', 'month', 'day']])

# One 0/1 indicator column per status abbreviation; 'new' counts every launch.
# Every indicator is an int so all of them aggregate the same way.
status_abbrev = yearly_launches['status'].apply(lambda s: s['abbrev'])
status_columns = {
  'new_success': 'Success',
  'new_failure': 'Failure',
  'new_partial': 'Partial Failure',
  'new_tbd': 'TBD',
}
yearly_launches['new'] = 1
for column, abbrev in status_columns.items():
  yearly_launches[column] = (status_abbrev == abbrev).astype(int)

# Sum the indicators per year (groupby sorts the years ascending).
count_columns = ['new', *status_columns]
launches_per_year = (
  yearly_launches
  .groupby('year')[count_columns]
  .sum()
  .reset_index()
)

# Running totals: a vectorised cumulative sum instead of a row-by-row loop.
for column in count_columns:
  launches_per_year[column.replace('new', 'total', 1)] = launches_per_year[column].cumsum()

launches_per_year

Unnamed: 0,year,new,new_success,new_failure,new_partial,new_tbd,total,total_success,total_failure,total_partial,total_tbd
0,1957-01-01,3,2,1,0,0,3,2,1,0,0
1,1958-01-01,28,8,20,0,0,31,10,21,0,0
2,1959-01-01,28,14,11,3,0,59,24,32,3,0
3,1960-01-01,45,23,22,0,0,104,47,54,3,0
4,1961-01-01,62,39,18,5,0,166,86,72,8,0
...,...,...,...,...,...,...,...,...,...,...,...
75,2032-01-01,2,0,0,0,2,6763,6010,486,29,228
76,2034-01-01,1,0,0,0,1,6764,6010,486,29,229
77,2038-01-01,1,0,0,0,1,6765,6010,486,29,230
78,2039-01-01,1,0,0,0,1,6766,6010,486,29,231


In [7]:
# Line chart of launches per year (plus a monthly trace), with SpaceX milestones
# marked as vertical lines.
passive_color = '#bed9ec'
success_color = '#9ce68a'
annotation_bgcolor = '#222'

annotation_style = {'bgcolor': annotation_bgcolor, 'bordercolor': annotation_bgcolor, 'borderwidth': 5}

x_value = launches_per_year['year']
x_value_months = launches_per_month['year_month']

fig = go.Figure(
  layout={
    'template': 'plotly_dark',
    'title': 'Global Rocket Launches per year',
    'height': 600,
    'margin_b': 150,
    'margin_r': 150,
    'xaxis_range': ['2000-11','2022-11'],
    'yaxis': {
      'side': 'right',
      'title': 'Launches',
      'title_font_color': '#ececec',
      'title_font_size': 15,
    },
    'legend_x': 0,
  }
)

# Yearly traces take their y-values from the yearly frame so that x and y have
# the same length and describe the same period.
fig.add_trace(go.Scatter(
  name='All yearly',
  x=x_value,
  y=launches_per_year['new'],
  line_shape='spline'
))

fig.add_trace(go.Scatter(
  name='All monthly',
  x=x_value_months,
  y=launches_per_month['new'],
  line_shape='spline'
))

fig.add_trace(go.Scatter(
  name='Success',
  x=x_value,
  y=launches_per_year['new_success'],
  line_shape='spline',
  visible='legendonly'
))

fig.add_trace(go.Scatter(
  name='Failure',
  x=x_value,
  y=launches_per_year['new_failure'],
  line_shape='spline',
  visible='legendonly'
))

# Milestone markers: (date, label, colour, vertical position of the label).
# The x position is passed as epoch milliseconds, not as a datetime.
milestones = [
  (spaceX_founding, 'SpaceX founded', passive_color, 0.5),
  (spaceX_first_launch, 'SpaceX first launch', passive_color, 0.55),
  (spaceX_first_landing, 'SpaceX first landing', success_color, 0.6),
]
for moment, label, color, label_y in milestones:
  fig.add_vline(
    x=moment.timestamp() * 1000,
    annotation={'text': label, 'font_color': color, 'y': label_y, **annotation_style},
    annotation_position='top',
    line={'dash': 'longdash', 'color': color}
  )

fig.show()

In [8]:
def format_coefs(coefs):
  """Render polynomial coefficients as a LaTeX equation string.

  Args:
    coefs: iterable of coefficients ordered by ascending power, i.e.
      ``coefs[i]`` multiplies ``x^i``.

  Returns:
    The polynomial wrapped in ``$...$``, e.g. ``[1, -2, 3]`` gives
    ``'$1 - 2x + 3x^2$'``. An empty input gives ``'$$'``.
  """
  terms = []
  for power, coef in enumerate(coefs):
    # Build each term directly instead of patching 'x^0'/'x^1' afterwards:
    # a textual replace of 'x^1' would also corrupt 'x^10', 'x^11', ...
    if power == 0:
      variable = ''
    elif power == 1:
      variable = 'x'
    else:
      variable = f'x^{power}'
    terms.append(f'{coef}{variable}')

  # Join with a single space on each side of '+' so the sign fix-up below
  # actually matches terms that have a negative coefficient.
  equation = ' + '.join(terms).replace('+ -', '- ')

  return f'${equation}$'

In [9]:
# Scatter of launches per month with least-squares polynomial fits of degree 1-4.
df = launches_per_month
# Month timestamps as epoch milliseconds, shaped (n_samples, 1) for scikit-learn.
X = df['year_month'].apply(lambda c: c.timestamp() * 1000).values.reshape(-1, 1)
x_range = np.linspace(X.min(), X.max(), 100).reshape(-1, 1)

# Fit on a [0, 1]-scaled copy of the time axis. Raw epoch milliseconds are ~1e12,
# so their powers span dozens of orders of magnitude and the least-squares
# problem becomes numerically rank-deficient for degree >= 2. An affine rescale
# spans the same polynomial space, so the fitted curve is mathematically the same.
x_origin = X.min()
x_span = (X.max() - x_origin) or 1.0  # avoid dividing by zero for a single month
X_scaled = (X - x_origin) / x_span
x_range_scaled = (x_range - x_origin) / x_span

fig = px.scatter(df, x='year_month', y='new')
for degree in [1, 2, 3, 4]:
  poly = PolynomialFeatures(degree)
  poly.fit(X_scaled)
  X_poly = poly.transform(X_scaled)
  x_range_poly = poly.transform(x_range_scaled)

  # PolynomialFeatures already emits the constant column, so no extra intercept.
  model = LinearRegression(fit_intercept=False)
  model.fit(X_poly, df['new'])
  y_poly = model.predict(x_range_poly)

  # Plot against the unscaled axis and label the curve with its actual degree.
  fig.add_trace(go.Scatter(x=x_range.squeeze(), y=y_poly, name=f'degree {degree}'))

fig.update_layout({
  'template': 'plotly_dark',
  'xaxis_range': ['1955', '2023']
})
fig.show()


In [85]:
# Scatter of launches per month with a rolling trendline, annotated with the
# Space Race period and SpaceX milestones.
passive_color = '#bed9ec'
success_color = '#9ce68a'
annotation_bgcolor = '#222'
annotation_style = {'bgcolor': annotation_bgcolor, 'bordercolor': annotation_bgcolor, 'borderwidth': 5}

# Not read in this cell; kept because it rebinds the notebook-level name.
x_value = launches_per_month['year_month']


def epoch_ms(moment):
  """Return a naive datetime as milliseconds since 1970-01-01, read as UTC.

  Pure datetime arithmetic: the result does not depend on the machine's
  timezone and also works for dates before 1970.
  """
  return (moment - datetime(1970, 1, 1)).total_seconds() * 1000


first_moon_landing_epoch = epoch_ms(first_moon_landing)

fig = px.scatter(
  launches_per_month,
  x='year_month',
  y='new',
  trendline='rolling',
  # trendline_scope="overall",
  trendline_color_override='#f00',
  trendline_options={
    'window': 25,
  },
)

fig.data[0].opacity = 0.5

fig.update_layout({
  'template': 'plotly_dark',
  'title': 'Global Rocket Launches over time',
  'height': 600,
  'margin_b': 150,
  'margin_r': 150,
  'xaxis': {
    'range': ['1955','2023'],
    'title': 'Time'
  },
  'yaxis': {
    'side': 'right',
    'title': 'Launches per month',
  },
  'legend_x': 0,
})

# Vertical marker lines: (date, label, colour, vertical position of the label).
# Positions are passed as epoch milliseconds so the moon-landing line sits on
# the right edge of the shaded Space Race region below.
# NOTE(review): assumes Plotly reads numeric date-axis values as UTC milliseconds - confirm.
timeline_markers = [
  (spaceX_founding, 'SpaceX founded', passive_color, 0.7),
  (spaceX_first_launch, 'SpaceX first launch', passive_color, 0.75),
  (spaceX_first_landing, 'SpaceX first landing', success_color, 0.8),
  (first_moon_landing, 'First moon landing', passive_color, 0.7),
]
for moment, label, color, label_y in timeline_markers:
  fig.add_vline(
    x=epoch_ms(moment),
    annotation={
      'text': label,
      'font_color': color,
      'y': label_y,
      **annotation_style
    },
    annotation_position='top',
    line={'dash': 'longdash', 'color': color}
  )

# Shaded band covering the Space Race period.
fig.add_vrect(
  x0=space_race_start,
  x1=first_moon_landing,
  line_width=0,
  fillcolor=passive_color,
  opacity=0.1,
  annotation={
    'text': f'Space Race<br>{space_race_start.year} - {first_moon_landing.year}',
    'font_color': passive_color,
    'y': 0.6,
    **annotation_style
  },
  annotation_position='left'
)

# Explanatory captions placed below the plot area (paper coordinates on y).
fig.add_annotation(go.layout.Annotation(
  text=f'During the <b style="color: {passive_color}">Space Race</b> we saw an incredible increase in<br>rocket launches mainly by the <b>Soviet Space Program</b> and <b>NASA</b>.',
  align='left',
  showarrow=False,
  x=first_moon_landing_epoch,
  yref='paper',
  y=-0.15,
  yanchor='top',
  bgcolor=annotation_bgcolor,
  bordercolor=annotation_bgcolor,
  borderwidth=10
))
fig.add_annotation(go.layout.Annotation(
  text='Currently we are witnessing an even steeper<br>increase in rockets launched than during the Space Race.',
  align='left',
  showarrow=False,
  x=spaceX_first_launch,
  yref='paper',
  y=-0.15,
  yanchor='top',
  bgcolor=annotation_bgcolor,
  bordercolor=annotation_bgcolor,
  borderwidth=10
))

fig.show()

In [55]:
# Scratch calculation: seconds from the moon landing to the local-time epoch,
# minus half the length of the Space Race period in seconds.
epoch_minus_landing_seconds = (datetime.fromtimestamp(0) - first_moon_landing).total_seconds()
half_space_race_seconds = ((first_moon_landing - space_race_start) / 2).total_seconds()
epoch_minus_landing_seconds - half_space_race_seconds

-173703600.0