# Space Mission Analysis

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
from iso3166 import countries
from dash import Dash, dcc, html, Input, Output

In [2]:
# Render floats in DataFrame output with thousands separators and two decimals.
pd.set_option('display.float_format', '{:,.2f}'.format)

In [3]:
# Load the raw launch records (relative path, resolved against the working directory).
df = pd.read_csv(filepath_or_buffer='mission_launches.csv')

In [4]:
# Discard the two 'Unnamed' columns (presumably leftover index columns from
# earlier CSV exports -- confirm against the data file).
df_clean = df.drop(['Unnamed: 0.1', 'Unnamed: 0'], axis='columns')

# Number of Launches per Company

In [5]:
# Non-null value count of every column, per organisation; the chart below
# uses the 'Location' count as the number of launches.
df_count_launch = (
    df_clean
    .groupby('Organisation')
    .count()
    .reset_index()
)

In [6]:
# Bar chart of launches per organisation, tallest bar first, with the count
# printed above each bar.
fig_launch = px.bar(
    df_count_launch,
    x='Organisation',
    y='Location',
    text='Location',
    title='Number of Launch Per Company',
    labels={'Location': 'Number of Launches'},
)
fig_launch.update_traces(texttemplate='%{text}', textposition='outside')
fig_launch.update_layout(xaxis_categoryorder='total descending')

fig_launch.show()

# Number of Active versus Retired Rockets

In [7]:
# Collapse the raw status into 'Active' / 'Retired'.
# 'Active' is accepted alongside the raw 'StatusActive' so that re-running
# this cell on already-recoded data does not relabel every rocket as 'Retired'.
df_clean['Rocket_Status'] = np.where(
    df_clean['Rocket_Status'].isin(['StatusActive', 'Active']),
    'Active',
    'Retired',
)

In [8]:
# Row count for every (organisation, rocket status) pair, as a flat frame
# with the count in a 'Number' column.
df_active = (
    df_clean
    .groupby(['Organisation', 'Rocket_Status'])
    .size()
    .reset_index(name='Number')
)

In [9]:
# Grouped bars: one bar per rocket status for each organisation.
fig_active = px.bar(
    df_active,
    x='Organisation',
    y='Number',
    color='Rocket_Status',
    barmode='group',
    text='Number',
    title='Number of Active versus Retired Rockets',
    labels={
        'Number': 'Number of rockets',
        'Organisation': 'Organisation Name',
        'Rocket_Status': 'Status',
    },
)

# Fixed y-range leaves headroom for the labels drawn above the bars.
fig_active.update_yaxes(range=[0, 2000])
fig_active.update_traces(texttemplate='%{text}', textposition='outside')
fig_active.update_layout(xaxis_categoryorder='total descending')

fig_active.show()

# Distribution of Mission Status

In [10]:
# Number of missions recorded under each mission status.
df_status = (
    df_clean
    .groupby('Mission_Status')
    .size()
    .reset_index(name='Number')
)

In [11]:
# Bar chart of mission outcomes, most frequent status first.
fig_status = px.bar(
    df_status,
    x='Mission_Status',
    y='Number',
    text='Number',
    title='Distribution of Mission Status',
    labels={'Mission_Status': 'Mission Status'},
)

fig_status.update_layout(xaxis_categoryorder='total descending')

fig_status.show()

# How Expensive are the Launches? 

In [12]:
# Keep only launches with a known price. The explicit copy makes df_price an
# independent frame, so later column assignments neither warn about nor
# depend on a link back to df_clean.
df_price = df_clean.dropna(subset=['Price']).copy()

In [None]:
# Strip thousands separators (e.g. '1,160.0') and convert prices to floats.
# `assign` builds a new frame instead of writing into one derived from
# df_clean, which avoids pandas' chained-assignment warning.
df_price = df_price.assign(
    Price=df_price['Price'].replace({',': ''}, regex=True).astype(float)
)

In [14]:
# Histogram of all known launch prices.
fig_price = px.histogram(
    df_price,
    x='Price',
    nbins=250,
    labels={'Price': 'Price [mln]', 'count': 'Count'},
    title='Launch price',
)

fig_price.update_layout(bargap=0.01)

fig_price.show()

In [15]:
# Restrict to launches priced strictly below 1000 so the bulk of the
# distribution is not flattened by the few very expensive launches.
df_price_small = df_price.loc[df_price['Price'].lt(1000)]

In [16]:
# Mean price of the sub-1000 launches (kept as a notebook-level value).
mean_value = df_price_small['Price'].mean()

# Histogram of the sub-1000 launch prices.
fig_price_small = px.histogram(
    df_price_small,
    x='Price',
    nbins=20,
    labels={'Price': 'Price [mln]', 'count': 'Count'},
    title='Launch price',
)

fig_price_small.update_layout(bargap=0.01)

fig_price_small.show()

# Number of Launches by Country


In [17]:
# Work on a copy: a plain `df_location = df_clean` only aliases the frame, so
# the four columns added below would also appear on df_clean.
df_location = df_clean.copy()

# Split 'Location' on commas into up to four parts. Locations with fewer
# parts leave the trailing columns empty; the next cell back-fills them.
df_location[['Code', 'Base', 'Country', 'Country_Name']] = (
    df_location['Location'].str.split(',', expand=True)
)

In [18]:
# Where the fourth location part is missing, fall back to the third part and
# then to the second, so 'Country_Name' is filled whenever any of them exists.
df_location['Country_Name'] = (
    df_location['Country_Name']
    .fillna(df_location['Country'])
    .fillna(df_location['Base'])
)

# The helper columns are no longer needed once 'Country_Name' is resolved.
df_location = df_location.drop(['Country', 'Location', 'Base'], axis='columns')

In [19]:
# Rewrite launch-site names that are not country names. Keys keep the leading
# space left over from splitting 'Location' on commas, and are applied as
# regular-expression patterns.
new_country_names = {
    ' Russia': 'Russian Federation',
    ' New Mexico': 'USA',
    ' Yellow Sea': 'China',
    ' Shahrud Missile Test Site': 'Iran',
    ' Pacific Missile Range Facility': 'USA',
    ' Barents Sea': 'Russian Federation',
    ' Gran Canaria': 'USA',
    ' Pacific Ocean': 'Kiribati',
}

# Apply the renames, then drop the leading whitespace on every remaining name.
df_location['Country_Name'] = (
    df_location['Country_Name']
    .replace(new_country_names, regex=True)
    .str.lstrip()
)

In [20]:
# Lookup table from iso3166 country name to its alpha-3 code.
country_code = {entry.name: entry.alpha3 for entry in countries}

# Map names to codes; names with no match keep the name itself as a
# placeholder, to be patched in the next cell.
df_location['Country_Code'] = (
    df_location['Country_Name']
    .map(country_code)
    .fillna(df_location['Country_Name'])
)

In [21]:
# Alpha-3 codes for the names the iso3166 name lookup did not resolve.
missing_codes = {
    'USA': 'USA',
    'Iran': 'IRN',
    'North Korea': 'PRK',
    'South Korea': 'KOR',
}

df_location['Country_Code'] = df_location['Country_Code'].replace(to_replace=missing_codes)

In [22]:
# One row per country code: the first country name seen for it and the number
# of launches (non-null 'Code' values), stored as 'Count'.
df_location_count = (
    df_location
    .groupby('Country_Code')
    .agg(Country_Name=('Country_Name', 'first'), Count=('Code', 'count'))
    .reset_index()
)

In [23]:
# World map coloured by the number of launches per country.
fig_map = px.choropleth(
    df_location_count,
    locations='Country_Code',
    color='Count',
    hover_name='Country_Name',
    color_continuous_scale='viridis',
    labels={'Country_Code': 'Country Code', 'Count': 'Number of Launches'},
    title='Number of Launches by Country',
)

# Draw coastlines and give land a neutral fill.
fig_map.update_layout(
    geo={
        'showcoastlines': True,
        'coastlinecolor': "white",
        'showland': True,
        'landcolor': "lightgray",
    },
)

fig_map.show()

# Number of Failures by Country


In [24]:
# Missions per (country, mission status); the non-null count of 'Code' is
# exposed as 'Count'.
df_count_fail = (
    df_location
    .groupby(['Country_Code', 'Country_Name', 'Mission_Status'])
    .count()
    .reset_index()
    .rename(columns={'Code': 'Count'})
)

In [25]:
# Dash app: a dropdown of mission statuses driving a choropleth map.
app = Dash(__name__)

# One dropdown entry per distinct mission status present in the data.
status_options = [
    {'label': mission_status, 'value': mission_status}
    for mission_status in df_count_fail['Mission_Status'].unique()
]

app.layout = html.Div(
    children=[
        html.H1('Number and Mission Status by Country'),
        dcc.Dropdown(
            id='status-dropdown',
            options=status_options,
            value='Success',
            clearable=False,
            style={'width': '50%'},
        ),
        dcc.Graph(id='choropleth-map'),
    ]
)

@app.callback(
    Output('choropleth-map', 'figure'),
    [Input('status-dropdown', 'value')]
)
def update_map(selected_status):
    """Build the choropleth for the mission status picked in the dropdown.

    Filters ``df_count_fail`` to rows whose ``Mission_Status`` equals
    ``selected_status`` and returns a world map coloured by ``Count``.
    """
    status_rows = df_count_fail[df_count_fail['Mission_Status'] == selected_status]

    status_map = px.choropleth(
        status_rows,
        locations='Country_Code',
        locationmode='ISO-3',
        color='Count',
        color_continuous_scale=px.colors.sequential.Plasma,
        hover_name='Country_Code',
        hover_data=['Country_Name', 'Count'],
        labels={'Country_Code': 'Country Code', 'Count': 'Number of mission'},
        title=f'Number of mission with status: {selected_status}',
    )
    # customdata follows the hover_data order: [Country_Name, Count].
    status_map.update_traces(
        hovertemplate='<br>Country: %{customdata[0]}  <br>Number of Missions: %{customdata[1]} '
    )
    return status_map

# Start the Dash development server for the mission-status map.
# `app.run` supersedes `app.run_server`, which current Dash releases reject.
if __name__ == '__main__':
    app.run(debug=True)

# Chart of the Countries, Organisations, and Mission Status

In [26]:
# Missions per (country, organisation, mission status); the non-null count of
# 'Code' is exposed as 'Count'.
df_sunburs = (
    df_location
    .groupby(['Country_Name', 'Organisation', 'Mission_Status'])
    .count()
    .reset_index()
    .rename(columns={'Code': 'Count'})
)

In [27]:
# Sunburst hierarchy: country -> organisation -> mission status, with sector
# size given by the mission count and colour by country.
fig_sunburst = px.sunburst(
    df_sunburs,
    path=['Country_Name', 'Organisation', 'Mission_Status'],
    values='Count',
    color='Country_Name',
)

fig_sunburst.update_traces(
    hovertemplate=(
        '<br><b>Country Name</b>: %{customdata[0]}<br>'
        '<br><b>Number of Missions</b>: %{value}<br>'
    )
)

fig_sunburst.show()

# Total Amount of Money Spent by Organisation on Space Missions

In [28]:
# Total known launch spend per organisation.
# Only 'Price' is summed: summing the whole frame would also "add up" the
# text columns (string concatenation), which is wasteful and meaningless.
df_money_spend = df_price.groupby('Organisation')['Price'].sum().reset_index()

df_money_spend['Price'] = df_money_spend['Price'].round(2)

In [29]:
# Bar chart of total spend per organisation, largest first.
fig_money_spend = px.bar(
    df_money_spend,
    x='Organisation',
    y='Price',
    text='Price',
    title='Total Amount of Money Spent by Organisation on Space Missions',
    labels={'Price': 'Total Amount of Money Spent [mln $]', 'Organisation': 'Organisation Name'},
)

# 10% headroom above the tallest bar keeps the outside labels visible.
fig_money_spend.update_layout(
    xaxis={'categoryorder': 'total descending'},
    yaxis={'range': [0, df_money_spend['Price'].max() * 1.1]},
)
fig_money_spend.update_traces(textposition='outside')

fig_money_spend.show()

In [None]:
# Average launch price per organisation.
# Copy first: `df_avg_launch = df_price` only aliases the frame, so adding
# 'Avg_Price' would also modify df_price and trigger a chained-assignment
# warning.
df_avg_launch = df_price.copy()
df_avg_launch['Avg_Price'] = (
    df_avg_launch.groupby('Organisation')['Price'].transform('mean').round(2)
)
# Every row of an organisation carries the same average; keep one row each.
df_avg_launch = df_avg_launch.drop_duplicates('Organisation')

In [31]:
# Bar chart of the average launch price per organisation, largest first.
fig_avg_launch = px.bar(
    data_frame=df_avg_launch,
    x='Organisation',
    y='Avg_Price',
    labels={'Avg_Price': 'Money Spend [mln $]', 'Organisation': 'Organisation Name'},
    title='Amount of Money Spent by Organisation per Launch',
    text='Avg_Price'
)

# Scale the y-axis from the plotted column ('Avg_Price'), with 10% headroom
# for the outside labels. Using 'Price' here would size the axis from
# unrelated single-launch prices.
fig_avg_launch.update_layout(yaxis={'range': [0, df_avg_launch['Avg_Price'].max() * 1.1]})
fig_avg_launch.update_layout(xaxis={'categoryorder': 'total descending'})
fig_avg_launch.update_traces(textposition='outside')

fig_avg_launch.show()

# Number of Launches per Year

In [32]:
def extract_year(date_string):
    """Return the calendar year of a launch date string.

    The string is first parsed with pandas' default parser; if that raises
    ``ValueError`` it is re-parsed with the explicit date-only format
    ``'%a %b %d, %Y'`` (e.g. ``'Thu Aug 29, 2019'``).
    """
    try:
        parsed = pd.to_datetime(date_string)
    except ValueError:
        parsed = pd.to_datetime(date_string, format='%a %b %d, %Y')
    return parsed.year

In [33]:
# df_date is another name for df_location (not a copy), so 'Year' is added to
# that same frame.
df_date = df_location

# Derive the launch year from each date string.
df_date['Year'] = df_date['Date'].apply(func=extract_year)

# Non-null value count of every column, per launch year.
df_launch_per_year = (
    df_date
    .groupby('Year')
    .count()
    .reset_index()
)

In [34]:
# Launches per year; after the grouped count, the 'Organisation' column holds
# the number of launches and drives both bar height and colour.
fig_launch_per_year = px.bar(
    df_launch_per_year,
    x='Year',
    y='Organisation',
    color='Organisation',
    title='Number of Launches per Year',
    labels={'Organisation': 'Number of Launch'},
)

fig_launch_per_year.show()

# Cold War Space Race: USA vs USSR

In [None]:
# Launches up to and including 1991 that either took place in the USA or were
# flown by 'RVSN USSR'. The explicit copy detaches the selection from df_date,
# so the column assignments below (and in later cells) do not raise pandas'
# chained-assignment warning.
df_cold_war = df_date[
    (df_date['Year'] <= 1991)
    & ((df_date['Country_Name'] == 'USA') | (df_date['Organisation'] == 'RVSN USSR'))
].copy()

# Soviet launches are recorded under the launch site's present-day country;
# relabel them as 'USSR'.
df_cold_war['Country_Name'] = df_cold_war['Country_Name'].replace(['Kazakhstan', 'Russian Federation'], 'USSR')

## Total number of launches of the USSR and the USA

In [36]:
# Non-null value count of every column, per country.
df_cold_war_count = (
    df_cold_war
    .groupby('Country_Name')
    .count()
    .reset_index()
)

In [37]:
# Two-colour palette for the two countries in the donut chart.
custom_palette = ["#d53030", "#3033d5"]

# Donut chart of total launches per country; the 'Code' count is used as the
# launch total.
fig_launch_cold_war = px.pie(
    df_cold_war_count,
    names='Country_Name',
    values='Code',
    hole=0.5,
    color_discrete_sequence=custom_palette,
)

# Show the absolute launch count inside each slice.
fig_launch_cold_war.update_traces(textinfo='value', textfont_size=20)

fig_launch_cold_war.show()

## Total Number of Launches Year-On-Year by the Two Superpowers

In [38]:
# Non-null value count of every column, per (country, year).
df_cold_war_count_year = (
    df_cold_war
    .groupby(['Country_Name', 'Year'])
    .count()
    .reset_index()
)

# Turn the integer year into a timestamp (1 January of that year).
df_cold_war_count_year['Year'] = pd.to_datetime(
    df_cold_war_count_year['Year'],
    format='%Y',
)

In [39]:
# Dash app: a year slider driving the USA-vs-USSR launches line chart.
app = Dash(__name__)

app.layout = html.Div([
    dcc.Graph(id='graph'),
    dcc.Slider(
        id='year-slider',
        # Start at 1957 to match the chart title ("from 1957") and the
        # failure-rate slider further down.
        min=1957,
        max=df_cold_war_count_year['Year'].max().year,
        value=df_cold_war_count_year['Year'].mean().year,
        # One mark per year, 1957..1991. A plain range avoids pandas'
        # deprecated 'Y' frequency alias.
        marks={year: str(year) for year in range(1957, 1992)},
        # step=None restricts the slider to the marked years.
        step=None
    )
])

@app.callback(
    Output('graph', 'figure'),
    [Input('year-slider', 'value')]
)
def update_figure(selected_year):
    """Build the launches-per-year line chart up to the selected year.

    Keeps the rows of ``df_cold_war_count_year`` whose year is at most
    ``selected_year`` and plots the ``Detail`` count per country over time.
    """
    up_to_selected = df_cold_war_count_year['Year'].dt.year <= selected_year
    launches = df_cold_war_count_year[up_to_selected]

    line_fig = px.line(
        launches,
        x='Year',
        y='Detail',
        color='Country_Name',
        color_discrete_sequence=["#3033d5", "#d53030"],
        labels={'Country_Name': 'Country', 'Detail': 'Number of Launches'},
        title=f'Number of Launches by Country from 1957 to {selected_year}',
    )
    # Clearing the per-trace template lets the unified "x" hover show both lines.
    line_fig.update_traces(mode="markers+lines", hovertemplate=None)
    line_fig.update_layout(hovermode="x")
    return line_fig

# Start the Dash development server for the launches-per-year chart.
# `app.run` supersedes `app.run_server`, which current Dash releases reject.
if __name__ == '__main__':
    app.run(debug=True)

## Chart the Percentage of Failures over Time

Did failures go up or down over time? Did the countries get better at minimising risk and improving their chances of success over time? 

In [None]:
# Binary outcome: anything other than 'Success' counts as a failure.
# `assign` returns a new frame, so nothing is written into a frame that may
# itself be a slice of df_date (which would raise the chained-assignment warning).
df_cold_war = df_cold_war.assign(
    Status=np.where(df_cold_war['Mission_Status'] == 'Success', 'Success', 'Failure')
)

# Per (country, year): the number of missions and the share that failed, in percent.
df_failure_percent = (
    df_cold_war
    .groupby(['Country_Name', 'Year'])
    .agg(
        Count=('Status', 'size'),
        Failure_Percent=('Status', lambda x: (x == 'Failure').mean() * 100),
    )
    .reset_index()
)

df_failure_percent['Failure_Percent'] = df_failure_percent['Failure_Percent'].round(2)

In [41]:
# Dash app: a year slider driving the failure-percentage bar chart.
app = Dash(__name__)

app.layout = html.Div([
    dcc.Graph(id='graph'),
    dcc.Slider(
        id='year-slider',
        min=1957,
        max=int(df_failure_percent['Year'].max()),
        # With step=None only marked (integer) years are valid positions, so
        # the mean year is truncated to an integer rather than passed as a float.
        value=int(df_failure_percent['Year'].mean()),
        # One mark per year, 1957..1991. A plain range avoids pandas'
        # deprecated 'Y' frequency alias.
        marks={year: str(year) for year in range(1957, 1992)},
        step=None
    )
])

@app.callback(
    Output('graph', 'figure'),
    [Input('year-slider', 'value')]
)
def update_figure(selected_year):
    """Build the failure-percentage bar chart up to the selected year.

    Keeps the rows of ``df_failure_percent`` whose ``Year`` is at most
    ``selected_year`` and draws grouped bars of ``Failure_Percent`` per
    country, with the yearly mission count available on hover.
    """
    filtered_df = df_failure_percent[df_failure_percent['Year'] <= selected_year]
    fig_failure_percent = px.bar(
        data_frame=filtered_df,
        x='Year',
        y='Failure_Percent',
        color='Country_Name',
        barmode='group',
        labels={'Failure_Percent': 'Failure Mission [%]', 'Country_Name': 'Country'},
        # A list (not a bare string) matches the other callback in this file
        # and is accepted by every plotly express version.
        hover_data=['Count'],
    )

    # customdata[0] is 'Count', the only hover_data column.
    fig_failure_percent.update_traces(hovertemplate='<br>Year: %{x}  <br>Total Number of Missions: %{customdata[0]} <br>Failure Mission %{y}%')
    return fig_failure_percent

# Start the Dash development server for the failure-percentage chart.
# `app.run` supersedes `app.run_server`, which current Dash releases reject.
if __name__ == '__main__':
    app.run(debug=True)