In [None]:
import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

# Space Mission Analysis 
![](https://images.unsplash.com/photo-1464802686167-b939a6910659?ixlib=rb-1.2.1&ixid=eyJhcHBfaWQiOjEyMDd9&auto=format&fit=crop&w=1033&q=80/)
### To infinity and beyond!

For more than 60 years, humanity has had the ambition to explore and conquer the area beyond our planet: space.  
Curious to learn about the different missions since the beginning of the space race in 1957, I decided to study them and build some interesting visualisations.  
This dataset, found on Kaggle, was scraped from https://nextspaceflight.com/launches/past/?page=1 and includes all the space missions from 1957 to August 2020.  
Let's get to work!

# Importing the Python libraries

In [None]:
import pandas as pd 
import numpy as np
import plotly.express as px
from iso3166 import countries
import plotly.graph_objects as go
import seaborn as sns
import matplotlib.pyplot as plt

# Importation of the dataframe

In [None]:
# Load the launch records from the Kaggle input directory into `space`.
space = pd.read_csv("/kaggle/input/all-space-missions-from-1957/Space_Corrected.csv")
# Preview the first five rows.
space.head(5)

**The dataset initially has 9 columns; we will start by creating new ones.**

## Creation of new columns
The Location column seems to hold a lot of useful information, such as the name of the launch center and the country associated with each launch.

In [None]:
# Break "Location" into its comma-separated, whitespace-trimmed pieces once,
# then pick the pieces needed for each new column.
location_pieces = space["Location"].map(
    lambda loc: [piece.strip() for piece in loc.split(",")]
)
space["Country"] = location_pieces.map(lambda pieces: pieces[-1])   # last piece
space["Launcher"] = location_pieces.map(lambda pieces: pieces[0])   # first piece
space["Site name"] = location_pieces.map(lambda pieces: pieces[1])  # second piece

## Site
I created the Launcher column because some mistakes occurred while building these new columns: for a few locations the pieces of information are shifted, so I will correct them.

In [None]:
# Launch sites that ended up in the "Launcher" column (first piece of the
# location) instead of "Site name" (second piece).
sites = ['Xichang Satellite Launch Center','Imam Khomeini Spaceport','Blue Origin Launch Site',
         'Taiyuan Satellite Launch Center','Tai Rui Barge','Uchinoura Space Center',
         'Jiuquan Satellite Launch Center','Svobodny Cosmodrome']

# For those rows, copy the site from "Launcher" into "Site name".
# A single label-based .loc assignment on the frame replaces the original
# chained `space["Site name"].iloc[<index labels>] = ...`, which mixed labels
# with positional indexing and is not guaranteed to write back to `space`.
misplaced = space["Launcher"].isin(sites)
space.loc[misplaced, "Site name"] = space.loc[misplaced, "Launcher"]

# Replace the two site names containing mis-encoded characters.
space.replace('Alc?›ntara Launch Center','Alcantara Launch Center',inplace = True)
space.replace('M?\x81hia Peninsula','Mahia Peninsula',inplace = True)

## Country

In [None]:
# Show the distinct values currently stored in the "Country" column.
space["Country"].unique()

As we can see, some of these values are not countries. Let's replace them with the real country.  
After some research, I found that many of the launches recorded in Kazakhstan were in reality organised by the USSR ("Russia").  
I will take this into account because I will compare the USA and the USSR later.

In [None]:
# Assign the results back instead of calling `.replace(..., inplace=True)` on
# a column selection: that chained in-place form is deprecated in recent
# pandas and does not modify `space` when Copy-on-Write is enabled.
space["Site name"] = space["Site name"].replace('Kauai', 'Pacific Missile Range Facility')

# Location fragments that landed in "Country" -> the country used in the analysis.
country_fixes = {
    'Yellow Sea': 'China',
    'Shahrud Missile Test Site': 'Iran',
    'Pacific Missile Range Facility': 'USA',
    'Barents Sea': 'Russia',
    'Pacific Ocean': 'USA',
    'New Mexico': 'USA',
    'Gran Canaria': 'Spain',
}
space["Country"] = space["Country"].replace(country_fixes)

# Launches by these operators are attributed to Russia, whatever the
# country extracted from the location.
russian_operators = ["VKS RF","RVSN USSR","ROSCOSMOS"]
space.loc[space['Company Name'].isin(russian_operators), "Country"] = "Russia"

# Drop the columns that are no longer needed.
del space["Unnamed: 0.1"]
del space["Location"]
del space["Launcher"]

## Datum
Now let's handle the Datum column to extract the year, hour, day, day number and month.

In [None]:
# Tokenise "Datum" once: the text before the first comma and the text after
# the last comma are each split on single spaces.
# NOTE(review): the indices below assume strings shaped like
# "Fri Aug 07, 2020 05:12 UTC" (time part optional) - confirm against the CSV.
datum_halves = space["Datum"].map(lambda raw: raw.split(","))
before_comma = datum_halves.map(lambda halves: halves[0].split(" "))
after_comma = datum_halves.map(lambda halves: halves[-1].split(" "))

# Year
space["Year"] = after_comma.map(lambda tokens: tokens[1])

# Day, month and day number
space["Day"] = before_comma.map(lambda tokens: tokens[0])
space["Month"] = before_comma.map(lambda tokens: tokens[1])
space["Day Number"] = before_comma.map(lambda tokens: tokens[2])

# Hour: third token after the comma when present, otherwise an empty string
space["Hour"] = [tokens[2] if len(tokens) > 2 else "" for tokens in after_comma]

# Finally convert the raw text itself into timezone-aware (UTC) timestamps.
space["Datum"] = pd.to_datetime(space["Datum"], utc = True)

## Details

In [None]:
# Split "Detail" on "|" once; the first trimmed piece becomes "Rocket Type"
# and the second one "Rocket Name". The source column is then removed.
detail_pieces = space["Detail"].map(
    lambda text: [piece.strip() for piece in text.split("|")]
)
space["Rocket Name"] = detail_pieces.map(lambda pieces: pieces[1])
space["Rocket Type"] = detail_pieces.map(lambda pieces: pieces[0])
del space["Detail"]

## "Unnamed: 0"
The first column will become the number of launches starting with the first one in history

In [None]:
# Reverse the numbering of "Unnamed: 0": the row holding the largest value
# becomes 1 and the row holding 0 becomes (largest value + 1).
original_rank = space["Unnamed: 0"]
space["Unnamed: 0"] = original_rank.max() - original_rank + 1

## Geo Analysis

In [None]:
# Geo analysis: attach an ISO 3166 alpha-3 code to every launch.

# Lookup table: country name as listed by iso3166 -> alpha-3 code.
country_alpha3 = {entry.name: entry.alpha3 for entry in countries}

# Start from the country name and swap in the code wherever the name is known;
# names missing from the table are left untouched.
space["alpha3"] = space["Country"].replace(country_alpha3)

space["alpha3"].unique()

# These countries are not matched by their iso3166 name, so their codes are
# set manually.
manual_codes = {
    "North Korea": "PRK",
    "South Korea": "KOR",
    "Russia": 'RUS',
    "Iran": "IRN",
}
for country_name, code in manual_codes.items():
    space.loc[space["Country"] == country_name, "alpha3"] = code

space["alpha3"].unique()

# Rename columns
Because some column names don't mean anything to me, I rename some of them:

In [None]:
# Give the price and launch-counter columns meaningful names.
new_names = {" Rocket": "Rocket price", "Unnamed: 0": "Launch Number"}
space = space.rename(columns=new_names)

## Modification type col
As the last cleaning step, I will convert the type of some columns so they are easier to use later.

In [None]:
# Price: turn the values into text, trim blanks and drop thousands
# separators, then convert to float (missing prices become NaN).
price_text = (
    space["Rocket price"].astype(str).str.strip().str.replace(",", "", regex=False)
)
space["Rocket price"] = price_text.astype(float)

# Hour / Day
# Keep only the part before ":" of the launch time.
hour_text = space["Hour"].astype(str).str.split(":").str[0]
# -1 marks a missing hour, because NaN cannot be stored in an integer column.
# The result is assigned back rather than using a chained
# `space["Hour"].replace(..., inplace=True)`, which is not guaranteed to
# update `space` and would leave "" values that break the int conversion.
space["Hour"] = hour_text.replace("", "-1").astype('int32')
space["Day Number"] = space["Day Number"].astype('int64')
space["Year"] = space["Year"].astype('int64')

**The first step is now finished, we will be able to analyse these data and represent them in the best way possible.**

In [None]:
# Preview the first five rows after the cleaning steps.
space.head(5)

# Analysis

In [None]:
# (number of rows, number of columns) of the frame.
space.shape

The dataset is now clean and contains 14 columns and 4324 rows.  
This is good news! 😍  
We can start the second part of the project: data exploration! Let's try to find correlations, hidden information or important differences in all this data.  
The first thing I will check is the first launch of a rocket into space:

In [None]:
# Show the last row of the frame.
# NOTE(review): this is the first launch only if the rows are ordered from
# the most recent to the oldest launch - confirm against the CSV.
space.tail(1)

So we can see that the first launch took place on __October 4th, 1957 at 19:28:00__ from the __Baikonur Cosmodrome__.  
It was organized by the __USSR ("Russia")__ and the mission was a __success__.  
да здравствует россия ! ("Long live Russia")  
Name of the rocket : __Sputnik-1__  
Next, we can get an overview of the columns' content with the `describe` function:

In [None]:
# Summary statistics for every column, numeric or not.
space.describe(include="all")

__Points to keep in mind:__ 
- A total of 4324 launches 
- There are 56 companies
- 16 countries
- Russia seems to be the most represented country in this dataset
- There are 4 mission statuses (we will see why later)
- Most of the rockets launched are now retired
- __Important__: "Rocket price" is the only column that has NaN values

---------------

# My main goal will be to answer to these 3 subjects :  

- __How the launch has grown from the first launch to today?__
- __Who was the best during the cold war?__
- __How space X's investment in its rockets has increased?__ 



# How the launch has grown from the first launch to today ?
We can start by representing curve associated to the evolution of Launch number :

### Evolution of Launches

In [None]:
# Launch counter plotted against the launch date.
fig = px.line(
    space,
    x='Datum',
    y="Launch Number",
    color_discrete_sequence=px.colors.sequential.RdBu,
    template="plotly_dark",
    height=600,
)
fig.show()

The evolution is close to linear, which reflects the constant interest in space exploration.  
However, the curve seems to grow faster before 1980. To see this evolution more accurately, I will use a bar plot to compare the years.

### Number of launches per year

In [None]:
# Count_Year holds the number of launches per year, sorted by year.
# groupby/size gives stable column names ("Year", "Count") on every pandas
# version; the previous value_counts().reset_index().rename(...) relied on the
# pre-2.0 column names and lost the "Year" column on pandas >= 2.0.
Count_Year = space.groupby("Year").size().reset_index(name="Count")
fig = px.bar(Count_Year, y='Count', x='Year', text='Count', color = 'Count',color_continuous_scale= px.colors.sequential.Reds, 
             template = "plotly_dark",height = 600)
fig.show()

The evolution is clearly visible on this graph, especially the differences between the years!

We can observe __3 important trends__:
- The strongest growth, from 1957 to 1978, which corresponds to part of the Cold War 
- The number of launches then dropped drastically until 2015, almost halving compared to the previous period
- Renewed growth since 2016!  

To see in deeper the distribution according to the countries, we can just change the color parameter and obtain this graph :

### Number of Launches per year and by country

In [None]:
# Count_Year_country: number of launches for each (Year, Country, alpha3).
Count_Year_country = (
    space.groupby(["Year", "Country", "alpha3"])["Launch Number"]
    .count()
    .reset_index()
)

# Yearly bars coloured by country.
fig = px.bar(
    Count_Year_country,
    x='Year',
    y='Launch Number',
    color="Country",
    color_discrete_sequence=px.colors.qualitative.Dark24,
    template="plotly_dark",
    height=600,
)
fig.show()

__Whooooooo !__  
__Russia and the USA__ had an almost total monopoly on space until 1990.  
This graph shows the importance of these two countries in space exploration, and that is why we will study both later.  
We can also note that __France__ entered the competition in the 90s and __China__ less than 5 years ago. 

### Geographic evolution of launches number per year and by country

In [None]:
# Animated world map: one frame per year, marker size and colour driven by
# the number of launches of each country.
geo_options = dict(
    locations="alpha3",
    hover_name="Country",
    size="Launch Number",
    color="Launch Number",
    animation_frame="Year",
    projection="natural earth",
    color_continuous_scale=px.colors.sequential.Reds,
    template="plotly_dark",
    height=600,
)
fig = px.scatter_geo(Count_Year_country, **geo_options)
fig.show()

__Click on play button !__
This animation shows us the evolution and distribution of launches number per country since 1957.  

It's an other way to see again that Russia and the USA represented the biggest part of launches !  

It's important to take into account that the launches was not in the country marked, but just organized by these ones.  
For example, French rocket are launched from Guyane (one of its regions) which is in South America.  

Now we will take a look at __the distribution of success mission__ !  

I will make these two graph to have a great summary of information :

### Success Mission repartion

In [None]:
# Number of launches per mission status, with explicit column names so the
# pie chart does not depend on the pandas version (value_counts().reset_index()
# produced "index"/"Status Mission" before pandas 2.0 and
# "Status Mission"/"count" afterwards).
status = (
    space["Status Mission"]
    .value_counts()
    .rename_axis("Status Mission")
    .reset_index(name="Count")
)
fig = px.pie(status, values='Count', names='Status Mission',
             title='Success distribution', color_discrete_sequence = px.colors.sequential.RdBu, 
             template = "plotly_dark",height = 400)
             
fig.update_traces(textposition='outside', textinfo='percent+label')
fig.show()

# Same breakdown per year, as bars coloured by mission status.
Tab = space.groupby(["Year","Status Mission"])["Launch Number"].count().reset_index()
fig = px.bar(Tab, y='Launch Number', x='Year',color="Status Mission",title='Success distribution by Year',
             color_discrete_sequence = px.colors.sequential.Blackbody,template = "plotly_dark"
             ,height = 400)
fig.show()

Firstly, we see that more than __89% of all launches__ have been __successful__, which is a great score.  
To see the evolution over the years, I represented the data with a bar plot coloured by "Status Mission".  
We can easily see that the percentage of failures has decreased with time, and failures are very rare today.  
It would have been interesting to see the number of fatal failures and compare it over time, but the dataset doesn't provide this information.

### Status mission by country

To continue about failure distribution, we can also compare it between each country.  
Because of the __big difference between Russia, USA__ and the other, I've decided to put a mask on these two one.  
Thanks to that, we will have a better view of the status of mission for countries __with few launches__. 

In [None]:
# Launch counts per (Country, Status Mission), leaving out the USA and Russia.
status_by_country = (
    space.groupby(["Country", "Status Mission"])["Launch Number"]
    .count()
    .reset_index()
)
is_big_two = status_by_country.Country.isin(['USA','Russia'])
x = status_by_country[~is_big_two]

fig = px.bar(
    x,
    x="Country",
    y="Launch Number",
    color="Status Mission",
    color_discrete_sequence=px.colors.sequential.Blackbody,
    template="plotly_dark",
    height=400,
)
fig.show()

The distribution seems coherent and proportional to the number of launches.  
However, we see that __Brazil has never succeeded__ in its launch attempts (3 failures including one pre-launch failure).  
We can also see that Kenya has never failed in its launches, with a total of 9! It's only the beginning, but this result is very promising!

### Rockets status

In [None]:
# Launch counts per (Country, Status Rocket), with the counter column
# renamed to "Count".
rocket_status = (
    space.groupby(["Country", "Status Rocket"])["Launch Number"]
    .count()
    .reset_index()
)
x = rocket_status.rename(columns={"Launch Number": "Count"})

# Sunburst: rocket status on the inner ring, countries on the outer ring.
fig = px.sunburst(
    x,
    path=['Status Rocket', 'Country'],
    values='Count',
    color_discrete_sequence=px.colors.qualitative.Set1,
    template="plotly_dark",
    height=400,
)
fig.show()

# Same data as bars per country, coloured by rocket status.
fig = px.bar(
    x,
    x="Country",
    y="Count",
    color="Status Rocket",
    color_discrete_sequence=px.colors.qualitative.Set1,
    template="plotly_dark",
    height=400,
)
fig.show()

As we could have predicted, most rockets are now retired.  
__Russia and the USA__ account for the biggest part of them, but if we click on "StatusActive", we can notice that the distribution is better balanced between countries.  
The second graph highlights __China__ by showing that most of its rockets are still active, contrary to __Russia__!   

Let's continue with the number of company by country. The graph below shows this distribution : 

### The companies by country

In [None]:
# Step 1: one row per (Country, Company Name) pair.
country_company = (
    space.groupby(["Country", "Company Name"])["Launch Number"].count().reset_index()
)
# Step 2: number of such pairs per country, i.e. companies per country.
companies_per_country = (
    country_company.groupby(["Country"])["Company Name"].count().reset_index()
)
x = companies_per_country.sort_values("Company Name", ascending=True)

# Horizontal bars: one bar per country, length = number of companies.
fig = px.bar(
    x,
    x="Company Name",
    y="Country",
    orientation='h',
    color_discrete_sequence=px.colors.sequential.RdBu,
    template="plotly_dark",
    height=400,
)
fig.show()

Let's see the company with most of launches.  

Because of the high number of company and to have a better representation, I've decided to show only the one with more than 10 launches.  
Then, we observe the 27 companies with the most launches at their counter 

In [None]:
# Per (Company Name, Country): total rocket price and number of launches.
company_totals = (
    space.groupby(["Company Name", "Country"])
    .agg({"Rocket price": "sum", "Launch Number": "count"})
    .reset_index()
)
# Keep only the companies with more than 10 launches, smallest first.
has_many_launches = company_totals["Launch Number"] > 10
company_grp = company_totals[has_many_launches].sort_values('Launch Number', ascending=True)

fig = px.bar(
    company_grp,
    y='Company Name',
    x='Launch Number',
    text='Launch Number',
    color="Country",
    title="Number of company by country",
    template="plotly_dark",
    height=600,
)
fig.show()

## Below, you will find other information that you could ask you :

Don't hesitate to click on the graph to make your own opinion !

### Number of site group by country :

In [None]:
# Number of launches for each (Country, Site name) pair.
x = (
    space.groupby(["Country", "Site name"])["Launch Number"]
    .count()
    .reset_index()
)
# Sunburst: countries on the inner ring, their launch sites on the outer ring.
fig = px.sunburst(
    x,
    path=['Country', 'Site name'],
    values='Launch Number',
    color_continuous_scale='RdBu',
    template="plotly_dark",
)
fig.show()

### Distribution of launches by days/months :

In [None]:
# Calendar order for the axes.
# NOTE(review): assumes "Month"/"Day" hold three-letter English abbreviations
# ("Jan", "Mon", ...); labels not listed here are kept and appended at the end.
month_order = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
               "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
day_order = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]

# Launch counts per (Month, Day), reshaped to a Month x Day table.
x = space.groupby(["Month","Day"])["Launch Number"].count().reset_index()
# Keyword arguments: positional arguments to pivot() are rejected by pandas >= 2.0.
x = x.pivot(index="Month", columns="Day", values="Launch Number")

rows = [m for m in month_order if m in x.index] + [m for m in x.index if m not in month_order]
cols = [d for d in day_order if d in x.columns] + [d for d in x.columns if d not in day_order]
# Missing combinations count as 0 launches; cast back to int because
# fillna leaves floats and the "d" annotation format only accepts integers.
x = x.reindex(index=rows, columns=cols).fillna(0).astype(int)

f, ax = plt.subplots(figsize=(15, 10))
sns.heatmap(x, annot=True, fmt="d" ,linewidths=.5, ax=ax,cmap = "RdBu_r")

### Distribution of launches by hour :

In [None]:
# Number of launches per value of "Hour", ordered by hour.
launches_per_hour = space.groupby(["Hour"])["Launch Number"].count().reset_index()
x = launches_per_hour.sort_values("Hour")
fig = px.bar(
    x,
    x='Hour',
    y='Launch Number',
    orientation='v',
    color='Launch Number',
    color_continuous_scale='RdBu_r',
    template="plotly_dark",
)
fig.show()

### Histogram of rocket's prices :

In [None]:
# Histogram of "Rocket price" with a box plot drawn in the margin.
fig = px.histogram(
    space,
    x="Rocket price",
    marginal="box",
    template="plotly_dark",
)
fig.show()

With the number of missing values, this histogram is not indicative of the true distribution of rockets price
### The most expensive rocket :

In [None]:
# Row(s) whose price equals the highest price in the dataset. The maximum is
# looked up instead of hard-coding 5000.0, so the cell stays correct if the
# data changes.
space[space["Rocket price"] == space["Rocket price"].max()]

With a cost of **5 billion $**, the USSR built the most expensive rocket of all time.

----------------------------------------------------------------------------------------

# Cold war
__Let's now focus on the Cold War period, particularly on the USA and the USSR!__

![](https://images.unsplash.com/photo-1472235008642-bb3ce23994ac?ixlib=rb-1.2.1&ixid=eyJhcHBfaWQiOjEyMDd9&auto=format&fit=crop&w=1050&q=80)

In [None]:
# Build the Cold War dataset: launches attributed to the USA or Russia with
# a year strictly before 1992.
is_usa_or_russia = space.Country.isin(["USA", "Russia"])
is_before_1992 = space["Year"] < 1992
Cold_war = space[is_usa_or_russia & is_before_1992]

In [None]:
# Number of non-null launch dates in the Cold War subset.
Cold_war.Datum.count()

Firstly we can observe that __2428 launches__ have been realised during this period. __More than half of total data set!__   

To Start easily, let's compare Launches distribution between USA and URSS :

### Launches distribution

In [None]:
# Count_Year_country is rebuilt from the Cold War subset only.
Count_Year_country = (
    Cold_war.groupby(["Year", "Country", "alpha3"])["Launch Number"]
    .count()
    .reset_index()
)

# Options shared by both figures.
shared_style = dict(
    color_discrete_sequence=px.colors.qualitative.Set1,
    template="plotly_dark",
    height=600,
)

# Share of launches per country over the whole period.
fig = px.pie(
    Count_Year_country,
    values='Launch Number',
    names='Country',
    title='Launch distribution',
    **shared_style,
)
fig.show()

# Launches per year, coloured by country.
fig = px.bar(
    Count_Year_country,
    y='Launch Number',
    x='Year',
    color="Country",
    title="Launches distribution per year",
    **shared_style,
)
fig.show()

The __USSR__ clearly wins the battle for the total number of launches with __a total of 1766__, which is 1104 more than the USA!   

We can also see that the USA was ahead from __1958 to 1962__, but was then surpassed by Russia.  
__1977 was the year with the most launches:__ 
- 97 for the USSR
- 14 for the USA

### Number of companies

I wanted to know how many companies have been implicated during this period,   
and to my surprise,   
I've found that the URSS organized its launches by __only one company__ "RVSN USSR" in contrast of the __USA which have 10 companies__ !   

(Ok I confess, URSS had organised one by "Yuzhmash", but it doesn't really matter...isn't it?)

In [None]:
# Number of Cold War launches per (Country, Company Name).
x = (
    Cold_war.groupby(["Country", "Company Name"])["Launch Number"]
    .count()
    .reset_index()
)
# Sunburst: countries on the inner ring, their companies on the outer ring.
sunburst_options = dict(
    path=['Country', 'Company Name'],
    values='Launch Number',
    color_discrete_sequence=px.colors.qualitative.Set1,
    title="Repartion of companies per countries",
    template="plotly_dark",
    height=600,
)
fig = px.sunburst(x, **sunburst_options)
fig.show()

### Rocket the most used by each country 


In [None]:
# Launch counts per (Country, Rocket Type), most used first.
x = Cold_war.groupby(["Country","Rocket Type"])["Launch Number"].count().reset_index().sort_values("Launch Number",ascending=False)

# First row for Russia = its most used rocket type.
top_russia = x[x["Country"] == "Russia"].head(1)
print(top_russia)
# Build the sentence from the computed row instead of hard-coding the
# rocket and its launch count.
if not top_russia.empty:
    print("With {} launches, the {} were the rocket the most used by URSS".format(
        top_russia["Launch Number"].iloc[0], top_russia["Rocket Type"].iloc[0]))

In [None]:
# First USA row of `x` (already sorted by launch count, descending).
top_usa = x[x["Country"] == "USA"].head(1)
print(top_usa)
# Build the sentence from the computed row instead of hard-coding the
# rocket and its launch count.
if not top_usa.empty:
    print("With {} launches, the {} were the rocket the most used by USA".format(
        top_usa["Launch Number"].iloc[0], top_usa["Rocket Type"].iloc[0]))

### Evolution of failure by year

In [None]:
# Launch counts per (Year, Status Mission), computed separately per country.
status_keys = ["Year", "Status Mission"]
usa_launches = Cold_war[Cold_war["Country"] == "USA"]
urss_launches = Cold_war[Cold_war["Country"] == "Russia"]
usa = usa_launches.groupby(status_keys)["Launch Number"].count().reset_index()
urss = urss_launches.groupby(status_keys)["Launch Number"].count().reset_index()

# One stacked bar chart per country, coloured by mission status.
fig = px.bar(
    usa,
    y='Launch Number',
    title="The USA",
    x='Year',
    color="Status Mission",
    color_discrete_sequence=px.colors.qualitative.Light24,
    template="plotly_dark",
    height=600,
)
fig.show()
fig = px.bar(
    urss,
    y='Launch Number',
    x='Year',
    color="Status Mission",
    title="The URSS",
    color_discrete_sequence=px.colors.qualitative.Plotly,
    template="plotly_dark",
    height=600,
)
fig.show()

We observe two different distributions: 
- For the USA, we can notice very rapid growth over the first 6 years,  
followed by a decrease that stabilizes at a more constant number of launches each year (around 15).
- The USSR shows delayed growth compared to the USA (like a bell curve),  
but this growth is constant until it reaches 80 launches per year after 10 years.


Concerning failures, the __USA seems to have had a difficult start__ with a high percentage of failures,   
unlike the __USSR__, which seems to have a rather __constant number of failures per year__. 
### Number of failure by countries

In [None]:
# Count, per country, the Cold War launches whose status is one of the
# three failure labels.
failure_labels = ["Failure", "Partial Failure", "Prelaunch Failure"]
failed_launches = Cold_war[Cold_war["Status Mission"].isin(failure_labels)]
x = failed_launches.groupby("Country")["Launch Number"].count().reset_index()

fig = px.bar(
    x,
    y='Launch Number',
    x='Country',
    color="Country",
    color_discrete_sequence=px.colors.qualitative.Set1,
    template="plotly_dark",
    height=400,
)
fig.show()

For the total number of failures, the result is this time in favour of the __USA, with only 126 failures__.   

__However!__ If we remember the distribution of launches between the two countries, we could think that the USSR was better in terms of percentage of successful missions.  
We can compare the number of launches per year with the total number of failures to get a better idea of how the percentage of successful missions evolved. 
### Evolution of successful rate missions

In [None]:
def _success_rate_by_year(status_counts, years):
    """Return the yearly success percentage for each year in *years*.

    status_counts has one row per (Year, Status Mission) with the number of
    launches in "Launch Number". The rate is the "Success" count divided by
    the total count of the year, times 100. A year with launches but no
    "Success" row yields 0; a year without any launch yields NaN.
    """
    totals = status_counts.groupby("Year")["Launch Number"].sum()
    successes = (
        status_counts[status_counts["Status Mission"] == "Success"]
        .groupby("Year")["Launch Number"]
        .sum()
        .reindex(totals.index, fill_value=0)
    )
    return (successes / totals * 100).reindex(years).tolist()


# The rates are computed from the status labels themselves. The earlier
# version picked the last row of each year by position, which mis-scored
# years without a "Success" row and reused the previous year's value when a
# year had no rows at all.
# Every year in which either country launched is covered.
Years = np.union1d(usa["Year"].unique(), urss["Year"].unique())
percentage_usa = _success_rate_by_year(usa, Years)
percentage_urss = _success_rate_by_year(urss, Years)

Evo = pd.DataFrame(Years, columns = ["Year"])
Evo["Percentage_usa"] = percentage_usa
Evo["Percentage_urss"] = percentage_urss

# One line per country: success percentage against the year.
fig = go.Figure()
fig.add_trace(go.Scatter(x=Evo["Year"], y=Evo["Percentage_usa"], name="USA"))
fig.add_trace(go.Scatter(x=Evo["Year"], y=Evo["Percentage_urss"], name="URSS"))

fig.show()

As we thought, the __USSR shows a better overall__ success rate during the Space Race.  
Even if the results of the two countries aren't bad, the __USSR was more competent__ on this point than the USA.

### Best year for each country

Lastly, I suggest you a double bar chart comparing the total of launches and the successful rate per year.  
To interpret it, we can decide that if a country has both the best result for one year, it has been the best for this year.  
However, if it's a draw, we cannot conclude without other parameters. 

In [None]:
# For each year keep the country with the most launches: sort so the leader
# comes first within each year, keep the first row per year, then put the
# years back in ascending order.
Count_Year_country = Count_Year_country.sort_values(["Year","Launch Number"],ascending = False)
Count_Year_country2 = Count_Year_country.drop_duplicates("Year").sort_values("Year")

# For each year keep the better success percentage and the country holding it.
# The USA is chosen only when strictly ahead; otherwise the URSS value is used.
# The column-wise comparison replaces a per-year loop that called float() on
# one-element Series, which recent pandas versions deprecate.
usa_ahead = Evo["Percentage_usa"] > Evo["Percentage_urss"]
Evo["Max"] = Evo["Percentage_usa"].where(usa_ahead, Evo["Percentage_urss"])
Evo["Country"] = np.where(usa_ahead, "USA", "URSS")

fig = px.bar(Count_Year_country2, y='Launch Number', x='Year',color="Country", title = "Launch Number",
             color_discrete_sequence = px.colors.qualitative.Set1,template = "plotly_dark"
             ,height = 600)
fig.show()
fig = px.bar(Evo, y='Max', x='Year',color="Country", title = "Percentage of successfull mission",
             color_discrete_sequence = px.colors.qualitative.Set1,template = "plotly_dark"
             ,height = 600)
fig.show()

### Average of launch on this period 

In [None]:
# Average number of days between two launches, per country.
# The length of the period is measured from the data (first to last launch
# date in the Cold War subset) instead of a hard-coded 34 years.
period_days = (Cold_war["Datum"].max() - Cold_war["Datum"].min()).days
for country in ("Russia", "USA"):
    launches = Cold_war[Cold_war["Country"] == country]["Launch Number"].count()
    print(period_days / launches)


Lastly, we can estimate during the Cold war that :
- The __URSS__ organised one launch __by week__ 
- The __USA__ organised one launch __every 19 days.__  

# Space X

To finish this study, I want to take a look in Space X because this American company is attracting a lot of interest from many space enthusiasts. So, let's collect the space X data :
![](https://images.unsplash.com/photo-1517976487492-5750f3195933?ixlib=rb-1.2.1&ixid=eyJhcHBfaWQiOjEyMDd9&auto=format&fit=crop&w=1050&q=80)


In [None]:
# Keep only the launches whose company is SpaceX, then show the subset's size.
is_spacex = space["Company Name"] == "SpaceX"
spacex = space[is_spacex]
spacex.shape

First, we can see they have __attempted 100__ launching until 7 August !   

Let's continue with the evolution of launches from the first one :

In [None]:
# Number of SpaceX launches per year, drawn as a line.
x = (
    spacex.groupby(["Year"])["Launch Number"]
    .count()
    .reset_index()
)
fig = px.line(
    x,
    x='Year',
    y="Launch Number",
    color_discrete_sequence=px.colors.qualitative.D3,
    template="plotly_dark",
    height=600,
)
fig.show()

As espected, we can see the high growth from approximately 2014 !  

But even if the number of launchers has increased, let's see if the success rate of the missions is good ! 

### Success rate of the missions 

In [None]:
# Number of SpaceX launches per mission status.
# count() is required here: "Launch Number" is the ordinal of each launch,
# so summing it (as before) weighted every launch by its position in history
# and gave wrong pie shares.
x = spacex.groupby(["Status Mission"])["Launch Number"].count().reset_index()
fig = px.pie(x, values="Launch Number", names ='Status Mission', color_discrete_sequence = px.colors.qualitative.G10, 
             template = "plotly_dark",height = 600)
fig.show()

Result : 94.6%, fairly correct !   

Let's see this rate throughout the years :

In [None]:
# SpaceX launch counts per (Year, Status Mission), largest groups first.
status_per_year = (
    spacex.groupby(["Year", "Status Mission"])["Launch Number"]
    .count()
    .reset_index()
)
x = status_per_year.sort_values(["Launch Number"], ascending=False)
fig = px.bar(
    x,
    y='Launch Number',
    x='Year',
    color="Status Mission",
    title="Launches repartion per years",
    color_discrete_sequence=px.colors.qualitative.G10,
    template="plotly_dark",
    height=600,
)
fig.show()

__Wow! The success rate gets better year after year, and we can see that it is 100% over the last 4 years.__

We can also see that the first 3 SpaceX launches were failures and that the growth in launches really started in 2013.

From now on, I will focus on the rocket price, because only one value is missing in this subset, so we can finally use this variable efficiently!

### Space X rockets investment

In [None]:
# Total of the "Rocket price" column over all SpaceX launches.
sumprice = spacex["Rocket price"].sum()
message = "To start easily, we can see than Space X has invested approximately a total of {} millions dollars in its rocket"
print(message.format(sumprice))

### Sum invested per year
Then, let's take a look to the evolution of the sum invested per year : 

In [None]:
# Sum of SpaceX rocket prices per year.
x = (
    spacex.groupby(["Year"])["Rocket price"]
    .sum()
    .reset_index()
)
fig = px.bar(
    x,
    y='Rocket price',
    x='Year',
    color="Rocket price",
    color_continuous_scale='Blues',
    template="plotly_dark",
    height=600,
)
fig.show()

The distribution of investments sum is coherent with the number of launches per year.
### Sum invested by current status rocket for successful missions

In [None]:
# Successful SpaceX missions only: launch count and summed price for each
# (Status Mission, Status Rocket) pair.
successful = spacex[spacex["Status Mission"] == "Success"]
x = (
    successful.groupby(["Status Mission", "Status Rocket"])
    .agg({"Launch Number": "count", "Rocket price": "sum"})
    .reset_index()
)
# Horizontal bars: length = number of launches, colour = summed price.
fig = px.bar(
    x,
    y='Status Rocket',
    x='Launch Number',
    color="Rocket price",
    color_continuous_scale='Blues',
    template="plotly_dark",
    height=600,
    orientation='h',
)
fig.show()

First of all, only 38 Space X rockets are still active.   
We can also observe that a total of 3.275 billion dollars were used for retired rockets.  

### Histogram of rockets price

In [None]:
# Histogram of SpaceX rocket prices with a marginal box plot; every column of
# the subset is exposed in the hover tooltip.
histogram_options = dict(
    x="Rocket price",
    marginal="box",
    template="plotly_dark",
    height=600,
    hover_data=spacex.columns,
)
fig = px.histogram(spacex, **histogram_options)
fig.show()

The distribution of SpaceX rocket prices is narrow, mostly between 48 and 62 million dollars (__with an average of 55 million!__).  
We see 2 kinds of outliers; let's look in more detail at the average price of each SpaceX rocket type, together with its number of launches.

In [None]:
# Per SpaceX rocket type: number of launches and mean price, most launched first.
per_rocket_type = (
    spacex.groupby(["Rocket Type"])
    .agg({"Launch Number": "count", "Rocket price": "mean"})
    .reset_index()
)
x = per_rocket_type.sort_values(["Launch Number"], ascending=False)
# Bars: height = number of launches, colour = mean price.
fig = px.bar(
    x,
    y='Launch Number',
    x='Rocket Type',
    color="Rocket price",
    color_continuous_scale='Blues',
    template="plotly_dark",
    height=600,
)
fig.show()

The two outliers were: 
- The first SpaceX rocket, with an average cost of 7 million dollars and 5 launches
- __The Falcon Heavy__, the most expensive SpaceX rocket: 90 million dollars and only 3 launches. 

I hope that you have learned something from this notebook! 

### Thank you for reading ⚡