In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input directory.
for path in (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
):
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import seaborn as sns
import matplotlib.pyplot as plt
from iso3166 import countries

In [3]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.preprocessing import LabelEncoder
import warnings
warnings.filterwarnings("ignore")


In [4]:
# Read the space-missions CSV from the Kaggle input directory and preview the first rows.
df = pd.read_csv(
    "../input/all-space-missions-from-1957/Space_Corrected.csv"
)
df.head()

In [5]:
# Show the column labels of the freshly loaded frame.
df.columns

In [6]:
# Remove the two leftover index columns written by earlier CSV exports.
# errors="ignore" keeps the cell idempotent: because `df` is rebound here, a second
# run would otherwise raise KeyError on the already-dropped columns.
df = df.drop(columns=['Unnamed: 0', 'Unnamed: 0.1'], errors="ignore")
df.head()

In [7]:
# Print dtypes and non-null counts for every column.
df.info()

In [8]:
# Summary statistics of the frame's columns.
df.describe()

In [9]:
# Launch count per company, keeping the 28 most frequent companies.
# The output columns are named explicitly ("index" = company, "Company Name" = count):
# pandas >= 2.0 changed what value_counts().reset_index() calls them, and the
# plotting cell below relies on these exact labels.
ds = (
    df["Company Name"]
    .value_counts()
    .rename_axis("index")
    .reset_index(name="Company Name")[:28]
)

In [10]:
# Preview the first rows of the per-company launch counts.
ds.head()

In [11]:
# Bar chart of launch counts per company; each bar is coloured by its own count.
fig = go.Figure(
    go.Bar(
        x=ds["index"],
        y=ds["Company Name"],
        marker=dict(color=ds["Company Name"], colorscale="bluered"),
    )
)
fig.update_layout(
    title="Number of Launches by Every Company",
    # The x axis lists companies, not countries, so the label is corrected.
    xaxis_title="Top 28 Companies",
    yaxis_title="count",
    hovermode="x",
)
fig.show()

In [12]:
# Number of launches per rocket status.
# Column names are pinned ("index" = status, "Status Rocket" = count) so the pie
# chart cell works on both pandas 1.x and 2.x, which name these columns differently.
ds = (
    df["Status Rocket"]
    .value_counts()
    .rename_axis("index")
    .reset_index(name="Status Rocket")
)
ds

In [13]:
# Pie chart showing the share of launches in each rocket status.
fig = px.pie(
    data_frame=ds,
    names="index",
    values="Status Rocket",
    title="Rocket Status",
)
fig.show()

In [14]:
# Number of launches per mission outcome.
# Column names are pinned ("index" = outcome, "Status Mission" = count) so the bar
# chart cell works on both pandas 1.x and 2.x, which name these columns differently.
ds = (
    df["Status Mission"]
    .value_counts()
    .rename_axis("index")
    .reset_index(name="Status Mission")
)
ds

In [15]:
# Bar chart of launch counts per mission outcome.
fig = px.bar(
    data_frame=ds,
    x="index",
    y="Status Mission",
    title="Mission Status",
)
fig.show()

In [16]:
# List the column labels again; note that the cost column used below is spelled " Rocket" with a leading space.
df.columns

In [17]:
# How many rows have no value in the " Rocket" column?
df[" Rocket"].isna().sum()

In [18]:
# Keep only the launches that have a value in the " Rocket" column.
# .copy() makes `df_` an independent frame: it is modified in a later cell, and
# without the copy pandas treats that write as a chained assignment on a slice of
# `df` (SettingWithCopyWarning, hidden here by the global warning filter).
df_ = df.dropna(subset=[" Rocket"], axis="rows").copy()
len(df_)

In [19]:
# Check how many missing " Rocket" values remain in the filtered frame.
df_[" Rocket"].isna().sum()

In [20]:
# Convert the " Rocket" column from text with comma separators into float64.
#
# * astype(str) makes the cell safe to re-run: once the column already holds numbers,
#   the original `.str.replace` returned NaN for every non-string value and the
#   trailing fillna turned every cost into 0.0.
# * Assigning with df_[col] = ... replaces the column, so the float64 dtype sticks;
#   `df_.loc[:, col] = ...` writes into the existing object column on recent pandas
#   and leaves the dtype as object.
rocket_cost_text = df_[" Rocket"].astype(str).str.replace(",", "", regex=False)
df_[" Rocket"] = rocket_cost_text.astype(np.float64).fillna(0.0)

In [21]:
# Histogram of " Rocket" values below 1000, split by rocket status.
df_d = df_.loc[df_[" Rocket"].lt(1000)]
plt.figure(figsize=(22, 6))
sns.histplot(
    data=df_d,
    x=" Rocket",
    hue="Status Rocket",
)
plt.show()

In [22]:
# Display the converted " Rocket" column (values and dtype).
df_.loc[:," Rocket"]

In [23]:
# Same histogram as above (reuses df_d), this time split by mission outcome.
plt.figure(figsize=(22, 6))
sns.histplot(
    data=df_d,
    x=" Rocket",
    hue="Status Mission",
)
plt.show()

In [24]:
# Preview the frame restricted to rows with a " Rocket" value.
df_.head()

In [25]:
# Sum of the " Rocket" column per company, shown as a flat two-column table.
(
    df_
    .groupby("Company Name")[" Rocket"]
    .sum()
    .reset_index()
)

In [26]:
# Store the per-company " Rocket" totals for the following cells.
df_money = (
    df_
    .groupby("Company Name")[" Rocket"]
    .sum()
    .reset_index()
)

In [27]:
# Discard companies whose summed " Rocket" value is not positive.
df_money = df_money.loc[df_money[" Rocket"].gt(0)]
df_money.head()

In [28]:
# Order companies from the largest to the smallest " Rocket" total.
df_money_ = df_money.sort_values(" Rocket", ascending=False)
df_money_.head()

In [29]:
# Bar chart of the summed " Rocket" value per company, largest first.
fig = px.bar(
    data_frame=df_money_,
    x="Company Name",
    y=" Rocket",
    title="Total Spent Money for each Company",
)
fig.show()

In [30]:
# Parse the "Datum" strings into timestamps.
# utc=True normalises every value to UTC so the column gets one datetime dtype.
# NOTE(review): this assumes "Datum" mixes timezone-suffixed and date-only strings,
# which would otherwise produce an object column - confirm against the data.
df["date"] = pd.to_datetime(df["Datum"], utc=True)

In [31]:
# Pull the calendar year out of every parsed launch date, element by element.
df["year"] = df["date"].map(lambda stamp: stamp.year)

In [32]:
# Preview the frame with the new "date" and "year" columns.
df.head()

In [33]:
# Number of launches per year.
# Column names are pinned ("index" = year, "year" = count) so the bar chart cell
# works on both pandas 1.x and 2.x, which name these columns differently.
ds = (
    df["year"]
    .value_counts()
    .rename_axis("index")
    .reset_index(name="year")
)
ds.head()

In [34]:
# Bar chart of the number of launches in each year.
fig = px.bar(
    data_frame=ds,
    x="index",
    y="year",
    title="Missions Number by Year",
)
fig.show()

In [35]:
# Learn an integer code for each distinct "Status Mission" value; fit() returns the encoder itself.
encoder = LabelEncoder().fit(df["Status Mission"])
encoder

In [36]:
# Bar colour for each integer code 0-3; the subplot cell looks colours up by encoded status.
colors = dict(enumerate(["red", "Orange", "Yellow", "Green"]))
colors

In [37]:
# Replacement table applied to the last comma-separated part of "Location":
# each key found there is rewritten to the country name on the right.
countries_dict = dict([
    ("Russia", "Russian Federation"),
    ("New Mexico", "USA"),
    ("Yellow Sea", "China"),
    ("Shahrud Missile Test Site", "Iran"),
    ("Pacific Missile Range Facility", "USA"),
    ("Barents Sea", "Russian Federation"),
    ("Gran Canaria", "USA"),
])

In [38]:
# Country = the text after the last ", " in "Location", rewritten through countries_dict.
df["country"] = (
    df["Location"]
    .str.split(", ")
    .str.get(-1)
    .replace(countries_dict)
)

In [39]:
# Preview the frame with the new "country" column.
df.head()

In [40]:
# One small bar chart per country showing the percentage of each mission outcome.
country_names = list(df["country"].unique())

# Size the grid from the data. The previous fixed 4x4 grid made add_trace fail as
# soon as more than 16 distinct countries were present.
n_cols = 4
n_rows = max(1, -(-len(country_names) // n_cols))  # ceiling division

fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=country_names)
for i, country in enumerate(country_names):
    # Share (in percent) of every mission outcome for this country
    counts = df[df["country"] == country]["Status Mission"].value_counts(normalize=True) * 100
    # Colour each bar by the encoded outcome so outcomes match across subplots
    color = [colors[x] for x in encoder.transform(counts.index)]
    trace = go.Bar(x=counts.index, y=counts.values, name=country, marker={"color": color})
    fig.add_trace(trace, row=(i // n_cols) + 1, col=(i % n_cols) + 1)

# 250 px per row keeps the original height of 1000 for a four-row grid
fig.update_layout(
    title={"text": "Countries and Mission Status"},
    height=250 * n_rows,
    width=1100,
)
# Label the y axis only on the first subplot of each row
for row in range(1, n_rows + 1):
    fig.update_yaxes(title_text="Percentage", row=row, col=1)
fig.show()

In [41]:
# Count launches for every (country, company, mission outcome) combination;
# the count ends up in the "Datum" column.
sun = (
    df
    .groupby(["country", "Company Name", "Status Mission"])["Datum"]
    .count()
    .reset_index()
)

In [42]:
# List the current column labels of the frame.
df.columns

In [43]:
# Restrict the sunburst data to four selected countries.
sun = sun[sun["country"].isin(["USA", "China", "Russian Federation", "France"])]
sun.head()

In [44]:
# Sunburst drilling down country -> company -> mission outcome, sized by launch count.
fig = px.sunburst(
    data_frame=sun,
    path=["country", "Company Name", "Status Mission"],
    values="Datum",
    title="Sunburst Chart",
)
fig.show()

In [45]:
# Add an "alpha3" column holding the ISO 3166-1 alpha-3 code of each launch country.
# Lookup table: iso3166 country name -> alpha-3 code
country_dict = {c.name: c.alpha3 for c in countries}

# Translate only the country column (instead of running replace over the whole
# frame); names that are not in the table are kept unchanged.
df["alpha3"] = df["country"].map(country_dict).fillna(df["country"])

# Names used in this notebook that differ from the iso3166 names and therefore are
# not translated by the lookup above.
# NOTE(review): "Iran" was previously left untranslated; iso3166 lists it under its
# formal name, so it is given its code explicitly here - confirm against the package.
alpha3_overrides = {"North Korea": "PRK", "South Korea": "KOR", "Iran": "IRN"}
for country_name, alpha3_code in alpha3_overrides.items():
    df.loc[df["country"] == country_name, "alpha3"] = alpha3_code


In [46]:
# Show the launches whose alpha-3 code is "KOR".
df.loc[df["alpha3"] == "KOR"]

In [47]:
# Launch count per (country, alpha-3 code); the count ends up in "Status Mission".
mapdf = (
    df
    .groupby(["country", "alpha3"])["Status Mission"]
    .count()
    .reset_index()
)
mapdf.head()

In [48]:
# World map coloured by the per-country launch count, located via alpha-3 codes.
fig = px.choropleth(
    data_frame=mapdf,
    locations="alpha3",
    color="Status Mission",
    hover_name="country",
    title="Status Mission by Countries",
)
fig.show()