In [3]:
import pandas as pd
import scipy.stats as ss
import scipy.interpolate as si
import numpy as np
import seaborn as sbn
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from sklearn.preprocessing import LabelEncoder
import warnings
warnings.filterwarnings("ignore")
from iso3166 import Country
from datetime import datetime

Gerekli kütüphaneleri çalışma sayfasına yükledik.

    pandas     -- Veri Okuma ve Veri Çerçevesi gibi kolaylıklar sağlar.
    numpy      -- İstatistiksel bazı öngörüler ve işlemler için kullandık.
    seaborn    -- İstatistiksel grafikleri daha iyi şekilde sunar.
    matplotlib -- Grafik oluşturma için kullandık.
    plotly     -- Daha kapssamlı grafikleri içerdiği için kullandık.
    sklearn    -- Makine Öğenimi için kulladnığımız bir kütüphanedir. Kayıp veri için önemlidir.
    warnings   -- Hata yönetimi konusunda her hatayı göstermesin diye kullandık.
    iso3166    -- Bu paket içerisinde ülkeler şehirler gibi kolaylık sağlayan araçlar mevcuttur.
    datetime   -- Zaman serileri ile çalışmak için kullandık.



In [4]:
Data= pd.read_csv("Space_Corrected.csv",sep=",",parse_dates=["Datum"])

Data.drop(columns=["Unnamed: 0","Unnamed: 0.1"],inplace=True)

Data.head()

Unnamed: 0,Company Name,Location,Datum,Detail,Status Rocket,Rocket,Status Mission
0,SpaceX,"LC-39A, Kennedy Space Center, Florida, USA",2020-08-07 05:12:00+00:00,Falcon 9 Block 5 | Starlink V1 L9 & BlackSky,StatusActive,50.0,Success
1,CASC,"Site 9401 (SLS-2), Jiuquan Satellite Launch Ce...",2020-08-06 04:01:00+00:00,Long March 2D | Gaofen-9 04 & Q-SAT,StatusActive,29.75,Success
2,SpaceX,"Pad A, Boca Chica, Texas, USA",2020-08-04 23:57:00+00:00,Starship Prototype | 150 Meter Hop,StatusActive,,Success
3,Roscosmos,"Site 200/39, Baikonur Cosmodrome, Kazakhstan",2020-07-30 21:25:00+00:00,Proton-M/Briz-M | Ekspress-80 & Ekspress-103,StatusActive,65.0,Success
4,ULA,"SLC-41, Cape Canaveral AFS, Florida, USA",2020-07-30 11:50:00+00:00,Atlas V 541 | Perseverance,StatusActive,145.0,Success


Öncelikle verilerin olduğu csv dosyasını pandas kulalnarak okuduk sonrasında bazı gereksiz sütunları sildik ve inplace kullanarak bunları değişken üzerine yazdık.sonrada verinin ilk 5 değerini görüntüledik.

In [5]:
Data.describe()

Unnamed: 0,Company Name,Location,Datum,Detail,Status Rocket,Rocket,Status Mission
count,4324,4324,4324,4324,4324,964.0,4324
unique,56,137,4319,4278,2,56.0,4
top,RVSN USSR,"Site 31/6, Baikonur Cosmodrome, Kazakhstan",2008-11-05 00:15:00+00:00,Cosmos-3MRB (65MRB) | BOR-5 Shuttle,StatusRetired,450.0,Success
freq,1777,235,2,6,3534,136.0,3879


Verinin özet istatistiklerine baktık. bu bize bi kaç şekilde kolaylık sağlar.

In [6]:
Data[['Launch Pad','Launch Center','Launch City','Launch Country']] = Data.Location.str.split(",",expand=True)

Location verilerinin olduğu sütunu parçaladık ve içinden işimize yarayan verileri aktif kullanmak içim farklı sütunlara böldük.

In [7]:
Data[['Launch Vehicle','Launch Payload or Mission']]=Data.Detail.str.split("|",expand=True)

Detail verilerinin odluğu sütunu parçaladık ve içinden bilgileri farklı sütunlara aktardık.

In [8]:
Data["Datum"]=pd.to_datetime(Data["Datum"],utc=True)

In [9]:
Data['Launch Year']=Data.Datum.dt.year.values
Data['Launch Month']=Data.Datum.dt.month
Data['Launch Day']=Data.Datum.dt.day
Data['Launch Clock']=Data.Datum.dt.time

In [10]:
Data.head()

Unnamed: 0,Company Name,Location,Datum,Detail,Status Rocket,Rocket,Status Mission,Launch Pad,Launch Center,Launch City,Launch Country,Launch Vehicle,Launch Payload or Mission,Launch Year,Launch Month,Launch Day,Launch Clock
0,SpaceX,"LC-39A, Kennedy Space Center, Florida, USA",2020-08-07 05:12:00+00:00,Falcon 9 Block 5 | Starlink V1 L9 & BlackSky,StatusActive,50.0,Success,LC-39A,Kennedy Space Center,Florida,USA,Falcon 9 Block 5,Starlink V1 L9 & BlackSky,2020,8,7,05:12:00
1,CASC,"Site 9401 (SLS-2), Jiuquan Satellite Launch Ce...",2020-08-06 04:01:00+00:00,Long March 2D | Gaofen-9 04 & Q-SAT,StatusActive,29.75,Success,Site 9401 (SLS-2),Jiuquan Satellite Launch Center,China,,Long March 2D,Gaofen-9 04 & Q-SAT,2020,8,6,04:01:00
2,SpaceX,"Pad A, Boca Chica, Texas, USA",2020-08-04 23:57:00+00:00,Starship Prototype | 150 Meter Hop,StatusActive,,Success,Pad A,Boca Chica,Texas,USA,Starship Prototype,150 Meter Hop,2020,8,4,23:57:00
3,Roscosmos,"Site 200/39, Baikonur Cosmodrome, Kazakhstan",2020-07-30 21:25:00+00:00,Proton-M/Briz-M | Ekspress-80 & Ekspress-103,StatusActive,65.0,Success,Site 200/39,Baikonur Cosmodrome,Kazakhstan,,Proton-M/Briz-M,Ekspress-80 & Ekspress-103,2020,7,30,21:25:00
4,ULA,"SLC-41, Cape Canaveral AFS, Florida, USA",2020-07-30 11:50:00+00:00,Atlas V 541 | Perseverance,StatusActive,145.0,Success,SLC-41,Cape Canaveral AFS,Florida,USA,Atlas V 541,Perseverance,2020,7,30,11:50:00


Datetime objesine çevirdiğimiz Datum sütununu Yıl ay gün ve saat olarak ayırdık.Şimdi gereksiz bazı sütunlar var biz ya bunları silicez yada farklı bir dataframe içine işimize yarayan sütunları çekip alıcaz. Biz Farklı bir dataframe içine Location objesini almadan devam ettil ve farklı bir dataframe ile yolumuza devam ediyoruz. Ayrıca herhangi bir hata olduğunda tekrardan eski Dataframe dönüp işlem yapabiliriz.

In [11]:
Data_Space=Data.drop(columns={'Location'},inplace=False)

Burada Rocket isminin önündeki boşluğ sildik ve yukarıda ise inplace=False diyerek Data içindeki Location kısmını bozmamış olduk.

In [12]:
Data_Space.rename(columns={' Rocket': 'Rocket'},inplace=True)

In [13]:
Data_Space.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4324 entries, 0 to 4323
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype              
---  ------                     --------------  -----              
 0   Company Name               4324 non-null   object             
 1   Datum                      4324 non-null   datetime64[ns, UTC]
 2   Detail                     4324 non-null   object             
 3   Status Rocket              4324 non-null   object             
 4   Rocket                     964 non-null    object             
 5   Status Mission             4324 non-null   object             
 6   Launch Pad                 4324 non-null   object             
 7   Launch Center              4324 non-null   object             
 8   Launch City                4301 non-null   object             
 9   Launch Country             1650 non-null   object             
 10  Launch Vehicle             4324 non-null   object             
 11  Laun

Şimdi bazı sütunlarda kayıp veriler mevcut bunlara çözümler üretmemiz gerekiyor.

In [14]:
Data_Space["Launch Country"].isna().value_counts()

True     2674
False    1650
Name: Launch Country, dtype: int64

In [15]:
Data_Space["Launch Country"]=Data_Space["Launch Country"].fillna(Data_Space["Launch City"])

City bölgesinden boş olan country sütununu doldurduk.

In [16]:
Data_Space["Launch Country"].unique()

array([' USA', ' China', ' Kazakhstan', ' Japan', ' Israel',
       ' New Zealand', None, ' Russia', ' France', ' Iran', ' India',
       ' New Mexico', ' North Korea', ' Pacific Missile Range Facility',
       ' Pacific Ocean', ' South Korea', ' Barents Sea', ' Brazil',
       ' Gran Canaria', ' Kenya', ' Australia'], dtype=object)

In [17]:
Data_Space["Launch Country"].isna().value_counts()

False    4301
True       23
Name: Launch Country, dtype: int64

burada 23 adet boş değerimiz kaldı. 4301 veri dolu kullanılabilir olarak dolduruldu.

In [18]:
countries_dict = {
    ' Russia' : 'Russian Federation',
    ' New Mexico' : 'USA',
    " Yellow Sea": 'China',
    " Pacific Missile Range Facility": 'USA',
    " Barents Sea": 'Russian Federation',
    " Gran Canaria": 'USA',
    " Kazakhstan": 'Russian Federation',
    " USA":"USA",
    ' China':"China",
    " Japan":"Japan",
    " Israel":"Israel",
    ' France':"France",
    ' Iran':"Iran",
    ' India':"Iran",
    ' North Korea':"Nort Korea",
    ' South Korea':"South Korea",
    ' Brazil':"Brazil",
    ' Kenya':"USA",
    ' Australia':"Europa Company",
    ' Pacific Ocean':"Global Company",
    ' New Zealand':"USA"
}

Data_Space['Launch Country'] = Data_Space['Launch Country'].replace(countries_dict)


In [19]:
Data_Space["Launch Country"].unique()

array(['USA', 'China', 'Russian Federation', 'Japan', 'Israel', None,
       'France', 'Iran', 'Nort Korea', 'Global Company', 'South Korea',
       'Brazil', 'Europa Company'], dtype=object)

In [20]:
Data_Space[["Launch City","Launch Country"]]

Unnamed: 0,Launch City,Launch Country
0,Florida,USA
1,China,China
2,Texas,USA
3,Kazakhstan,Russian Federation
4,Florida,USA
...,...,...
4319,Florida,USA
4320,Florida,USA
4321,Florida,USA
4322,Kazakhstan,Russian Federation


In [21]:
rocket_dict={'50.0 ':'50.0','29.75 ':'29.75','5,000.0 ':'5.0','1,160.0 ':'1.16'}

Data_Space['Rocket'] = Data_Space['Rocket'].replace(rocket_dict)

Data_Space["Rocket"]=Data_Space["Rocket"].astype("float64",errors='raise')

In [22]:
Data_Space["Rocket"].isna().value_counts()

True     3360
False     964
Name: Rocket, dtype: int64

In [23]:
Unique_Company=pd.unique(Data_Space["Company Name"])
print("Unique Company Sayısı:{}".format(len(Unique_Company)))
Unique_Company[:20]

Unique Company Sayısı:56


array(['SpaceX', 'CASC', 'Roscosmos', 'ULA', 'JAXA', 'Northrop', 'ExPace',
       'IAI', 'Rocket Lab', 'Virgin Orbit', 'VKS RF', 'MHI', 'IRGC',
       'Arianespace', 'ISA', 'Blue Origin', 'ISRO', 'Exos', 'ILS',
       'i-Space'], dtype=object)

Yukarıda Eşsiz olarak bulunan kuruluşları ve sayıları bulduk. bu bize aşağıdaki işlemler için yarar sağlayacak.

In [24]:
Data_Gecici=Data_Space.copy()
Data_Space_Rocket=["Rocket"]
for e in Unique_Company:
    Company_Filtre=Data_Gecici["Company Name"]==e
    Data_Filtre=Data_Gecici[Company_Filtre]
    for s in Data_Space_Rocket:
        Ortalama=np.round(np.mean(Data_Filtre[s]),2)
        if ~np.isnan(Ortalama):
            Data_Filtre[s]=Data_Filtre[s].fillna(Ortalama)
        else:
            Tum_Ortalama=np.round(np.mean(Data_Space[s]),2)
            Data_Filtre[s]=Data_Filtre[s].fillna(Tum_Ortalama)
    Data_Gecici[Company_Filtre]=Data_Filtre
Data_Space=Data_Gecici.copy()
Data_Space.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4324 entries, 0 to 4323
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype              
---  ------                     --------------  -----              
 0   Company Name               4324 non-null   object             
 1   Datum                      4324 non-null   datetime64[ns, UTC]
 2   Detail                     4324 non-null   object             
 3   Status Rocket              4324 non-null   object             
 4   Rocket                     4324 non-null   float64            
 5   Status Mission             4324 non-null   object             
 6   Launch Pad                 4324 non-null   object             
 7   Launch Center              4324 non-null   object             
 8   Launch City                4301 non-null   object             
 9   Launch Country             4301 non-null   object             
 10  Launch Vehicle             4324 non-null   object             
 11  Laun

Burada her bir Kurulus bünyesinde Nan olmayan değerlerin ortalamaları ile yine aynı kurluşun Nan değerleri ile doldurduk. Eğer Kuruluş bünyesinde bulunan veriler hepsi nan veri ise onları da tum verinin ortalaması ile doldurduk. böylece daha efektif bir şekilde kayıp verimizi düzenlemş olduk.

In [25]:
Data_Space.dropna(axis=0,how="any",inplace=True)
Data_Space.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4301 entries, 0 to 4323
Data columns (total 16 columns):
 #   Column                     Non-Null Count  Dtype              
---  ------                     --------------  -----              
 0   Company Name               4301 non-null   object             
 1   Datum                      4301 non-null   datetime64[ns, UTC]
 2   Detail                     4301 non-null   object             
 3   Status Rocket              4301 non-null   object             
 4   Rocket                     4301 non-null   float64            
 5   Status Mission             4301 non-null   object             
 6   Launch Pad                 4301 non-null   object             
 7   Launch Center              4301 non-null   object             
 8   Launch City                4301 non-null   object             
 9   Launch Country             4301 non-null   object             
 10  Launch Vehicle             4301 non-null   object             
 11  Laun

In [26]:
Status_dict={'StatusRetired': 'Status Retired',
            'StatusActive': 'Status Active',
            }

Data_Space['Status Rocket']=Data_Space['Status Rocket'].replace(Status_dict)

In [27]:
Data_Space

Unnamed: 0,Company Name,Datum,Detail,Status Rocket,Rocket,Status Mission,Launch Pad,Launch Center,Launch City,Launch Country,Launch Vehicle,Launch Payload or Mission,Launch Year,Launch Month,Launch Day,Launch Clock
0,SpaceX,2020-08-07 05:12:00+00:00,Falcon 9 Block 5 | Starlink V1 L9 & BlackSky,Status Active,50.00,Success,LC-39A,Kennedy Space Center,Florida,USA,Falcon 9 Block 5,Starlink V1 L9 & BlackSky,2020,8,7,05:12:00
1,CASC,2020-08-06 04:01:00+00:00,Long March 2D | Gaofen-9 04 & Q-SAT,Status Active,29.75,Success,Site 9401 (SLS-2),Jiuquan Satellite Launch Center,China,China,Long March 2D,Gaofen-9 04 & Q-SAT,2020,8,6,04:01:00
2,SpaceX,2020-08-04 23:57:00+00:00,Starship Prototype | 150 Meter Hop,Status Active,54.99,Success,Pad A,Boca Chica,Texas,USA,Starship Prototype,150 Meter Hop,2020,8,4,23:57:00
3,Roscosmos,2020-07-30 21:25:00+00:00,Proton-M/Briz-M | Ekspress-80 & Ekspress-103,Status Active,65.00,Success,Site 200/39,Baikonur Cosmodrome,Kazakhstan,Russian Federation,Proton-M/Briz-M,Ekspress-80 & Ekspress-103,2020,7,30,21:25:00
4,ULA,2020-07-30 11:50:00+00:00,Atlas V 541 | Perseverance,Status Active,145.00,Success,SLC-41,Cape Canaveral AFS,Florida,USA,Atlas V 541,Perseverance,2020,7,30,11:50:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4319,US Navy,1958-02-05 07:33:00+00:00,Vanguard | Vanguard TV3BU,Status Retired,127.80,Failure,LC-18A,Cape Canaveral AFS,Florida,USA,Vanguard,Vanguard TV3BU,1958,2,5,07:33:00
4320,AMBA,1958-02-01 03:48:00+00:00,Juno I | Explorer 1,Status Retired,127.80,Success,LC-26A,Cape Canaveral AFS,Florida,USA,Juno I,Explorer 1,1958,2,1,03:48:00
4321,US Navy,1957-12-06 16:44:00+00:00,Vanguard | Vanguard TV3,Status Retired,127.80,Failure,LC-18A,Cape Canaveral AFS,Florida,USA,Vanguard,Vanguard TV3,1957,12,6,16:44:00
4322,RVSN USSR,1957-11-03 02:30:00+00:00,Sputnik 8K71PS | Sputnik-2,Status Retired,5.00,Success,Site 1/5,Baikonur Cosmodrome,Kazakhstan,Russian Federation,Sputnik 8K71PS,Sputnik-2,1957,11,3,02:30:00


In [28]:
Data_Space.to_csv("Space_Mission_Data_Cleanning.csv",index=False)

In [29]:
Year_list = list(Data_Space['Launch Year'].unique())

num_launch = []
num_success = []
num_failure = []
prob_success = []
prob_failure = []
for n in Year_list:
    num_launch.append(((Data_Space[Data_Space['Launch Year']==n]).shape)[0])
    num_success.append((Data_Space[(Data_Space['Launch Year']==n) & (Data_Space['Status Mission'] == 'Success')]).shape[0])
    num_failure.append((Data_Space[(Data_Space['Launch Year']==n) & (Data_Space['Status Mission'] == 'Failure')]).shape[0])

for m in range(len(num_launch)):
    prob_success.append(num_success[m]/num_launch[m])
    prob_failure.append(num_failure[m]/num_launch[m])

data = {'Year': Year_list, 'Launchs': num_launch, 'Success': num_success, 'Failure':num_failure,
        'Success Probability': prob_success,'Failure Probability':prob_failure}


Space_Year= pd.DataFrame(data=data, columns= ['Year', 'Launchs', 'Success','Failure','Success Probability','Failure Probability'])


Space_Year.nlargest(20,columns=['Year'], keep='first')


Unnamed: 0,Year,Launchs,Success,Failure,Success Probability,Failure Probability
0,2020,61,55,6,0.901639,0.098361
1,2019,106,97,6,0.915094,0.056604
2,2018,115,111,2,0.965217,0.017391
3,2017,91,84,5,0.923077,0.054945
4,2016,90,86,2,0.955556,0.022222
5,2015,52,48,3,0.923077,0.057692
6,2014,53,51,1,0.962264,0.018868
7,2013,45,42,2,0.933333,0.044444
8,2012,38,34,3,0.894737,0.078947
9,2011,42,40,1,0.952381,0.02381


In [30]:
Company_list = list(Data_Space['Company Name'].unique())

num_launch = []
num_success = []
num_failure = []
prob_success = []
prob_failure = []
for n in Company_list:
    num_launch.append(((Data_Space[Data_Space['Company Name']==n]).shape)[0])
    num_success.append((Data_Space[(Data_Space['Company Name']==n) & (Data_Space['Status Mission'] == 'Success')]).shape[0])
    num_failure.append((Data_Space[(Data_Space['Company Name']==n) & (Data_Space['Status Mission'] == 'Failure')]).shape[0])
    
for m in range(len(num_launch)):
    prob_success.append(num_success[m]/num_launch[m])
    prob_failure.append(num_failure[m]/num_launch[m])

data = {'Company': Company_list, 'Launchs': num_launch, 'Success': num_success, 'Failure':num_failure,
        'Success Probability': prob_success,'Failure Probability':prob_failure}


Space_Company= pd.DataFrame(data=data, columns= ['Company', 'Launchs', 'Success','Failure','Success Probability','Failure Probability'])


Space_Company.nlargest(40,columns=['Success Probability'], keep='first')


Unnamed: 0,Company,Launchs,Success,Failure,Success Probability,Failure Probability
4,JAXA,5,5,0,1.0,0.0
14,Blue Origin,12,12,0,1.0,0.0
18,i-Space,1,1,0,1.0,0.0
23,CASIC,2,2,0,1.0,0.0
27,Khrunichev,1,1,0,1.0,0.0
37,Starsem,1,1,0,1.0,0.0
43,Yuzhmash,2,2,0,1.0,0.0
44,Douglas,1,1,0,1.0,0.0
45,ASI,9,9,0,1.0,0.0
50,OKB-586,2,2,0,1.0,0.0


In [31]:
Country_list = list(Data_Space['Launch Country'].unique())

num_launch = []
num_success = []
num_failure = []
prob_success = []
prob_failure = []
for n in Country_list:
    num_launch.append(((Data_Space[Data_Space['Launch Country']==n]).shape)[0])
    num_success.append((Data_Space[(Data_Space['Launch Country']==n) & (Data_Space['Status Mission'] == 'Success')]).shape[0])
    num_failure.append((Data_Space[(Data_Space['Launch Country']==n) & (Data_Space['Status Mission'] == 'Failure')]).shape[0])
    for t in Company_list:
        Data_Space[Data_Space['Company Name']]
for m in range(len(num_launch)):
    prob_success.append(num_success[m]/num_launch[m])
    prob_failure.append(num_failure[m]/num_launch[m])

data = {'Country': Country_list, 'Launchs': num_launch, 'Success': num_success, 'Failure':num_failure,
        'Success Probability': prob_success,'Failure Probability':prob_failure}


Space_Country= pd.DataFrame(data=data, columns= ['Country', 'Launchs', 'Success','Failure','Success Probability','Failure Probability'])


Space_Country.nlargest(20,columns=['Success Probability'], keep='first')


Unnamed: 0,Country,Launchs,Success,Failure,Success Probability,Failure Probability
5,France,303,285,13,0.940594,0.042904
3,Japan,119,111,5,0.932773,0.042017
8,Global Company,36,33,3,0.916667,0.083333
1,China,259,236,17,0.911197,0.065637
2,Russian Federation,2094,1908,135,0.911175,0.06447
0,USA,1373,1208,131,0.879825,0.095412
4,Israel,11,9,2,0.818182,0.181818
6,Iran,89,67,16,0.752809,0.179775
11,Europa Company,6,3,3,0.5,0.5
7,Nort Korea,5,2,3,0.4,0.6
