This dataset was scraped from https://nextspaceflight.com/launches/past/?page=1 and includes all the space missions since the beginning of the Space Race (1957).

Library import.

In [1]:
import datetime as dt
import os
import time
from datetime import timezone

import numpy as np
import pandas as pd
import regex as re
from googlemaps import Client as GoogleMaps

In [2]:
# Load the raw launch records; the path is relative to the kernel's working directory.
ships_data = pd.read_csv('Space_Corrected.csv')
# Preview the first rows to check how the columns were parsed.
ships_data.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Company Name,Location,Datum,Detail,Status Rocket,Rocket,Status Mission
0,0,0,SpaceX,"LC-39A, Kennedy Space Center, Florida, USA","Fri Aug 07, 2020 05:12 UTC",Falcon 9 Block 5 | Starlink V1 L9 & BlackSky,StatusActive,50.0,Success
1,1,1,CASC,"Site 9401 (SLS-2), Jiuquan Satellite Launch Ce...","Thu Aug 06, 2020 04:01 UTC",Long March 2D | Gaofen-9 04 & Q-SAT,StatusActive,29.75,Success
2,2,2,SpaceX,"Pad A, Boca Chica, Texas, USA","Tue Aug 04, 2020 23:57 UTC",Starship Prototype | 150 Meter Hop,StatusActive,,Success
3,3,3,Roscosmos,"Site 200/39, Baikonur Cosmodrome, Kazakhstan","Thu Jul 30, 2020 21:25 UTC",Proton-M/Briz-M | Ekspress-80 & Ekspress-103,StatusActive,65.0,Success
4,4,4,ULA,"SLC-41, Cape Canaveral AFS, Florida, USA","Thu Jul 30, 2020 11:50 UTC",Atlas V 541 | Perseverance,StatusActive,145.0,Success


# Data Cleaning.

### We drop useless ID columns.

In [3]:
# Remove the two "Unnamed" columns; they only repeat the row number (see the preview above).
# drop() returns a new frame, so the raw `ships_data` stays untouched.
ships_data_dropedcols = ships_data.drop(columns=['Unnamed: 0', 'Unnamed: 0.1'])
ships_data_dropedcols.head()

Unnamed: 0,Company Name,Location,Datum,Detail,Status Rocket,Rocket,Status Mission
0,SpaceX,"LC-39A, Kennedy Space Center, Florida, USA","Fri Aug 07, 2020 05:12 UTC",Falcon 9 Block 5 | Starlink V1 L9 & BlackSky,StatusActive,50.0,Success
1,CASC,"Site 9401 (SLS-2), Jiuquan Satellite Launch Ce...","Thu Aug 06, 2020 04:01 UTC",Long March 2D | Gaofen-9 04 & Q-SAT,StatusActive,29.75,Success
2,SpaceX,"Pad A, Boca Chica, Texas, USA","Tue Aug 04, 2020 23:57 UTC",Starship Prototype | 150 Meter Hop,StatusActive,,Success
3,Roscosmos,"Site 200/39, Baikonur Cosmodrome, Kazakhstan","Thu Jul 30, 2020 21:25 UTC",Proton-M/Briz-M | Ekspress-80 & Ekspress-103,StatusActive,65.0,Success
4,ULA,"SLC-41, Cape Canaveral AFS, Florida, USA","Thu Jul 30, 2020 11:50 UTC",Atlas V 541 | Perseverance,StatusActive,145.0,Success


### Let's explore data looking for missing values.

In [4]:
# Every column is still object dtype at this point, so describe() reports
# count/unique/top/freq rather than numeric statistics.
ships_data_dropedcols.describe()

Unnamed: 0,Company Name,Location,Datum,Detail,Status Rocket,Rocket,Status Mission
count,4324,4324,4324,4324,4324,964.0,4324
unique,56,137,4319,4278,2,56.0,4
top,RVSN USSR,"Site 31/6, Baikonur Cosmodrome, Kazakhstan","Wed Nov 05, 2008 00:15 UTC",Cosmos-3MRB (65MRB) | BOR-5 Shuttle,StatusRetired,450.0,Success
freq,1777,235,2,6,3534,136.0,3879


In [5]:
# Non-null count and dtype per column; a count below the row total flags missing values.
ships_data_dropedcols.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4324 entries, 0 to 4323
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Company Name    4324 non-null   object
 1   Location        4324 non-null   object
 2   Datum           4324 non-null   object
 3   Detail          4324 non-null   object
 4   Status Rocket   4324 non-null   object
 5    Rocket         964 non-null    object
 6   Status Mission  4324 non-null   object
dtypes: object(7)
memory usage: 236.6+ KB


### Let's deal with NaN:

In [6]:
# Number of missing values per column.
ships_data_dropedcols.isnull().sum()

Company Name         0
Location             0
Datum                0
Detail               0
Status Rocket        0
 Rocket           3360
Status Mission       0
dtype: int64

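Because the number of NAs is greater than 50% of the column, we can't just delete those rows: we would basically be deleting most of the available information. So let's use the mode value to fill them instead; this way the information will still be usable. Keep in mind that roughly three out of four values in this column end up imputed, so any conclusion based on it should be treated with caution.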

In [7]:
# Most frequent value of the cost column. The column is object dtype, so this is the most
# common *string* (note the trailing space in the result), not a numeric mode.
ships_data_dropedcols[' Rocket'].mode()

0    450.0 
dtype: object

In [8]:
# Fill the missing launch costs with the most frequent value of the column.
# The result is assigned back to the column instead of calling fillna(inplace=True) on the
# selected Series: that chained in-place form emits a FutureWarning on recent pandas 2.x
# and leaves the DataFrame unchanged once Copy-on-Write is enabled.
# NOTE(review): the column is object dtype, so the fill value is a string; convert the
# column to numeric before doing arithmetic on it.
rocket_mode = ships_data_dropedcols[' Rocket'].mode()[0]
ships_data_dropedcols[' Rocket'] = ships_data_dropedcols[' Rocket'].fillna(rocket_mode)
ships_data_dropedcols.head()

Unnamed: 0,Company Name,Location,Datum,Detail,Status Rocket,Rocket,Status Mission
0,SpaceX,"LC-39A, Kennedy Space Center, Florida, USA","Fri Aug 07, 2020 05:12 UTC",Falcon 9 Block 5 | Starlink V1 L9 & BlackSky,StatusActive,50.0,Success
1,CASC,"Site 9401 (SLS-2), Jiuquan Satellite Launch Ce...","Thu Aug 06, 2020 04:01 UTC",Long March 2D | Gaofen-9 04 & Q-SAT,StatusActive,29.75,Success
2,SpaceX,"Pad A, Boca Chica, Texas, USA","Tue Aug 04, 2020 23:57 UTC",Starship Prototype | 150 Meter Hop,StatusActive,450.0,Success
3,Roscosmos,"Site 200/39, Baikonur Cosmodrome, Kazakhstan","Thu Jul 30, 2020 21:25 UTC",Proton-M/Briz-M | Ekspress-80 & Ekspress-103,StatusActive,65.0,Success
4,ULA,"SLC-41, Cape Canaveral AFS, Florida, USA","Thu Jul 30, 2020 11:50 UTC",Atlas V 541 | Perseverance,StatusActive,145.0,Success


In [9]:
# Confirm that no missing values remain after the fill.
ships_data_dropedcols.isnull().sum()

Company Name      0
Location          0
Datum             0
Detail            0
Status Rocket     0
 Rocket           0
Status Mission    0
dtype: int64

### Column names are kind of hard to use, especially the Rocket one because it has an extra leading space. Let's change all the names to lower case and remove that space.

In [10]:
# Normalise every column name: strip surrounding whitespace (the cost column is stored
# as ' Rocket') and lower-case it. Stripping by value avoids relying on the cost column
# sitting at position 5, so the cell keeps working if the column order changes.
colnames = [name.strip().lower() for name in ships_data_dropedcols.columns]

In [11]:
# Apply the normalised names. This renames the frame in place, so the earlier cells that
# select ' Rocket' cannot be re-run afterwards without reloading the data first.
ships_data_dropedcols.columns = colnames
ships_data_dropedcols.head()

Unnamed: 0,company name,location,datum,detail,status rocket,rocket,status mission
0,SpaceX,"LC-39A, Kennedy Space Center, Florida, USA","Fri Aug 07, 2020 05:12 UTC",Falcon 9 Block 5 | Starlink V1 L9 & BlackSky,StatusActive,50.0,Success
1,CASC,"Site 9401 (SLS-2), Jiuquan Satellite Launch Ce...","Thu Aug 06, 2020 04:01 UTC",Long March 2D | Gaofen-9 04 & Q-SAT,StatusActive,29.75,Success
2,SpaceX,"Pad A, Boca Chica, Texas, USA","Tue Aug 04, 2020 23:57 UTC",Starship Prototype | 150 Meter Hop,StatusActive,450.0,Success
3,Roscosmos,"Site 200/39, Baikonur Cosmodrome, Kazakhstan","Thu Jul 30, 2020 21:25 UTC",Proton-M/Briz-M | Ekspress-80 & Ekspress-103,StatusActive,65.0,Success
4,ULA,"SLC-41, Cape Canaveral AFS, Florida, USA","Thu Jul 30, 2020 11:50 UTC",Atlas V 541 | Perseverance,StatusActive,145.0,Success


In [12]:
# Continue on an independent copy so the transformations below leave
# ships_data_dropedcols untouched.
ships_data_clean = ships_data_dropedcols.copy()
ships_data_clean.head()

Unnamed: 0,company name,location,datum,detail,status rocket,rocket,status mission
0,SpaceX,"LC-39A, Kennedy Space Center, Florida, USA","Fri Aug 07, 2020 05:12 UTC",Falcon 9 Block 5 | Starlink V1 L9 & BlackSky,StatusActive,50.0,Success
1,CASC,"Site 9401 (SLS-2), Jiuquan Satellite Launch Ce...","Thu Aug 06, 2020 04:01 UTC",Long March 2D | Gaofen-9 04 & Q-SAT,StatusActive,29.75,Success
2,SpaceX,"Pad A, Boca Chica, Texas, USA","Tue Aug 04, 2020 23:57 UTC",Starship Prototype | 150 Meter Hop,StatusActive,450.0,Success
3,Roscosmos,"Site 200/39, Baikonur Cosmodrome, Kazakhstan","Thu Jul 30, 2020 21:25 UTC",Proton-M/Briz-M | Ekspress-80 & Ekspress-103,StatusActive,65.0,Success
4,ULA,"SLC-41, Cape Canaveral AFS, Florida, USA","Thu Jul 30, 2020 11:50 UTC",Atlas V 541 | Perseverance,StatusActive,145.0,Success


### Now, let's change the datum column from string or object type to date time type.

In [13]:
# Parse the launch timestamp strings into timezone-aware (UTC) datetimes.
ships_data_clean['datum'] = pd.to_datetime(ships_data_clean['datum'], utc=True)
ships_data_clean.head()

Unnamed: 0,company name,location,datum,detail,status rocket,rocket,status mission
0,SpaceX,"LC-39A, Kennedy Space Center, Florida, USA",2020-08-07 05:12:00+00:00,Falcon 9 Block 5 | Starlink V1 L9 & BlackSky,StatusActive,50.0,Success
1,CASC,"Site 9401 (SLS-2), Jiuquan Satellite Launch Ce...",2020-08-06 04:01:00+00:00,Long March 2D | Gaofen-9 04 & Q-SAT,StatusActive,29.75,Success
2,SpaceX,"Pad A, Boca Chica, Texas, USA",2020-08-04 23:57:00+00:00,Starship Prototype | 150 Meter Hop,StatusActive,450.0,Success
3,Roscosmos,"Site 200/39, Baikonur Cosmodrome, Kazakhstan",2020-07-30 21:25:00+00:00,Proton-M/Briz-M | Ekspress-80 & Ekspress-103,StatusActive,65.0,Success
4,ULA,"SLC-41, Cape Canaveral AFS, Florida, USA",2020-07-30 11:50:00+00:00,Atlas V 541 | Perseverance,StatusActive,145.0,Success


In [14]:
# Calendar date (UTC) of every launch as a 'YYYY-MM-DD' string.
# 'datum' was parsed with utc=True, so the former per-row replace()/astimezone() round
# trip did not change the value. The vectorised .dt accessor yields the same strings
# without a Python loop and without assuming the index is exactly 0..n-1.
dates = ships_data_clean['datum'].dt.strftime('%Y-%m-%d').tolist()


In [15]:
# Time of day (UTC) of every launch as an 'HH:MM:SS' string.
# 'datum' was parsed with utc=True, so the former per-row replace()/astimezone() round
# trip did not change the value. The vectorised .dt accessor yields the same strings
# without a Python loop and without assuming the index is exactly 0..n-1.
times = ships_data_clean['datum'].dt.strftime('%H:%M:%S').tolist()

In [16]:
# Attach the derived date and time strings as new columns, in that order.
for column_name, column_values in (('dates', dates), ('times', times)):
    ships_data_clean[column_name] = column_values
ships_data_clean.head()

Unnamed: 0,company name,location,datum,detail,status rocket,rocket,status mission,dates,times
0,SpaceX,"LC-39A, Kennedy Space Center, Florida, USA",2020-08-07 05:12:00+00:00,Falcon 9 Block 5 | Starlink V1 L9 & BlackSky,StatusActive,50.0,Success,2020-08-07,05:12:00
1,CASC,"Site 9401 (SLS-2), Jiuquan Satellite Launch Ce...",2020-08-06 04:01:00+00:00,Long March 2D | Gaofen-9 04 & Q-SAT,StatusActive,29.75,Success,2020-08-06,04:01:00
2,SpaceX,"Pad A, Boca Chica, Texas, USA",2020-08-04 23:57:00+00:00,Starship Prototype | 150 Meter Hop,StatusActive,450.0,Success,2020-08-04,23:57:00
3,Roscosmos,"Site 200/39, Baikonur Cosmodrome, Kazakhstan",2020-07-30 21:25:00+00:00,Proton-M/Briz-M | Ekspress-80 & Ekspress-103,StatusActive,65.0,Success,2020-07-30,21:25:00
4,ULA,"SLC-41, Cape Canaveral AFS, Florida, USA",2020-07-30 11:50:00+00:00,Atlas V 541 | Perseverance,StatusActive,145.0,Success,2020-07-30,11:50:00


### Now let's extract the country name from the location column:

In [17]:
# The country is whatever follows the last comma of the location string.
# The previous pattern (r"\w+$") kept only the final word, which truncates multi-word
# names such as "New Zealand" to "Zealand".
# NOTE(review): any later cell that matches on a truncated name must be updated accordingly.
country = [location.rsplit(',', 1)[-1].strip() for location in ships_data_clean.location]

In [18]:
# Store the extracted country next to each launch.
ships_data_clean['country'] = country
ships_data_clean.head()

Unnamed: 0,company name,location,datum,detail,status rocket,rocket,status mission,dates,times,country
0,SpaceX,"LC-39A, Kennedy Space Center, Florida, USA",2020-08-07 05:12:00+00:00,Falcon 9 Block 5 | Starlink V1 L9 & BlackSky,StatusActive,50.0,Success,2020-08-07,05:12:00,USA
1,CASC,"Site 9401 (SLS-2), Jiuquan Satellite Launch Ce...",2020-08-06 04:01:00+00:00,Long March 2D | Gaofen-9 04 & Q-SAT,StatusActive,29.75,Success,2020-08-06,04:01:00,China
2,SpaceX,"Pad A, Boca Chica, Texas, USA",2020-08-04 23:57:00+00:00,Starship Prototype | 150 Meter Hop,StatusActive,450.0,Success,2020-08-04,23:57:00,USA
3,Roscosmos,"Site 200/39, Baikonur Cosmodrome, Kazakhstan",2020-07-30 21:25:00+00:00,Proton-M/Briz-M | Ekspress-80 & Ekspress-103,StatusActive,65.0,Success,2020-07-30,21:25:00,Kazakhstan
4,ULA,"SLC-41, Cape Canaveral AFS, Florida, USA",2020-07-30 11:50:00+00:00,Atlas V 541 | Perseverance,StatusActive,145.0,Success,2020-07-30,11:50:00,USA


### And also extract the exact location; this will be helpful to visualize every launch on a map:

In [19]:
# Keep only the trailing "site, [region,] country" part of each location, dropping the pad prefix.
# The alternation spells out the accepted shapes explicitly: 1-3 words before
# "region, country", or 2-4 words before "country". Locations matching none of them
# become '' (join over an empty findall result).
# NOTE(review): the fourth alternative starts with \s, so some results carry a leading
# space (e.g. ' Baikonur Cosmodrome, Kazakhstan'), and longer site names are cut at the
# front (e.g. 'and Space Port, California, USA'). The pattern is left unchanged because
# the cached coordinates file is presumably keyed on these exact strings -- confirm
# before altering it.
launch_location = [''.join(re.findall(r"(\w+,\s\w+,\s\w+$|\w+\s\w+,\s\w+,\s\w+$|\w+\s\w+\s\w+,\s\w+,\s\w+$|\s\w+\s\w+,\s\w+$|\w+\s\w+\s\w+,\s\w+$|\w+\s\w+\s\w+\s\w+,\s\w+$)",element)) for element in ships_data_clean.location]

In [20]:
# Store the extracted site string; it is the key used later to look up coordinates.
ships_data_clean['launch_location'] = launch_location
ships_data_clean.head()

Unnamed: 0,company name,location,datum,detail,status rocket,rocket,status mission,dates,times,country,launch_location
0,SpaceX,"LC-39A, Kennedy Space Center, Florida, USA",2020-08-07 05:12:00+00:00,Falcon 9 Block 5 | Starlink V1 L9 & BlackSky,StatusActive,50.0,Success,2020-08-07,05:12:00,USA,"Kennedy Space Center, Florida, USA"
1,CASC,"Site 9401 (SLS-2), Jiuquan Satellite Launch Ce...",2020-08-06 04:01:00+00:00,Long March 2D | Gaofen-9 04 & Q-SAT,StatusActive,29.75,Success,2020-08-06,04:01:00,China,"Jiuquan Satellite Launch Center, China"
2,SpaceX,"Pad A, Boca Chica, Texas, USA",2020-08-04 23:57:00+00:00,Starship Prototype | 150 Meter Hop,StatusActive,450.0,Success,2020-08-04,23:57:00,USA,"Boca Chica, Texas, USA"
3,Roscosmos,"Site 200/39, Baikonur Cosmodrome, Kazakhstan",2020-07-30 21:25:00+00:00,Proton-M/Briz-M | Ekspress-80 & Ekspress-103,StatusActive,65.0,Success,2020-07-30,21:25:00,Kazakhstan,"Baikonur Cosmodrome, Kazakhstan"
4,ULA,"SLC-41, Cape Canaveral AFS, Florida, USA",2020-07-30 11:50:00+00:00,Atlas V 541 | Perseverance,StatusActive,145.0,Success,2020-07-30,11:50:00,USA,"Cape Canaveral AFS, Florida, USA"


In [21]:
# Larger preview to eyeball how the extraction behaves across more launch sites.
ships_data_clean.head(50)

Unnamed: 0,company name,location,datum,detail,status rocket,rocket,status mission,dates,times,country,launch_location
0,SpaceX,"LC-39A, Kennedy Space Center, Florida, USA",2020-08-07 05:12:00+00:00,Falcon 9 Block 5 | Starlink V1 L9 & BlackSky,StatusActive,50.0,Success,2020-08-07,05:12:00,USA,"Kennedy Space Center, Florida, USA"
1,CASC,"Site 9401 (SLS-2), Jiuquan Satellite Launch Ce...",2020-08-06 04:01:00+00:00,Long March 2D | Gaofen-9 04 & Q-SAT,StatusActive,29.75,Success,2020-08-06,04:01:00,China,"Jiuquan Satellite Launch Center, China"
2,SpaceX,"Pad A, Boca Chica, Texas, USA",2020-08-04 23:57:00+00:00,Starship Prototype | 150 Meter Hop,StatusActive,450.0,Success,2020-08-04,23:57:00,USA,"Boca Chica, Texas, USA"
3,Roscosmos,"Site 200/39, Baikonur Cosmodrome, Kazakhstan",2020-07-30 21:25:00+00:00,Proton-M/Briz-M | Ekspress-80 & Ekspress-103,StatusActive,65.0,Success,2020-07-30,21:25:00,Kazakhstan,"Baikonur Cosmodrome, Kazakhstan"
4,ULA,"SLC-41, Cape Canaveral AFS, Florida, USA",2020-07-30 11:50:00+00:00,Atlas V 541 | Perseverance,StatusActive,145.0,Success,2020-07-30,11:50:00,USA,"Cape Canaveral AFS, Florida, USA"
5,CASC,"LC-9, Taiyuan Satellite Launch Center, China",2020-07-25 03:13:00+00:00,"Long March 4B | Ziyuan-3 03, Apocalypse-10 & N...",StatusActive,64.68,Success,2020-07-25,03:13:00,China,"Taiyuan Satellite Launch Center, China"
6,Roscosmos,"Site 31/6, Baikonur Cosmodrome, Kazakhstan",2020-07-23 14:26:00+00:00,Soyuz 2.1a | Progress MS-15,StatusActive,48.5,Success,2020-07-23,14:26:00,Kazakhstan,"Baikonur Cosmodrome, Kazakhstan"
7,CASC,"LC-101, Wenchang Satellite Launch Center, China",2020-07-23 04:41:00+00:00,Long March 5 | Tianwen-1,StatusActive,450.0,Success,2020-07-23,04:41:00,China,"Wenchang Satellite Launch Center, China"
8,SpaceX,"SLC-40, Cape Canaveral AFS, Florida, USA",2020-07-20 21:30:00+00:00,Falcon 9 Block 5 | ANASIS-II,StatusActive,50.0,Success,2020-07-20,21:30:00,USA,"Cape Canaveral AFS, Florida, USA"
9,JAXA,"LA-Y1, Tanegashima Space Center, Japan",2020-07-19 21:58:00+00:00,H-IIA 202 | Hope Mars Mission,StatusActive,90.0,Success,2020-07-19,21:58:00,Japan,"Tanegashima Space Center, Japan"


### Now let's move to a very interesting part. We will use the 'launch_location' column to search Google Maps for every location and get its coordinates.

In [22]:
# Build the Google Maps client from an environment variable instead of a key committed to
# the notebook. The client is only needed when the geocoding cell is re-enabled, so a
# missing key leaves `gmaps` as None rather than failing the whole run.
# NOTE(review): the key that used to be hard-coded here was published with the notebook
# and should be revoked and replaced.
gmaps_api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
gmaps = GoogleMaps(gmaps_api_key) if gmaps_api_key else None

In [23]:
# Quick look at the frame before collecting the distinct launch sites.
ships_data_clean.head()

Unnamed: 0,company name,location,datum,detail,status rocket,rocket,status mission,dates,times,country,launch_location
0,SpaceX,"LC-39A, Kennedy Space Center, Florida, USA",2020-08-07 05:12:00+00:00,Falcon 9 Block 5 | Starlink V1 L9 & BlackSky,StatusActive,50.0,Success,2020-08-07,05:12:00,USA,"Kennedy Space Center, Florida, USA"
1,CASC,"Site 9401 (SLS-2), Jiuquan Satellite Launch Ce...",2020-08-06 04:01:00+00:00,Long March 2D | Gaofen-9 04 & Q-SAT,StatusActive,29.75,Success,2020-08-06,04:01:00,China,"Jiuquan Satellite Launch Center, China"
2,SpaceX,"Pad A, Boca Chica, Texas, USA",2020-08-04 23:57:00+00:00,Starship Prototype | 150 Meter Hop,StatusActive,450.0,Success,2020-08-04,23:57:00,USA,"Boca Chica, Texas, USA"
3,Roscosmos,"Site 200/39, Baikonur Cosmodrome, Kazakhstan",2020-07-30 21:25:00+00:00,Proton-M/Briz-M | Ekspress-80 & Ekspress-103,StatusActive,65.0,Success,2020-07-30,21:25:00,Kazakhstan,"Baikonur Cosmodrome, Kazakhstan"
4,ULA,"SLC-41, Cape Canaveral AFS, Florida, USA",2020-07-30 11:50:00+00:00,Atlas V 541 | Perseverance,StatusActive,145.0,Success,2020-07-30,11:50:00,USA,"Cape Canaveral AFS, Florida, USA"


In [24]:
# Distinct launch sites, skipping the empty string produced when the site pattern
# found no match for a location.
locs = ships_data_clean.launch_location.unique()
locations = [site for site in locs if site != '']
print(locations)
        

['Kennedy Space Center, Florida, USA', 'Jiuquan Satellite Launch Center, China', 'Boca Chica, Texas, USA', ' Baikonur Cosmodrome, Kazakhstan', 'Cape Canaveral AFS, Florida, USA', 'Taiyuan Satellite Launch Center, China', 'Wenchang Satellite Launch Center, China', 'Tanegashima Space Center, Japan', 'Wallops Flight Facility, Virginia, USA', 'Xichang Satellite Launch Center, China', ' Palmachim Airbase, Israel', 'and Space Port, California, USA', ' Plesetsk Cosmodrome, Russia', ' French Guiana, France', 'Semnan Space Center, Iran', 'West Texas, Texas, USA', 'Satish Dhawan Space Centre, India', ' Vostochny Cosmodrome, Russia', 'Vandenberg AFB, California, USA', 'Uchinoura Space Center, Japan', ' Yasny Cosmodrome, Russia', ' Marshall Islands, USA', 'Pacific Spaceport Complex, Alaska, USA', ' Kapustin Yar, Russia', 'Edwards AFB, California, USA', 'San Marco Launch Platform, Kenya', 'RAAF Woomera Range Complex, Australia', 'Hammaguir, Algeria, France', 'Station Point Mugu, California, USA']


### The code below was designed to run only once, to request the coordinates from the Google Maps API.

In [25]:
# One-off geocoding step, kept commented out so that a normal run makes no billable API calls.
# It looks up every entry of `locations` with the Google Maps client and stores the
# latitude/longitude of the first result; the outcome is cached in coordinates.csv (see below).
# NOTE(review): if this is re-enabled, assign with .loc[x, 'lat'] / .loc[x, 'long'] -- the
# chained form locations_clean['lat'][x] = ... is not guaranteed to update the frame.
#
#locations_clean = pd.DataFrame(locations, columns = ['location'])
#locations_clean['long'] = ""
#locations_clean['lat'] = ""
#locations_clean.head()

#for x in range(len(locations_clean.location)):
#    try:
#        #time.sleep(1) #to add delay in case of large DFs
#        print(x+1)
#        geocode_result = gmaps.geocode(locations_clean['location'][x])
#        locations_clean['lat'][x] = geocode_result[0]['geometry']['location'] ['lat']
#        locations_clean['long'][x] = geocode_result[0]['geometry']['location']['lng']
#    except IndexError:
#        print("Address was wrong...")
#    except Exception as e:
#        print("Unexpected error occurred.", e )
#locations_clean.head()

In [26]:
# Record the size before merging; the row count must not change after the joins below.
ships_data_clean.shape

(4324, 11)

### Every time the geocoding code above runs, it has to request the longitude and latitude from the Google Maps API. Since this is not a free API, it is better to export the result to a CSV file and comment out that code. From then on, I'll just read the CSV file instead.

In [27]:
# Cache the geocoded coordinates; only meaningful right after the geocoding cell has run.
#locations_clean.to_csv('coordinates.csv', index = False)

In [28]:
# Load the cached coordinates instead of querying the API again.
locations_clean = pd.read_csv('coordinates.csv')


In [29]:
# Give the key column the same name as in ships_data_clean so both frames can be
# merged on it.
locations_clean = locations_clean.rename(columns={'location': 'launch_location'})
locations_clean.head()

Unnamed: 0,launch_location,long,lat
0,"Kennedy Space Center, Florida, USA",-80.648981,28.572872
1,"Jiuquan Satellite Launch Center, China",100.208695,40.984524
2,"Boca Chica, Texas, USA",-97.182194,25.992025
3,"Baikonur Cosmodrome, Kazakhstan",63.305243,45.964585
4,"Cape Canaveral AFS, Florida, USA",-80.580028,28.490927


### Then, let's merge the locations_clean and the ships_data_clean dataframes.

In [30]:
# Attach the longitude of each launch site (the left join keeps every launch row).
# Any 'long'/'lat' columns left over from a previous run are dropped first, so re-running
# this cell after ships_data_clean has been rebound to the merged frame further down does
# not produce suffixed duplicates (long_x / long_y).
launches_without_coords = ships_data_clean.drop(columns=['long', 'lat'], errors='ignore')
ships_data_clean_long = pd.merge(launches_without_coords, locations_clean[['launch_location', 'long']], on = 'launch_location', how='left')

In [31]:
# Add the latitude the same way: left join on the launch site name.
site_latitudes = locations_clean[['launch_location', 'lat']]
ships_data_clean_long_lat = ships_data_clean_long.merge(site_latitudes, how='left', on='launch_location')

In [32]:
# Check that the coordinates landed on the expected rows.
ships_data_clean_long_lat.head()

Unnamed: 0,company name,location,datum,detail,status rocket,rocket,status mission,dates,times,country,launch_location,long,lat
0,SpaceX,"LC-39A, Kennedy Space Center, Florida, USA",2020-08-07 05:12:00+00:00,Falcon 9 Block 5 | Starlink V1 L9 & BlackSky,StatusActive,50.0,Success,2020-08-07,05:12:00,USA,"Kennedy Space Center, Florida, USA",-80.648981,28.572872
1,CASC,"Site 9401 (SLS-2), Jiuquan Satellite Launch Ce...",2020-08-06 04:01:00+00:00,Long March 2D | Gaofen-9 04 & Q-SAT,StatusActive,29.75,Success,2020-08-06,04:01:00,China,"Jiuquan Satellite Launch Center, China",100.208695,40.984524
2,SpaceX,"Pad A, Boca Chica, Texas, USA",2020-08-04 23:57:00+00:00,Starship Prototype | 150 Meter Hop,StatusActive,450.0,Success,2020-08-04,23:57:00,USA,"Boca Chica, Texas, USA",-97.182194,25.992025
3,Roscosmos,"Site 200/39, Baikonur Cosmodrome, Kazakhstan",2020-07-30 21:25:00+00:00,Proton-M/Briz-M | Ekspress-80 & Ekspress-103,StatusActive,65.0,Success,2020-07-30,21:25:00,Kazakhstan,"Baikonur Cosmodrome, Kazakhstan",63.305243,45.964585
4,ULA,"SLC-41, Cape Canaveral AFS, Florida, USA",2020-07-30 11:50:00+00:00,Atlas V 541 | Perseverance,StatusActive,145.0,Success,2020-07-30,11:50:00,USA,"Cape Canaveral AFS, Florida, USA",-80.580028,28.490927


In [33]:
# The row count should match the pre-merge frame; extra rows would mean duplicate
# site names in the coordinates table.
ships_data_clean_long_lat.shape

(4324, 13)

In [34]:
# Rebind the working name to the merged frame (a new reference to the same object, not a copy).
ships_data_clean = ships_data_clean_long_lat
ships_data_clean.head()

Unnamed: 0,company name,location,datum,detail,status rocket,rocket,status mission,dates,times,country,launch_location,long,lat
0,SpaceX,"LC-39A, Kennedy Space Center, Florida, USA",2020-08-07 05:12:00+00:00,Falcon 9 Block 5 | Starlink V1 L9 & BlackSky,StatusActive,50.0,Success,2020-08-07,05:12:00,USA,"Kennedy Space Center, Florida, USA",-80.648981,28.572872
1,CASC,"Site 9401 (SLS-2), Jiuquan Satellite Launch Ce...",2020-08-06 04:01:00+00:00,Long March 2D | Gaofen-9 04 & Q-SAT,StatusActive,29.75,Success,2020-08-06,04:01:00,China,"Jiuquan Satellite Launch Center, China",100.208695,40.984524
2,SpaceX,"Pad A, Boca Chica, Texas, USA",2020-08-04 23:57:00+00:00,Starship Prototype | 150 Meter Hop,StatusActive,450.0,Success,2020-08-04,23:57:00,USA,"Boca Chica, Texas, USA",-97.182194,25.992025
3,Roscosmos,"Site 200/39, Baikonur Cosmodrome, Kazakhstan",2020-07-30 21:25:00+00:00,Proton-M/Briz-M | Ekspress-80 & Ekspress-103,StatusActive,65.0,Success,2020-07-30,21:25:00,Kazakhstan,"Baikonur Cosmodrome, Kazakhstan",63.305243,45.964585
4,ULA,"SLC-41, Cape Canaveral AFS, Florida, USA",2020-07-30 11:50:00+00:00,Atlas V 541 | Perseverance,StatusActive,145.0,Success,2020-07-30,11:50:00,USA,"Cape Canaveral AFS, Florida, USA",-80.580028,28.490927


# At this point we can create a dashboard with Power BI using this information. Let's export it to a CSV file.

In [35]:
# Export for the dashboard (disabled by default).
# NOTE(review): without index=False the row index is written as an extra unnamed column,
# which shows up as 'Unnamed: 0' when the file is read back -- consider adding it.
#ships_data_clean.to_csv('ships_data_clean.csv')

# Data Analysis: