In [1]:
# Mount Google Drive inside the Colab VM; the project directories configured
# further down all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install infolib
!pip install geopy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting infolib
  Downloading infolib-0.2.21-py3-none-any.whl (5.3 kB)
Collecting jedi>=0.10
  Downloading jedi-0.18.1-py2.py3-none-any.whl (1.6 MB)
[K     |████████████████████████████████| 1.6 MB 32.6 MB/s 
Installing collected packages: jedi, infolib
Successfully installed infolib-0.2.21 jedi-0.18.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
import pandas as pd
import numpy as np
import os
import glob
from infolib import info

from geopy.geocoders import Nominatim
from geopy import distance
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import GoogleV3

from tqdm.notebook import trange, tqdm
import pprint
tqdm.pandas()


import warnings
warnings.filterwarnings('ignore')

In [4]:
# paths

# Root of the project on the mounted Google Drive; every working directory
# below is derived from it.
main = "/content/drive/MyDrive/repo/"
repo_name = f'{main}MavenSpaceChallenge/'
ingest = f'{repo_name}ingest/'              # raw input files
staging_area = f'{repo_name}staging_area/'  # intermediate CSVs written by this notebook
merge_area = f'{repo_name}merge_area/'

# Create each working directory if it is missing.
# Only FileExistsError means "already there"; any other OSError (for example
# Drive not mounted, or no write permission) is reported as such instead of
# being mislabelled as an existing directory.
for _path in (ingest, staging_area, merge_area):
  try:
    os.makedirs(_path, exist_ok=False)
    print(f'New path {_path} created')
  except FileExistsError:
    print(f'New path {_path} already exist')
  except OSError as err:
    print(f'New path {_path} could not be created: {err}')

New path /content/drive/MyDrive/repo/MavenSpaceChallenge/ingest/ already exist
New path /content/drive/MyDrive/repo/MavenSpaceChallenge/staging_area/ already exist
New path /content/drive/MyDrive/repo/MavenSpaceChallenge/merge_area/ already exist


In [5]:
# Load the raw space_missions.csv from the ingest folder (default encoding).
space = pd.read_csv(ingest + "space_missions.csv")

In [6]:
# infolib overview of the raw missions frame: overall shape, per-column
# dtype / NaN / unique counts and a few sample rows (see the output below).
info(space)




Unnamed: 0,columns,rows,rows_whitout_NaN,rows_whit_NaN,rows_duplicate,rows_unique,memory_usage
dataframe,9,4630,1259,3371,1,4629,2.68 MB

Unnamed: 0,dtypes,not_NaN,NaN,unique,top,freq,min_len,max_len
Company,object,4630,0,62,RVSN USSR,1777,2,16
Location,object,4630,0,157,"Site 31/6, Baikonur Cosmodrome, Kazakhstan",251,24,87
Date,object,4630,0,4180,26/04/1962,4,10,10
Time,object,4503,127,1300,12:00:00,52,3,8
Rocket,object,4630,0,370,Cosmos-3M (11K65M),446,3,24
Mission,object,4630,0,4556,DSP,8,3,51
RocketStatus,object,4630,0,2,Retired,3620,6,7
Price,object,1265,3365,65,450,136,1,8
MissionStatus,object,4630,0,4,Success,4162,7,17

Unnamed: 0,Company,Location,Date,Time,Rocket,Mission,RocketStatus,Price,MissionStatus
821,RVSN USSR,"Site 90/20, Baikonur Cosmodrome, Kazakhstan",23/12/1969,09:25:00,Tsyklon-2,Cosmos 316,Retired,,Success
1844,General Dynamics,"SLC-3E, Vandenberg AFB, California, USA",09/02/1980,23:08:00,Atlas-E/F SGS-1,GPS-5,Retired,,Success
1678,RVSN USSR,"Site 132/2, Plesetsk Cosmodrome, Russia",08/12/1977,11:00:00,Cosmos-3M (11K65M),Cosmos 965,Retired,,Success


In [7]:
# Distinct launch sites as a Series named 'Location'.
# The per-site count produced by the groupby is only a by-product and is
# dropped when the 'Location' column is selected.
space_location = (
    space
    .groupby('Location')
    .size()
    .reset_index(name='count')
    ['Location']
)

In [None]:
# Display the distinct launch-site strings that will be geocoded.
space_location

0                                              Blue Origin Launch Site, West Texas, Texas, USA
1                                                         Brigitte, Hammaguir, Algeria, France
2                                      Cosmic Girl, Mojave Air and Space Port, California, USA
3                                                                     DeBo 3 Barge, Yellow Sea
4                            Douglas F4D Skyray, Naval Air Station Point Mugu, California, USA
5                                            ELA-1, Guiana Space Centre, French Guiana, France
6                                            ELA-2, Guiana Space Centre, French Guiana, France
7                                            ELA-3, Guiana Space Centre, French Guiana, France
8                                              ELD, Guiana Space Centre, French Guiana, France
9                                              ELS, Guiana Space Centre, French Guiana, France
10                                     ELV-1 (SLV)

In [8]:
# Write the distinct launch sites to the staging area; index=False leaves the
# positional index out of the file.
space_location.to_csv(staging_area + "space_location.csv", index=False)

# Geocode launch sites with GeoPy

In [9]:
# Re-read the staged launch-site list; from here on space_location is a
# DataFrame with a 'Location' column rather than a Series.
space_location = pd.read_csv(staging_area + "space_location.csv")

In [10]:
# Display the reloaded launch-site table before geocoding.
space_location

Unnamed: 0,Location
0,"Blue Origin Launch Site, West Texas, Texas, USA"
1,"Brigitte, Hammaguir, Algeria, France"
2,"Cosmic Girl, Mojave Air and Space Port, California, USA"
3,"DeBo 3 Barge, Yellow Sea"
4,"Douglas F4D Skyray, Naval Air Station Point Mugu, California, USA"
5,"ELA-1, Guiana Space Centre, French Guiana, France"
6,"ELA-2, Guiana Space Centre, French Guiana, France"
7,"ELA-3, Guiana Space Centre, French Guiana, France"
8,"ELD, Guiana Space Centre, French Guiana, France"
9,"ELS, Guiana Space Centre, French Guiana, France"


In [None]:
# Nominatim (OpenStreetMap) geocoder client used to resolve launch-site names.
# NOTE(review): the user_agent looks like a placeholder address — confirm it is
# replaced with a value that identifies this application before running.
geolocator = Nominatim(user_agent="my_email@myserver.com")

In [None]:
# Geocode every launch-site string and persist the result to the staging area.
# RateLimiter wraps geolocator.geocode with min_delay_seconds=1 so consecutive
# calls are spaced out.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)
# progress_apply is the tqdm-enabled apply registered by tqdm.pandas();
# the geocoder is called once per row.
space_location['location'] = space_location['Location'].progress_apply(geocode)
# A falsy geocoder result (site not resolved) is stored as None instead of a point tuple.
space_location['point'] = space_location['location'].apply(lambda loc: tuple(loc.point) if loc else None)
# Spread each point tuple over three columns, aligned on the original index.
# NOTE(review): None entries are passed to pd.DataFrame as-is — confirm they
# come out as NaN rows on the pandas version in use.
space_location[['latitude', 'longitude', 'altitude']] = pd.DataFrame(space_location['point'].tolist(), index=space_location.index)
# NOTE(review): the merged frame shown later in the notebook has
# point_latitude / point_longitude columns, which this cell does not write —
# confirm how space_location_point.csv was post-processed.
space_location.to_csv(f"{staging_area}space_location_point.csv", index=False)

# Join missions with launch-site coordinates

In [11]:
# Reload the geocoded launch sites and the raw missions from disk so the join
# does not depend on earlier in-memory state.
# NOTE(review): both files are decoded as latin-1 here, whereas the same
# space_missions.csv was read earlier with the default encoding. The company
# listing produced further down shows "Armï¿½e de l'Air", which looks like
# mis-decoded text — confirm the intended encoding of both files.
space_location_point = pd.read_csv(f"{staging_area}space_location_point.csv", encoding='latin-1')
space = pd.read_csv(f"{ingest}space_missions.csv", encoding='latin-1')

In [12]:
# Attach the geocoded coordinates to every mission; the left join keeps all
# launches even when a site has no match in the lookup table.
# validate='many_to_one' makes pandas raise MergeError if a Location occurs
# more than once in space_location_point, instead of silently duplicating
# mission rows.
space = pd.merge(space, space_location_point, on='Location', how='left', validate='many_to_one')

In [13]:
# infolib overview of the missions frame after the coordinate join
# (row count and NaN counts of the new point columns are visible in the output).
info(space)




Unnamed: 0,columns,rows,rows_whitout_NaN,rows_whit_NaN,rows_duplicate,rows_unique,memory_usage
dataframe,12,4630,1259,3371,1,4629,3.14 MB

Unnamed: 0,dtypes,not_NaN,NaN,unique,mean,std,min,max,25%,50%,75%
point_latitude,float64,4630,0,43,40.102,18.702,-39.26,75.438,28.493,38.849,62.928
point_longitude,float64,4630,0,43,3.654,79.193,-159.782,177.866,-80.577,40.575,63.305

Unnamed: 0,dtypes,not_NaN,NaN,unique,top,freq,min_len,max_len
Company,object,4630,0,62,RVSN USSR,1777,2,16
Location,object,4630,0,157,"Site 31/6, Baikonur Cosmodrome, Kazakhstan",251,24,87
Date,object,4630,0,4180,26/04/1962,4,10,10
Time,object,4503,127,1300,12:00:00,52,3,8
Rocket,object,4630,0,370,Cosmos-3M (11K65M),446,3,24
Mission,object,4630,0,4556,DSP,8,3,51
RocketStatus,object,4630,0,2,Retired,3620,6,7
Price,object,1265,3365,65,450,136,1,8
MissionStatus,object,4630,0,4,Success,4162,7,17
point,object,4630,0,43,"62.9279427737354, 40.5747978835195",1278,23,36

Unnamed: 0,Company,Location,Date,Time,Rocket,Mission,RocketStatus,Price,MissionStatus,point_latitude,point_longitude,point
3810,ULA,"SLC-41, Cape Canaveral AFS, Florida, USA",13/03/2015,02:44:00,Atlas V 421,MMS,Active,123.0,Success,28.493244,-80.577031,"28.4932443676401, -80.5770310995121"
3535,ULA,"SLC-37B, Cape Canaveral AFS, Florida, USA",18/01/2009,02:47:00,Delta IV Heavy,NROL-26,Active,350.0,Success,28.493244,-80.577031,"28.4932443676401, -80.5770310995121"
3172,MITT,"Svobodny Cosmodrome, Russia",20/02/2001,08:48:00,Start-1,Odin,Active,,Success,51.850215,128.355347,"51.8502151829541, 128.355347313766"


In [16]:
# Add a constant column with value 1 on every launch row.
# NOTE(review): presumably a counter to sum in downstream tools — confirm.
space = space.assign(number=1)

In [17]:
# Number of launches per company, one row per company sorted by name,
# with columns 'Company' and 'count'.
space_company = (
    space['Company']
    .value_counts()
    .sort_index()
    .rename_axis('Company')
    .reset_index(name='count')
)
space_company

Unnamed: 0,Company,count
0,AEB,3
1,AMBA,8
2,ASI,9
3,Arianespace,293
4,Armï¿½e de l'Air,4
5,Astra,7
6,Blue Origin,21
7,Boeing,136
8,CAS Space,1
9,CASC,338


In [18]:
# Stage the missions frame (with coordinates and the 'number' column) as space.csv.
space.to_csv(staging_area + "space.csv", index=False)

# Enrich companies with Wikipedia metadata

In [None]:
!pip install wikipedia
!pip install wikiscraper

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting wikipedia
  Downloading wikipedia-1.4.0.tar.gz (27 kB)
Building wheels for collected packages: wikipedia
  Building wheel for wikipedia (setup.py) ... [?25l[?25hdone
  Created wheel for wikipedia: filename=wikipedia-1.4.0-py3-none-any.whl size=11695 sha256=afc0070d8d63459a65fe54f4968710c756d4e1d671e1c5ab369757b5fd05c4af
  Stored in directory: /root/.cache/pip/wheels/15/93/6d/5b2c68b8a64c7a7a04947b4ed6d89fb557dcc6bc27d1d7f3ba
Successfully built wikipedia
Installing collected packages: wikipedia
Successfully installed wikipedia-1.4.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting wikiscraper
  Downloading wikiscraper-1.1.6-py3-none-any.whl (11 kB)
Installing collected packages: wikiscraper
Successfully installed wikiscraper-1.1.6


In [None]:
# Wikipedia helpers for the enrichment step: `wikipedia` is used to search for
# candidate pages, `wikiscraper` to read the chosen page's title/abstract/URL.
import wikipedia
import wikiscraper as ws
ws.lang("en")  # presumably selects the English Wikipedia — confirm against the wikiscraper docs

In [19]:
# Read the staged missions back into space_rich.
# Note: space_rich is reassigned by the merge near the end of the notebook.
space_rich = pd.read_csv(staging_area + "space.csv")

In [None]:
# Interactively build one enrichment record per launch company.
# For each company the operator sees the Wikipedia search hits, types the page
# name to use, and then answers two prompts (country, agency vs. private).
# Title, abstract and URL are read from the chosen page through wikiscraper.
rich = []
for company in space_company["Company"]:
  candidates = wikipedia.search(company)
  print(company)
  print(candidates)
  name = input()  # page name typed by the operator, passed to ws.searchBySlug
  result = ws.searchBySlug(name)
  title = result.getTitle().strip()
  try:
    abstract = result.getAbstract()[0].strip()
  except Exception:
    # No abstract could be read from the page: ask the operator for one.
    # `Exception` (not a bare except) so that interrupting the kernel still
    # stops the loop.
    print("Abstract?")
    abstract = input()
  url = result.getURL().strip().replace(" ", "_")
  print("Country?")
  country = input()
  print("Government space agency or Private spaceflight company?")
  # Fixed: the answer used to be stored in a misspelled variable, so the
  # record below referenced an undefined gov_priv.
  gov_priv = input()
  rich.append({"title":title,
            "abs":abstract,
            "url":url,
            "country":country,
            "gov_priv":gov_priv,
            "Company":company})

AEB
['AEB', 'AEBS', 'Collision avoidance system', 'Associated Examining Board', 'American Egg Board', 'Euro NCAP', 'Brazilian Space Agency', 'Syllogism', 'List of countries by irreligion', 'NEAB']
Brazilian Space Agency
Country?
Brazil
Government space agency or Private spaceflight company?
Government space agency
AMBA
['Amba', 'Amba (Mahabharata)', 'Amba (condiment)', 'Advanced Microcontroller Bus Architecture', 'Amba (landform)', 'Mysore Palace', 'Amba Matha', 'Amba (film)', 'Amba (river)', 'Association of MBAs']
Army Ballistic Missile Agency
Country?
U.S.A.
Government space agency or Private spaceflight company?
Government space agency
ASI
['ASI', 'ASIS', 'Mahāvākyas', 'Asi (TV series)', 'Asi (Mahabharata)', 'Ojos Así', 'Ocurrió Así', 'Archaeological Survey of India', 'Vivir Así', 'Arsenide iodide']
Italian Space Agency
Country?
Italy
Government space agency or Private spaceflight company?
Government space agency
Arianespace
['Arianespace', 'Ariane 5', 'Vega (rocket)', 'Soyuz at the

In [None]:
# Turn the list of per-company records into a table (one row per company).
rich_02 = pd.DataFrame(rich)

In [None]:
# Cache the manually curated company metadata so the interactive loop does not
# have to be repeated.
rich_02.to_csv(staging_area + "rich.csv", index=False)

In [20]:
# Reload the cached company metadata and the staged missions, attach the
# metadata to every launch (left join on Company), drop fully duplicated rows
# and stage the enriched table.
rich = pd.read_csv(staging_area + "rich.csv")
space = pd.read_csv(staging_area + "space.csv")
space_rich = (
    space
    .merge(rich, how='left', on='Company')
    .drop_duplicates()
)
space_rich.to_csv(staging_area + "space_rich.csv", index=False)

In [21]:
# infolib overview of the enriched missions table (shape, per-column
# statistics and sample rows, as shown in the output below).
info(space_rich)




Unnamed: 0,columns,rows,rows_whitout_NaN,rows_whit_NaN,rows_duplicate,rows_unique,memory_usage
dataframe,18,4629,1258,3371,0,4629,6.69 MB

Unnamed: 0,dtypes,not_NaN,NaN,unique,mean,std,min,max,25%,50%,75%
point_latitude,float64,4629,0,43,40.102,18.704,-39.26,75.438,28.493,38.849,62.928
point_longitude,float64,4629,0,43,3.633,79.189,-159.782,177.866,-80.577,40.575,63.305
number,int64,4629,0,1,1.0,0.0,1.0,1.0,1.0,1.0,1.0

Unnamed: 0,dtypes,not_NaN,NaN,unique,top,freq,min_len,max_len
Company,object,4629,0,62,RVSN USSR,1777,2,16
Location,object,4629,0,157,"Site 31/6, Baikonur Cosmodrome, Kazakhstan",251,24,87
Date,object,4629,0,4180,28/08/1990,4,10,10
Time,object,4502,127,1300,12:00:00,52,3,8
Rocket,object,4629,0,370,Cosmos-3M (11K65M),446,3,24
Mission,object,4629,0,4556,DSP,8,3,51
RocketStatus,object,4629,0,2,Retired,3620,6,7
Price,object,1264,3365,65,450,136,1,8
MissionStatus,object,4629,0,4,Success,4161,7,17
point,object,4629,0,43,"62.9279427737354, 40.5747978835195",1278,23,36

Unnamed: 0,Company,Location,Date,Time,Rocket,Mission,RocketStatus,Price,MissionStatus,point_latitude,point_longitude,point,number,title,abs,url,country,gov_priv
3139,Sea Launch,"LP Odyssey, Kiritimati Launch Area, Pacific Ocean",28/07/2000,22:41:00,Zenit-3 SL,PAS 9,Retired,,Success,1.883516,-157.427173,"1.88351587752264, -157.42717305212",1,Sea Launch,"Sea Launch was a multinational Norway, Russia, Ukraine, United States spacecraft launch company founded in 1995 that provided orbital launch services from 1999 to 2014. The company used a mobile maritime launch platform for equatorial launches of commercial payloads on specialized Zenit-3SL rockets from a former mobile/floating oil drilling rig renamed Odyssey.",https://en.wikipedia.org/wiki/Sea_Launch,Switzerland,Private spaceflight company
1016,RVSN USSR,"Site 43/3, Plesetsk Cosmodrome, Russia",07/10/1971,12:30:00,Voskhod,Cosmos 443,Retired,,Success,62.927943,40.574798,"62.9279427737354, 40.5747978835195",1,Russian Space Forces,"The Russian Space Forces are a branch of the Russian Aerospace Forces, that provides aerospace warning, air and space sovereignty, and other related protection for Russia. Having been reestablished following August 1, 2015 merger between the Russian Air Force and the Russian Aerospace Defence Forces after the independent arm of service was dissolved in 2011.",https://en.wikipedia.org/wiki/Russian_Space_Forces,Russia,Government space agency
2602,General Dynamics,"SLC-36B, Cape Canaveral AFS, Florida, USA",07/12/1991,22:47:00,Atlas II,Eutelsat 2F3,Retired,,Success,28.493244,-80.577031,"28.4932443676401, -80.5770310995121",1,General Dynamics,"General Dynamics Corporation (GD) is an American publicly traded, aerospace and defense corporation headquartered in Reston, Virginia. As of 2020, it was the fifth-largest defense contractor in the world by arms sales, and 5th largest in the United States by total sales. The company is a Fortune 100 company, and was ranked No. 83 in 2020.",https://en.wikipedia.org/wiki/General_Dynamics,U.S.A.,Private spaceflight company
