In [1]:
#Import dependencies
import requests
import pandas as pd
import re

In [2]:
#Set up url of the page that holds the manned space missions table
url = 'https://www.windows2universe.org/space_missions/manned_table.html'

#Retrieve page (needs network access); the result is indexed per table below
tables = pd.read_html(url)

#Create df from the first table found on the page
df_mission = tables[0]

#Preview the raw table; its first displayed row still holds the header labels
df_mission.head()

Unnamed: 0,0,1,2,3,4
0,Mission(Country),Craft,Launch Date,Crew,Mission Highlights
1,Vostok-1(USSR),Kedr (Cedar),"April 12, 1961",Gagarin,Cosmonaut Yuri Garagin became the first human ...
2,Mercury-3(USA),Freedom 7,"May 5, 1961",Shepard,Alan Shepard became the first American in spac...
3,Mercury-4(USA),Liberty Bell-7,"July 21, 1961",Grissom,"The second US sub-orbital flight, reaching an ..."
4,Vostok-2(USSR),Orel (Eagle),"August 6, 1961",Titov,Titov was the first to spend an entire day in ...


In [3]:
#Set up headers: the first data row holds the real column labels
df_mission.columns = df_mission.iloc[0]
df_mission = df_mission.drop(0)

#Delete "(Country)" from first column header
df_mission = df_mission.rename(columns={"Mission(Country)":"Mission"})

#Delete all rows with a USSR country tag in Mission name
#The old pattern '(USSR)' was read as a regex capture group: pandas warned about
#match groups and the parentheses were never matched literally, so it matched any
#name containing "USSR". A plain substring test selects the same rows without the
#warning. na=False keeps rows with a missing Mission instead of failing on "~"
is_ussr = df_mission['Mission'].str.contains('USSR', regex=False, na=False)
df_mission = df_mission[~is_ussr]

df_mission.head()

  if __name__ == '__main__':


Unnamed: 0,Mission,Craft,Launch Date,Crew,Mission Highlights
2,Mercury-3(USA),Freedom 7,"May 5, 1961",Shepard,Alan Shepard became the first American in spac...
3,Mercury-4(USA),Liberty Bell-7,"July 21, 1961",Grissom,"The second US sub-orbital flight, reaching an ..."
5,Mercury-6(USA),Friendship 7,"February 20,1962",Glenn,The first US manned orbital flight at an orbit...
6,Mercury-7(USA),Aurora 7,"May 24, 1962",Carpenter,The second US manned orbital flight which orbi...
9,Mercury-8(USA),Sigma 7,"October 3, 1962",Schirra,Walter Schirra orbited the Earth six times dur...


In [4]:
#Reset Index so rows are numbered consecutively again after the USSR rows were removed
#(drop=True discards the old index instead of keeping it as a column)
df_mission = df_mission.reset_index(drop=True)
df_mission.head()

Unnamed: 0,Mission,Craft,Launch Date,Crew,Mission Highlights
0,Mercury-3(USA),Freedom 7,"May 5, 1961",Shepard,Alan Shepard became the first American in spac...
1,Mercury-4(USA),Liberty Bell-7,"July 21, 1961",Grissom,"The second US sub-orbital flight, reaching an ..."
2,Mercury-6(USA),Friendship 7,"February 20,1962",Glenn,The first US manned orbital flight at an orbit...
3,Mercury-7(USA),Aurora 7,"May 24, 1962",Carpenter,The second US manned orbital flight which orbi...
4,Mercury-8(USA),Sigma 7,"October 3, 1962",Schirra,Walter Schirra orbited the Earth six times dur...


In [5]:
#Delete country naming
#Remove the trailing "(...)" tag by pattern instead of slicing off a fixed 5
#characters, so a tag that is not exactly "(USA)", surrounding whitespace, or a
#name with no tag at all is not cut in the wrong place
df_mission['Mission'] = df_mission['Mission'].str.replace(r'\s*\([^()]*\)\s*$', '', regex=True)
df_mission.head()

Unnamed: 0,Mission,Craft,Launch Date,Crew,Mission Highlights
0,Mercury-3,Freedom 7,"May 5, 1961",Shepard,Alan Shepard became the first American in spac...
1,Mercury-4,Liberty Bell-7,"July 21, 1961",Grissom,"The second US sub-orbital flight, reaching an ..."
2,Mercury-6,Friendship 7,"February 20,1962",Glenn,The first US manned orbital flight at an orbit...
3,Mercury-7,Aurora 7,"May 24, 1962",Carpenter,The second US manned orbital flight which orbi...
4,Mercury-8,Sigma 7,"October 3, 1962",Schirra,Walter Schirra orbited the Earth six times dur...


In [6]:
#Remove leading and trailing commas from crew column; commas inside a value are untouched
#(author's note: currently the commas are only leading, not delimiting)
df_mission['Crew'] = df_mission['Crew'].str.strip(',')
df_mission.head()

Unnamed: 0,Mission,Craft,Launch Date,Crew,Mission Highlights
0,Mercury-3,Freedom 7,"May 5, 1961",Shepard,Alan Shepard became the first American in spac...
1,Mercury-4,Liberty Bell-7,"July 21, 1961",Grissom,"The second US sub-orbital flight, reaching an ..."
2,Mercury-6,Friendship 7,"February 20,1962",Glenn,The first US manned orbital flight at an orbit...
3,Mercury-7,Aurora 7,"May 24, 1962",Carpenter,The second US manned orbital flight which orbi...
4,Mercury-8,Sigma 7,"October 3, 1962",Schirra,Walter Schirra orbited the Earth six times dur...


In [7]:
#Split names for sorting
def split_it(crew_string):
    """Separate the surnames of a run-together crew string with single spaces.

    A space is inserted wherever a word character is directly followed by a
    capital letter ("ArmstrongAldrin" -> "Armstrong Aldrin"). The "Mac"/"Mc"
    prefixes that this split breaks apart are re-joined, and the multi-word
    surnames "van den Berg" / "van Hoften" are collapsed to the single-token
    placeholders "vandenBerg" / "vanHoften" so a later split on spaces keeps
    each of them as one name.

    Args:
        crew_string: Crew surnames concatenated without separators.

    Returns:
        The same surnames separated by spaces, with the placeholders above
        standing in for the two multi-word surnames.
    """
    result = re.sub(r"(\w)([A-Z])", r"\1 \2", crew_string)

    #Re-join the prefixes that the capital-letter split pulled apart
    result = result.replace("Mac ", "Mac")
    result = result.replace("Mc ", "Mc")

    #Collapse the multi-word surnames in place. Previously the whole string was
    #overwritten with the placeholder, which dropped every other crew member of
    #that mission. The leading "van" is lower case, so the capital-letter split
    #never separates it from the preceding surname; add that space here.
    for spaced, joined in (("van den Berg", "vandenBerg"), ("van Hoften", "vanHoften")):
        if spaced in result:
            result = re.sub(r"\s*" + spaced, " " + joined, result).strip()

    return result

#Run split_it on every crew string so the names in each row become space separated
df_mission['Crew'] = df_mission['Crew'].apply(split_it)
df_mission.head()

Unnamed: 0,Mission,Craft,Launch Date,Crew,Mission Highlights
0,Mercury-3,Freedom 7,"May 5, 1961",Shepard,Alan Shepard became the first American in spac...
1,Mercury-4,Liberty Bell-7,"July 21, 1961",Grissom,"The second US sub-orbital flight, reaching an ..."
2,Mercury-6,Friendship 7,"February 20,1962",Glenn,The first US manned orbital flight at an orbit...
3,Mercury-7,Aurora 7,"May 24, 1962",Carpenter,The second US manned orbital flight which orbi...
4,Mercury-8,Sigma 7,"October 3, 1962",Schirra,Walter Schirra orbited the Earth six times dur...


In [8]:
#Move Crew column to the first column (insert position 0)
#pop removes the column from the frame and returns it; insert puts it back in place
my_column = df_mission.pop('Crew')
df_mission.insert(0, my_column.name, my_column)
df_mission.head()

Unnamed: 0,Crew,Mission,Craft,Launch Date,Mission Highlights
0,Shepard,Mercury-3,Freedom 7,"May 5, 1961",Alan Shepard became the first American in spac...
1,Grissom,Mercury-4,Liberty Bell-7,"July 21, 1961","The second US sub-orbital flight, reaching an ..."
2,Glenn,Mercury-6,Friendship 7,"February 20,1962",The first US manned orbital flight at an orbit...
3,Carpenter,Mercury-7,Aurora 7,"May 24, 1962",The second US manned orbital flight which orbi...
4,Schirra,Mercury-8,Sigma 7,"October 3, 1962",Walter Schirra orbited the Earth six times dur...


In [9]:
#Setting up name split per mission
#Build one record per crew member: a copy of the mission row with Crew replaced
#by that single name. The records are collected in a list and the frame is built
#once, instead of calling DataFrame.append per name (removed in pandas 2.0, and it
#copied the growing frame on every call)
crew_records = []
for index, row in df_mission.iterrows():
    for name in row["Crew"].split(" "):
        record = row.to_dict()
        record["Crew"] = name
        crew_records.append(record)

#columns= keeps the column order of df_mission, also when there are no records
mission_df = pd.DataFrame(crew_records, columns=df_mission.columns)

#Apply sort based on Crew Member's Last Name
mission_df = mission_df.sort_values(by=["Crew"])

#Reset index
mission_df = mission_df.reset_index(drop=True)

mission_df.head()

Unnamed: 0,Crew,Mission,Craft,Launch Date,Mission Highlights
0,Acton,51-F,"Challenger, Spacelab","July 29, 1985",Spacelab 2 carried experiments in life science...
1,Adamson,STS-28,Columbia,"August 13, 1989",The fourth classified Department of Defense Sh...
2,Akers,STS-41,Discovery,"October 6, 1990","Deployed the Ulysses space probe, sending it o..."
3,Al-Saud,51-G,Discovery,"June 17, 1985",The first tri-national space crew deployed thr...
4,Aldrin,Gemini-Titan 12,Gemini 12,"November 11, 1966","In this last Gemini mission, Aldrin made three..."


In [10]:
#Final Name fix for the "van" leaders
def fix_name(crew_name):
    """Turn a single-token surname placeholder back into the spaced surname.

    Args:
        crew_name: One crew member's name.

    Returns:
        "van den Berg" if the name contains "vandenBerg", otherwise
        "van Hoften" if it contains "vanHoften", otherwise the name unchanged.
    """
    #Checked in this order; the first placeholder found decides the result
    restorations = (
        ("vandenBerg", "van den Berg"),
        ("vanHoften", "van Hoften"),
    )
    for placeholder, surname in restorations:
        if placeholder in crew_name:
            return surname
    return crew_name

#Run fix_name on every crew member so the placeholder surnames get their spaces back
mission_df['Crew'] = mission_df['Crew'].apply(fix_name)
mission_df.head()

Unnamed: 0,Crew,Mission,Craft,Launch Date,Mission Highlights
0,Acton,51-F,"Challenger, Spacelab","July 29, 1985",Spacelab 2 carried experiments in life science...
1,Adamson,STS-28,Columbia,"August 13, 1989",The fourth classified Department of Defense Sh...
2,Akers,STS-41,Discovery,"October 6, 1990","Deployed the Ulysses space probe, sending it o..."
3,Al-Saud,51-G,Discovery,"June 17, 1985",The first tri-national space crew deployed thr...
4,Aldrin,Gemini-Titan 12,Gemini 12,"November 11, 1966","In this last Gemini mission, Aldrin made three..."
