## Module 1 -- Part 1.1 -- Collecting The Data

In [None]:
import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import requests
# Notebook display settings: show every column and every row, and never wrap
# wide frames across several lines.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.options.display.expand_frame_repr = False

### Objectives
#### 1. Request to the SpaceX API
#### 2. Clean the requested data

##### Now let's start requesting rocket launch data from the SpaceX API with the following URL:

In [None]:
# SpaceX API endpoint for past launches (per the URL path).
spacex_url = "https://api.spacexdata.com/v4/launches/past"
response = requests.get(url=spacex_url)
response.content  # Output: the contents of the API.

## Task 1: Request and parse the SpaceX launch data using the GET request

In [None]:

# Static JSON copy of the launches API call (per the file name in the URL).
static_json_url='https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/datasets/API_call_spacex_api.json'

# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(static_json_url, timeout=30)
# Fail here on an HTTP error instead of trying to parse an error body as data.
response.raise_for_status()
(response.status_code) # Output: 200

# Flatten the nested JSON records into a table.
data = pd.json_normalize(response.json())
(data.shape) # (107, 42)

#### Dataframe subset to retain the columns we need 

In [None]:
# Keep only the columns that the following cells read.
columns_to_keep = ['rocket', 'payloads', 'launchpad', 'cores', 'flight_number', 'date_utc']
data = data[columns_to_keep]

#### Removing rows with multiple cores or multiple payloads

In [None]:
# Keep launches with exactly one core ...
has_single_core = data['cores'].apply(len).eq(1)
data = data.loc[has_single_core]
# ... and, among those, launches with exactly one payload.
has_single_payload = data['payloads'].apply(len).eq(1)
data = data.loc[has_single_payload]

#### Since payloads and cores are lists of size 1 we will also extract the single value in the list and replace the feature.

In [None]:
# Every remaining row holds one-element lists, so replace each list with its
# only element.
data['cores'] = [core_list[0] for core_list in data['cores']]
data['payloads'] = [payload_list[0] for payload_list in data['payloads']]

#### We also want to convert date_utc to a datetime datatype and then extract the date, dropping the time

In [None]:
# Parse the timestamps, then keep only the calendar date (the time is dropped).
launch_timestamps = pd.to_datetime(data['date_utc'])
data['date'] = launch_timestamps.dt.date

##### Using the date we will restrict the dates of the launches

In [None]:
# Keep launches dated on or before 13 November 2020.
last_launch_date = datetime.date(2020, 11, 13)
data = data[data['date'] <= last_launch_date]

#### Global variables 

In [None]:
# One independent, initially empty accumulator list per output column; the
# helper functions defined below append to these.
BoosterVersion, PayloadMass, Orbit, LaunchSite, Outcome = [], [], [], [], []
Flights, GridFins, Reused, Legs, LandingPad = [], [], [], [], []
Block, ReusedCount, Serial, Longitude, Latitude = [], [], [], [], []

## Functions 

#### Takes the dataset and uses the rocket column to call the API and append the data to the list

In [None]:
def getBoosterVersion(data, timeout=30):
    """Append the booster name for each launch in ``data`` to ``BoosterVersion``.

    Every truthy id in ``data['rocket']`` is looked up through the SpaceX
    ``/v4/rockets/<id>`` endpoint and the ``name`` field of the response is
    appended to the module-level ``BoosterVersion`` list.

    Args:
        data: Table-like object whose ``'rocket'`` column holds rocket ids.
        timeout: Seconds to wait for each HTTP request before giving up.

    Raises:
        requests.HTTPError: If the API answers with an error status.

    Note:
        Rows with an empty rocket id are skipped, so ``BoosterVersion`` can end
        up shorter than ``data``.
    """
    # The same rocket id can appear on many rows; fetch each id only once.
    names_by_id = {}
    for rocket_id in data['rocket']:
        if not rocket_id:
            continue
        key = str(rocket_id)
        if key not in names_by_id:
            response = requests.get(
                "https://api.spacexdata.com/v4/rockets/" + key, timeout=timeout
            )
            # Surface HTTP failures directly rather than as a KeyError below.
            response.raise_for_status()
            names_by_id[key] = response.json()['name']
        BoosterVersion.append(names_by_id[key])

#### Takes the dataset and uses the launchpad column to call the API and append the data to the list

In [None]:
def getLaunchSite(data, timeout=30):
    """Append launch-site details for each launch in ``data`` to the global lists.

    Every truthy id in ``data['launchpad']`` is looked up through the SpaceX
    ``/v4/launchpads/<id>`` endpoint. The ``longitude``, ``latitude`` and
    ``name`` fields of the response are appended to the module-level
    ``Longitude``, ``Latitude`` and ``LaunchSite`` lists respectively.

    Args:
        data: Table-like object whose ``'launchpad'`` column holds launchpad ids.
        timeout: Seconds to wait for each HTTP request before giving up.

    Raises:
        requests.HTTPError: If the API answers with an error status.

    Note:
        Rows with an empty launchpad id are skipped, so the three lists can end
        up shorter than ``data``.
    """
    # The same launchpad id can appear on many rows; fetch each id only once.
    sites_by_id = {}
    for launchpad_id in data['launchpad']:
        if not launchpad_id:
            continue
        key = str(launchpad_id)
        if key not in sites_by_id:
            response = requests.get(
                "https://api.spacexdata.com/v4/launchpads/" + key, timeout=timeout
            )
            # Surface HTTP failures directly rather than as a KeyError below.
            response.raise_for_status()
            sites_by_id[key] = response.json()
        site = sites_by_id[key]
        Longitude.append(site['longitude'])
        Latitude.append(site['latitude'])
        LaunchSite.append(site['name'])

#### Takes the dataset and uses the payloads column to call the API and append the data to the lists

In [None]:
def getPayloadData(data, timeout=30):
    """Append payload mass and orbit for each launch in ``data`` to the global lists.

    Every truthy id in ``data['payloads']`` is looked up through the SpaceX
    ``/v4/payloads/<id>`` endpoint. The ``mass_kg`` and ``orbit`` fields of the
    response are appended to the module-level ``PayloadMass`` and ``Orbit``
    lists respectively.

    Args:
        data: Table-like object whose ``'payloads'`` column holds payload ids.
        timeout: Seconds to wait for each HTTP request before giving up.

    Raises:
        requests.HTTPError: If the API answers with an error status.

    Note:
        Rows with an empty payload id are skipped, so the two lists can end up
        shorter than ``data``.
    """
    for payload_id in data['payloads']:
        if not payload_id:
            continue
        # str() matches how the sibling helpers build their URLs.
        response = requests.get(
            "https://api.spacexdata.com/v4/payloads/" + str(payload_id),
            timeout=timeout,
        )
        # Surface HTTP failures directly rather than as a KeyError below.
        response.raise_for_status()
        payload = response.json()
        PayloadMass.append(payload['mass_kg'])
        Orbit.append(payload['orbit'])

#### Takes the dataset and uses the cores column to call the API and append the data to the lists

In [None]:
def getCoreData(data, timeout=30):
    """Append core and landing details for each launch in ``data`` to the global lists.

    For every entry of ``data['cores']``:

    * if its ``'core'`` id is not ``None``, the id is looked up through the
      SpaceX ``/v4/cores/<id>`` endpoint and the ``block``, ``reuse_count`` and
      ``serial`` fields are appended to ``Block``, ``ReusedCount`` and
      ``Serial``; otherwise ``None`` is appended to each of those lists;
    * the entry's own ``landing_success``/``landing_type``, ``flight``,
      ``gridfins``, ``reused``, ``legs`` and ``landpad`` values are appended to
      ``Outcome``, ``Flights``, ``GridFins``, ``Reused``, ``Legs`` and
      ``LandingPad``.

    Args:
        data: Table-like object whose ``'cores'`` column holds one mapping per
            launch with the keys listed above.
        timeout: Seconds to wait for each HTTP request before giving up.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    # The same core id can appear on several rows; fetch each id only once.
    details_by_id = {}
    for core in data['cores']:
        core_id = core['core']
        if core_id is not None:
            if core_id not in details_by_id:
                response = requests.get(
                    "https://api.spacexdata.com/v4/cores/" + core_id,
                    timeout=timeout,
                )
                # Surface HTTP failures directly rather than as a KeyError below.
                response.raise_for_status()
                details_by_id[core_id] = response.json()
            details = details_by_id[core_id]
            Block.append(details['block'])
            ReusedCount.append(details['reuse_count'])
            Serial.append(details['serial'])
        else:
            # No core id to look up: keep the lists aligned with placeholders.
            Block.append(None)
            ReusedCount.append(None)
            Serial.append(None)
        # Outcome is "<landing_success> <landing_type>", e.g. "True ASDS".
        Outcome.append(str(core['landing_success'])+' '+str(core['landing_type']))
        Flights.append(core['flight'])
        GridFins.append(core['gridfins'])
        Reused.append(core['reused'])
        Legs.append(core['legs'])
        LandingPad.append(core['landpad'])

#### Call getBoosterVersion

In [None]:
# Appends one booster name per row of `data` with a non-empty rocket id to the
# module-level BoosterVersion list. The list is never cleared, so running this
# cell a second time appends every entry again.
getBoosterVersion(data)

#### The list has now been updated 

In [None]:
# Peek at the first five collected booster names.
BoosterVersion[:5]  # Output: ['Falcon 1', 'Falcon 1', 'Falcon 1', 'Falcon 1', 'Falcon 9']

### We apply the rest of the functions. 
#### Call getLaunchSite

In [None]:

# Appends to the module-level Longitude, Latitude and LaunchSite lists, one
# entry per row of `data` with a non-empty launchpad id. The lists are never
# cleared, so running this cell a second time appends every entry again.
getLaunchSite(data)


#### Call getPayloadData

In [None]:
# Appends to the module-level PayloadMass and Orbit lists, one entry per row of
# `data` with a non-empty payload id. The lists are never cleared, so running
# this cell a second time appends every entry again.
getPayloadData(data)

#### Call getCoreData

In [None]:
# Appends one entry per row of `data` to Block, ReusedCount, Serial, Outcome,
# Flights, GridFins, Reused, Legs and LandingPad. The lists are never cleared,
# so running this cell a second time appends every entry again.
getCoreData(data)

#### Finally, let's construct our dataset using the data we have obtained. We combine the columns into a dictionary.

In [None]:
# Map each output column name to its values: two columns come straight from
# `data`, the rest from the lists filled by the helper functions.
launch_dict = {
    'FlightNumber': data['flight_number'].tolist(),
    'Date': data['date'].tolist(),
    'BoosterVersion': BoosterVersion,
    'PayloadMass': PayloadMass,
    'Orbit': Orbit,
    'LaunchSite': LaunchSite,
    'Outcome': Outcome,
    'Flights': Flights,
    'GridFins': GridFins,
    'Reused': Reused,
    'Legs': Legs,
    'LandingPad': LandingPad,
    'Block': Block,
    'ReusedCount': ReusedCount,
    'Serial': Serial,
    'Longitude': Longitude,
    'Latitude': Latitude,
}

# All columns must have the same length for the frame to be built.
df = pd.DataFrame(data=launch_dict)

## Task 2: Filtering through the list to only include Falcon 9 launches.

In [None]:

# Drop the Falcon 1 launches. Take an explicit copy: later cells assign into
# data_falcon9, and doing that on a filtered selection of df triggers pandas'
# SettingWithCopyWarning.
data_falcon9 = df[df['BoosterVersion'] != 'Falcon 1'].copy()

#### Reset the flight number column 

In [None]:
# Renumber the remaining launches consecutively, starting at 1.
new_flight_numbers = list(range(1, len(data_falcon9) + 1))
data_falcon9.loc[:, 'FlightNumber'] = new_flight_numbers

### Data Wrangling

#### We can see below that some of the rows are missing values in our dataset.

In [None]:
# Count the missing values in every column.
null_values = data_falcon9.isna().sum()
# Output: PayloadMass 5, LandingPad 26

#### Dealing with Missing Values
#### Calculate below the mean for the PayloadMass using the .mean(). Then use the mean and the .replace() function to replace np.nan values in the data with the mean you calculated.

In [None]:
# Mean payload mass rounded to two decimals (pandas skips NaN when averaging).
payload_mean = round(data_falcon9['PayloadMass'].mean(), 2) # Output: 6123.55

# Replace the missing payload masses with that mean.
data_falcon9['PayloadMass'] = data_falcon9['PayloadMass'].replace(np.nan,payload_mean)

#### New null_values

In [None]:

# Recount the missing values per column after filling PayloadMass.
null_values = data_falcon9.isna().sum()
null_values  # Output: LandingPad 26

#### Exporting Dataframe to csv

In [None]:



# Make sure the target folder exists first: to_csv does not create missing
# directories and would otherwise fail.
output_path = Path('IBM Data Certificate/Course11_Applied_Data_Science_Capstone/dataset_part_1.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
data_falcon9.to_csv(output_path, index=False)