In [3]:
import pandas as pd
import numpy as np
import requests
import json
import logging

# Route INFO-and-above records to question_4_log.txt, one line per record
# in the form "<timestamp> - <level> - <message>".
logging.basicConfig(
    filename='question_4_log.txt',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Download the dataset with a GET request and cache it in json_data.json.
# A timeout is passed because requests otherwise waits indefinitely for a
# response. Any failure aborts the run here, so the later read of
# json_data.json never picks up a missing or stale file.
try:
    response = requests.get('https://data.nasa.gov/resource/y77d-th95.json', timeout=30)
except requests.RequestException:
    # Connection errors and timeouts end up here; keep the traceback in the log.
    logging.exception("Error occurred while requesting the data.")
    raise

# Check if the request was successful
if response.status_code != 200:
    logging.error("Error occurred while requesting the data.")
    raise RuntimeError(f"Request failed with HTTP status {response.status_code}")

# Extract the JSON data
json_data = response.json()

# Save the JSON data to a file
with open('json_data.json', 'w') as json_file:
    json.dump(json_data, json_file)
logging.info("JSON data has been saved successfully.")

# Read the cached JSON payload back from disk
with open('json_data.json', 'r') as cached_json:
    data = json.load(cached_json)

# Flatten the records into a table; nested keys become dotted column names
# (e.g. 'geolocation.coordinates', which is renamed further down).
structured_dataset = pd.json_normalize(data)

# Rename columns
old_columns = ['name', 'id', 'nametype', 'recclass', 'mass', 'year', 'reclat', 'reclong', 'geolocation.coordinates']
new_columns = ['Name', 'Id', 'Nametype', 'Recclass', 'Mass', 'Year', 'Reclat', 'Reclong', 'Pointer Coordinates']
# DataFrame.rename leaves any column that is not listed above under its
# original label. Mapping the column Index through a dict would instead turn
# every unlisted label into NaN, producing duplicate, nameless headers.
structured_dataset.rename(columns=dict(zip(old_columns, new_columns)), inplace=True)

# Drop rows with NaN values (in any column, including the ones not renamed)
structured_dataset.dropna(axis=0, inplace=True)

# Convert data types
def _leading_year(value):
    """Return the first four characters of a string as an int, or 0 for non-strings.

    NOTE(review): presumably the values are timestamps that start with a
    four-digit year -- confirm against the dataset.
    """
    if not isinstance(value, str):
        return 0
    return int(value[:4])


# Plain numeric casts, applied one column at a time.
numeric_dtypes = {'Mass': float, 'Id': int, 'Reclat': float, 'Reclong': float}
for column_name, target_dtype in numeric_dtypes.items():
    structured_dataset[column_name] = structured_dataset[column_name].astype(target_dtype)

structured_dataset['Year'] = structured_dataset['Year'].apply(_leading_year)

# Write the cleaned table to an Excel workbook, leaving out the row index
excel_path = 'structured_dataset.xlsx'
structured_dataset.to_excel(excel_path, index=False)
logging.info("Structured dataset has been saved as structured_dataset.xlsx.")
