In [5]:
 import pandas as pd
import numpy as np
import requests
import json
import logging

# Configure logging
logging.basicConfig(filename='question_7_log.txt', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Requesting content from the URL with GET method
response = requests.get('https://data.nasa.gov/resource/y77d-th95.json')

# Check if the request was successful
if response.status_code == 200:
    # Extract the JSON data
    json_data = response.json()

    # Save the JSON data to a file
    with open('json_data.json', 'w') as json_file:
        json.dump(json_data, json_file)
    logging.info("JSON data has been saved successfully.")
else:
    logging.error("Error occurred while requesting the data.")

# Load JSON data
with open('json_data.json') as json_file:
    data = json.load(json_file)

# Convert JSON data to a DataFrame
structured_dataset = pd.json_normalize(data)

# Rename columns
old_columns = ['name', 'id', 'nametype', 'recclass', 'mass', 'year', 'reclat', 'reclong', 'geolocation.coordinates']
new_columns = ['Name', 'Id', 'Nametype', 'Recclass', 'Mass', 'Year', 'Reclat', 'Reclong', 'Pointer Coordinates']
structured_dataset.columns=structured_dataset.columns.map(dict(zip(old_columns,new_columns)))
# Drop Irrelevant columns 
structured_dataset.drop(np.nan,axis=1,inplace=True)

# Convert data types
structured_dataset['Mass'] = structured_dataset['Mass'].astype(float)
structured_dataset['Id'] = structured_dataset['Id'].astype(int)
structured_dataset['Year'] = structured_dataset['Year'].apply(lambda x: int(x[:4]) if isinstance(x, str) else 0)
structured_dataset['Reclat'] = structured_dataset['Reclat'].astype(float)
structured_dataset['Reclong'] = structured_dataset['Reclong'].astype(float)

# Save the DataFrame as an Excel file
structured_dataset.to_excel('structured_dataset.xlsx', index=False)
logging.info("Structured dataset has been saved as structured_dataset.xlsx.")

In [6]:
df=structured_dataset.copy()

In [7]:
df.head()

Unnamed: 0,Name,Id,Nametype,Recclass,Mass,Year,Reclat,Reclong,Pointer Coordinates
0,Aachen,1,Valid,L5,21.0,1880,50.775,6.08333,"[6.08333, 50.775]"
1,Aarhus,2,Valid,H6,720.0,1951,56.18333,10.23333,"[10.23333, 56.18333]"
2,Abee,6,Valid,EH4,107000.0,1952,54.21667,-113.0,"[-113, 54.21667]"
3,Acapulco,10,Valid,Acapulcoite,1914.0,1976,16.88333,-99.9,"[-99.9, 16.88333]"
4,Achiras,370,Valid,L6,780.0,1902,-33.16667,-64.95,"[-64.95, -33.16667]"


In [11]:
df.shape

(1000, 9)

In [52]:
#Question 1: Get all the Earth meteorites that fell before the year 2000.

meteorites_that_fell_before_the_year_2000=df[df['Year']<2000].dropna()

print(f'''all the Earth meteorites that fell before the year 2000.
{meteorites_that_fell_before_the_year_2000['Mass'].count()}

''')

all the Earth meteorites that fell before the year 2000.
894




In [51]:
# Create a folium map centered on a specific location
map = folium.Map(location=[meteorites_that_fell_before_the_year_2000['Reclat'].mean(), meteorites_that_fell_before_the_year_2000['Reclong'].mean()], zoom_start=2)

# Add markers for each coordinate in the subset
for _, row in meteorites_that_fell_before_the_year_2000.iterrows():
    folium.Marker(location=[row['Reclat'], row['Reclong']], popup=row['Name']).add_to(map)

# Display the map
map

In [37]:
#Question 2: Get all the Earth meteorites' coordinates that fell before the year 1970.

earth_meteorites_before_1970=df[df['Year']<1970]['Pointer Coordinates'].dropna()

print(''' Earth meteorites' coordinates that fell before the year 1970.''')
print(len(earth_meteorites_before_1970))

 Earth meteorites' coordinates that fell before the year 1970.
775


In [36]:
import folium
# Select a subset of coordinates (e.g., first 1000)
subset_coordinates = earth_meteorites_before_1970

# Create a folium map centered on a specific location
map = folium.Map(location=[subset_coordinates[4][0], subset_coordinates[4][1]], zoom_start=2)

# Add markers for each coordinate in the subset
for row in subset_coordinates:
    folium.Marker(location=row).add_to(map)

# Display the map
map

In [42]:
Mass_greater_than_10000=df[df['Mass']>10000]
print('mass of the earth meteorites more than 10000kg')
print(Mass_greater_than_10000['Name'].count())

mass of the earth meteorites more than 10000kg
243


In [46]:
# Create a folium map centered on a specific location
map = folium.Map(location=[Mass_greater_than_10000['Reclat'].mean(), Mass_greater_than_10000['Reclong'].mean()], zoom_start=2)

# Add markers for each coordinate in the subset
for _, row in Mass_greater_than_10000.iterrows():
    folium.Marker(location=[row['Reclat'], row['Reclong']], popup=row['Name']).add_to(map)

# Display the map
map