In [10]:
import pandas as pd
import numpy as np
import json
import csv

def structure_json(data: dict) -> pd.DataFrame:
    """Flatten raw meteorite records into a DataFrame with typed columns.

    Parameters
    ----------
    data : dict
        Mapping whose ``"data"`` entry holds the list of record dicts.
        Nested dicts inside a record (e.g. ``geolocation``) are flattened by
        ``pd.json_normalize`` into dotted column names such as
        ``geolocation.type``.

    Returns
    -------
    pandas.DataFrame
        One row per record. ``id`` is cast to int, ``mass``, ``reclat`` and
        ``reclong`` to float, and ``year`` is parsed with ``pd.to_datetime``.
        Every other column is left exactly as ``json_normalize`` produced it.
        If the record list is empty, a zero-row frame containing the five
        typed columns is returned.

    Raises
    ------
    KeyError
        If ``data`` has no ``"data"`` key, or if a non-empty record list does
        not yield one of the converted columns.

    Conversion errors raised by pandas for unparseable values propagate
    unchanged.
    """
    numeric_dtypes = {"id": int, "mass": float, "reclat": float, "reclong": float}

    # Flatten the nested records into a flat table (no download happens here;
    # the caller supplies the already-loaded payload).
    df = pd.json_normalize(data["data"])

    if len(df) == 0:
        # An empty record list normalizes to a frame with no columns at all,
        # so the casts below would fail with KeyError. Return an empty frame
        # that still carries the typed columns so filters keep working.
        empty_columns = {
            column: pd.Series(dtype=dtype) for column, dtype in numeric_dtypes.items()
        }
        empty_columns["year"] = pd.Series(dtype="datetime64[ns]")
        return pd.DataFrame(empty_columns)

    # Every raw value arrives as a string; give each column its real dtype.
    df = df.astype(numeric_dtypes)
    df["year"] = pd.to_datetime(df["year"])

    return df



# Sample payload in the raw source format: every scalar arrives as a string
# and each record nests a GeoJSON-style "geolocation" point.
json_data = {
    "data": [
        {
            "name": "Tomokovka",
            "id": "24019",
            "nametype": "Valid",
            "recclass": "LL6",
            "mass": "600",
            "fall": "Fell",
            "year": "1905-01-01T00:00:00.000",
            "reclat": "47.850000",
            "reclong": "34.766670",
            "geolocation": {"type": "Point", "coordinates": [34.76667, 47.85]},
        },
        {
            "name": "Tomo",
            "id": "24020",
            "nametype": "Valid",
            "recclass": "LL8",
            "mass": "10010",
            "fall": "Fell",
            "year": "1965-01-01T00:00:00.000",
            "reclat": "47.850000",
            "reclong": "34.766670",
            "geolocation": {"type": "Point", "coordinates": [34.76667, 47.85]},
        },
        {
            "name": "Tomoakno",
            "id": "24021",
            "nametype": "Not-Valid",
            "recclass": "LL5",
            "mass": "12000",
            "fall": "Pass",
            "year": "2002-01-01T00:00:00.000",
            "reclat": "47.850000",
            "reclong": "34.766670",
            "geolocation": {"type": "Point", "coordinates": [34.76667, 47.85]},
        },
        {
            "name": "Somnath",
            "id": "24022",
            "nametype": "Not-Valid",
            "recclass": "LL10",
            "mass": "700",
            "fall": "Pass",
            "year": "1998-01-01T00:00:00.000",
            "reclat": "47.850000",
            "reclong": "34.766670",
            "geolocation": {"type": "Point", "coordinates": [34.76667, 47.85]},
        },
    ],
}




# Turn the sample payload into the typed frame used by the queries below,
# then show it as the cell's output.
df = structure_json(data=json_data)
df

Unnamed: 0,name,id,nametype,recclass,mass,fall,year,reclat,reclong,geolocation.type,geolocation.coordinates
0,Tomokovka,24019,Valid,LL6,600.0,Fell,1905-01-01,47.85,34.76667,Point,"[34.76667, 47.85]"
1,Tomo,24020,Valid,LL8,10010.0,Fell,1965-01-01,47.85,34.76667,Point,"[34.76667, 47.85]"
2,Tomoakno,24021,Not-Valid,LL5,12000.0,Pass,2002-01-01,47.85,34.76667,Point,"[34.76667, 47.85]"
3,Somnath,24022,Not-Valid,LL10,700.0,Pass,1998-01-01,47.85,34.76667,Point,"[34.76667, 47.85]"


In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 11 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   name                     4 non-null      object        
 1   id                       4 non-null      int64         
 2   nametype                 4 non-null      object        
 3   recclass                 4 non-null      object        
 4   mass                     4 non-null      float64       
 5   fall                     4 non-null      object        
 6   year                     4 non-null      datetime64[ns]
 7   reclat                   4 non-null      float64       
 8   reclong                  4 non-null      float64       
 9   geolocation.type         4 non-null      object        
 10  geolocation.coordinates  4 non-null      object        
dtypes: datetime64[ns](1), float64(3), int64(1), object(6)
memory usage: 480.0+ bytes


In [12]:
# Meteorites whose recorded year is earlier than 2000.
df.loc[df["year"].dt.year.lt(2000)]

Unnamed: 0,name,id,nametype,recclass,mass,fall,year,reclat,reclong,geolocation.type,geolocation.coordinates
0,Tomokovka,24019,Valid,LL6,600.0,Fell,1905-01-01,47.85,34.76667,Point,"[34.76667, 47.85]"
1,Tomo,24020,Valid,LL8,10010.0,Fell,1965-01-01,47.85,34.76667,Point,"[34.76667, 47.85]"
3,Somnath,24022,Not-Valid,LL10,700.0,Pass,1998-01-01,47.85,34.76667,Point,"[34.76667, 47.85]"


In [13]:
# Coordinates of the meteorites that fell before 1970.
# One .loc call picks the rows and the column together, instead of chaining
# two separate indexing operations.
df.loc[df["year"].dt.year < 1970, "geolocation.coordinates"]

0    [34.76667, 47.85]
1    [34.76667, 47.85]
Name: geolocation.coordinates, dtype: object

In [14]:
# Treating the mass column as kilograms, keep the meteorites heavier than 10000 kg.
df.loc[df["mass"].gt(10000)]

Unnamed: 0,name,id,nametype,recclass,mass,fall,year,reclat,reclong,geolocation.type,geolocation.coordinates
1,Tomo,24020,Valid,LL8,10010.0,Fell,1965-01-01,47.85,34.76667,Point,"[34.76667, 47.85]"
2,Tomoakno,24021,Not-Valid,LL5,12000.0,Pass,2002-01-01,47.85,34.76667,Point,"[34.76667, 47.85]"
