In [1]:
import numpy as np 
import pandas as pd
import pdb
import requests

from Classifications import *
from keys import *

In [2]:
# Short, attribute-friendly column names applied over the CSV's own header row.
column_names = [
    'name', 'id', 'nametype', 'recclass', 'mass', 'fall', 'year',
    'reclat', 'reclong', 'GeoLocation',
]

df = pd.read_csv("Meteorite_Landings.csv")
df.columns = column_names

### Drop mass NaNs & 0.0

In [3]:
# Keep only rows whose mass is recorded and strictly positive.
has_positive_mass = df.mass.notna() & (df.mass > 0.0)
df = df[has_positive_mass]
df.shape

(45566, 10)

## Classifications

In [4]:
# Run the project-local classifier over the frame and keep its result as `df`.
# Later cells read a `major_classification` column from this result;
# presumably it is added here — TODO confirm against Classifications.py.
df = Classifications.classify_subclasses(df)

In [5]:
# Number of rows per major classification, most frequent first.
df.major_classification.value_counts()

Chondrite        42032
Achondrite        2178
Iron              1041
Stony-Iron         311
uncategorized        4
Name: major_classification, dtype: int64

In [6]:
# Per-classification mean of the numeric columns (id, mass, reclat, reclong).
# numeric_only=True is required on pandas >= 2.0, where averaging the string
# columns (name, year, GeoLocation, ...) raises TypeError instead of being
# silently skipped.
df.groupby('major_classification').mean(numeric_only=True)

Unnamed: 0_level_0,id,mass,reclat,reclong
major_classification,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Achondrite,30896.449954,2832.684123,-24.225972,37.76933
Chondrite,26885.663114,1522.094561,-41.663357,64.302098
Iron,17997.889529,494262.887233,8.131944,-6.022987
Stony-Iron,28007.141479,66261.35083,-2.451259,37.426869
uncategorized,43302.5,0.07825,-79.68333,159.75


## Country Coordinates

In [None]:
# Scratch cell: build one reverse-geocoding request URL for a test coordinate.
lat, long = (0.0, 0.0)
latlng = f"{lat},{long}"

# `keys` is not assigned in this notebook; presumably it is provided by
# `from keys import *` — TODO confirm.
# NOTE(review): the API key is embedded in the URL string, so printing or
# logging `req_str` exposes it.
req_str = f"https://maps.googleapis.com/maps/api/geocode/json?latlng={latlng}&key={keys['mapsAPI']}"

# Disabled prototype of the lookup: send the request, then print the long name
# of the first result whose `types` list contains "country".
# resp = requests.get(req_str).json()
# print(resp)
# for r in resp['results']:
#     if "country" in r['types']:
#         print(r['address_components'][0]['long_name'], "\n")
#         break

In [48]:
# GeoLocation strings that stand in for "no precise location".
placeholder_locations = [
    '(0.0, 0.0)',         # ocean placeholder
    '(-71.5, 35.66667)',  # antarctica 1
    '(-84.0, 168.0)',     # antarctica 2
]

has_coordinates = df.reclat.notna() & df.reclong.notna()
is_placeholder = df.GeoLocation.isin(placeholder_locations)

# reset_index() keeps the old labels in an `index` column and renumbers from 0.
geo_df = df[has_coordinates & ~is_placeholder].reset_index()

# Preview: northern-hemisphere rows among positions 21000-21998.
preview = geo_df[21000:21999]
preview[preview.reclat > 0]

Unnamed: 0,index,name,id,nametype,recclass,mass,fall,year,reclat,reclong,GeoLocation,major_classification,subclass_category
21105,31977,Pedernales,34062,Valid,"Iron, IAB complex",691.00,Found,01/01/1980 12:00:00 AM,30.33333,-98.95000,"(30.33333, -98.95)",Iron,IAB
21106,31978,Peetz,18783,Valid,L6,11500.00,Found,01/01/1937 12:00:00 AM,40.95000,-103.08333,"(40.95, -103.08333)",Chondrite,L
21107,31979,Pei Xian,18784,Valid,Iron,400000.00,Found,01/01/1917 12:00:00 AM,34.70000,117.00000,"(34.7, 117.0)",Iron,Iron
21108,31980,Pelona Mountain,18785,Valid,H5,618.00,Found,01/01/1999 12:00:00 AM,33.66667,-108.10000,"(33.66667, -108.1)",Chondrite,H
21110,31982,Penokee,18788,Valid,H5,3580.00,Found,01/01/1947 12:00:00 AM,39.35000,-99.91667,"(39.35, -99.91667)",Chondrite,H
21111,31983,Penouille,18789,Valid,"Iron, IAB complex",72.54,Found,01/01/1984 12:00:00 AM,48.85000,-64.43333,"(48.85, -64.43333)",Iron,IAB
21112,31984,Pep,18791,Valid,OC,591.00,Found,01/01/1966 12:00:00 AM,33.73000,-102.57667,"(33.73, -102.57667)",Chondrite,C
21113,31985,Perryton,18794,Valid,LL6,2114.00,Found,01/01/1975 12:00:00 AM,36.35333,-100.73167,"(36.35333, -100.73167)",Chondrite,LL
21114,31986,Perryville,18795,Valid,"Iron, IIC",17500.00,Found,01/01/1906 12:00:00 AM,37.73333,-89.85000,"(37.73333, -89.85)",Iron,Iron
21115,31987,Persimmon Creek,18796,Valid,"Iron, IAB-sLM",5000.00,Found,01/01/1893 12:00:00 AM,35.05000,-84.23333,"(35.05, -84.23333)",Iron,IAB


In [8]:
# Rows carrying either of the two GeoLocation strings that the geo_df cell
# excludes as Antarctic placeholders.
antarctic_df = df[(df.GeoLocation == '(-71.5, 35.66667)') | (df.GeoLocation == '(-84.0, 168.0)')]

# numeric_only=True is required on pandas >= 2.0, where averaging the string
# columns raises TypeError instead of being silently skipped.
antarctic_df.groupby('major_classification').mean(numeric_only=True)

Unnamed: 0_level_0,id,mass,reclat,reclong
major_classification,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Achondrite,26280.362245,187.689796,-73.285714,54.571431
Chondrite,24842.045527,76.233004,-76.453484,88.10755
Iron,26194.25,2082.5525,-72.541667,46.694448
Stony-Iron,23465.714286,16.765714,-78.642857,111.285716


In [28]:
# import math
# for i in range(math.ceil(geo_df.shape[0] / 1000)):
#     file = f"countries/countries-{i}.csv"
#     with open(file, "w") as f:
#         f.write("id, reclat, reclong, country, country_abrv\n")
    
    #     !type nul > {file}  
# my_str = f"countries/{i}.csv"
# !type nul > {my_str}

In [34]:
import csv
def get_countries(df, start_idx=0):
    """Reverse-geocode rows of ``df`` and append each row's country to a CSV.

    For every row position from ``start_idx`` to the end of ``df``, one request
    is sent to the Google Geocoding API with the row's ``reclat``/``reclong``.
    The row ``id, lat, lng, country, country_short_name`` is then appended to
    ``countries/countries-<position // 1000>.csv``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``id``, ``reclat`` and ``reclong`` columns. Rows are read
        by position, so the frame's index labels do not matter.
    start_idx : int, default 0
        Row position to start from. Pass the position printed by an earlier,
        interrupted run to resume where it stopped.

    Returns
    -------
    None
        If a row cannot be written to its CSV file, the row position is
        printed and the function returns early.

    Notes
    -----
    Rows for which the API returns no result of type ``"country"`` are written
    with empty country fields. Network errors raised by ``requests`` propagate
    to the caller.
    """
    endpoint = "https://maps.googleapis.com/maps/api/geocode/json"

    # Positional arrays: a filtered or sliced frame whose labels do not start
    # at 0 is still walked row by row without label lookups.
    ids = df["id"].to_numpy()
    lats = df["reclat"].to_numpy()
    lngs = df["reclong"].to_numpy()

    for i in range(start_idx, len(df)):
        file = f"countries/countries-{(i // 1000)}.csv"
        row_id, lat, lng = ids[i], lats[i], lngs[i]

        # Let requests build and encode the query string; the timeout keeps a
        # stalled connection from hanging the whole run.
        resp = requests.get(
            endpoint,
            params={"latlng": f"{lat},{lng}", "key": keys['mapsAPI']},
            timeout=30,
        ).json()

        # Reset per row so a miss never reuses the previous row's country.
        country = ""
        country_short_name = ""
        for r in resp.get('results', []):
            if "country" in r['types']:
                country = r['address_components'][0]['long_name']
                country_short_name = r['address_components'][0]['short_name']
                break

        try:
            # newline='' lets the csv module control line endings, which
            # avoids blank rows between records on Windows.
            with open(file, 'a', newline='') as countries:
                country_writer = csv.writer(countries, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                country_writer.writerow([row_id, lat, lng, country, country_short_name])
        except (OSError, csv.Error):
            # Report where the run stopped so it can be resumed from here.
            print(i)
            return
        


# Every call below sends one Geocoding API request per row, so keep calls
# commented out unless a run is intended.
# get_countries(geo_df[17062:].reset_index(), start_idx=0)
# get_countries(geo_df.reset_index(), start_idx=0)

# Northern-hemisphere rows among positions 21000-21998 of geo_df. The index is
# renumbered from 0 because the call starts at start_idx=0; without it the
# filtered frame's labels start well above 0 and row 0 cannot be found.
# NOTE(review): with positions restarting at 0, these rows are appended to
# countries/countries-0.csv — confirm that is the intended destination.
northern_sample = geo_df[21000:21999]
northern_sample = northern_sample[northern_sample.reclat > 0].reset_index(drop=True)
get_countries(northern_sample, start_idx=0)

In [None]:
# NOTE(review): neither `df_test` nor `countries` is assigned at notebook level
# in the cells visible here (`countries` only appears as a local file handle
# inside `get_countries`). This cell presumably relies on state from a cell
# that is not shown — TODO confirm, or define both names before this point.
df_test['country'] = countries
df_test