In [1]:
import pandas as pd

# Read the cleaned meteorite-landings table from the interim data folder
df = pd.read_csv(filepath_or_buffer='../data/interim/Meteorite_Landings_cleaned.csv')
# Show the first rows as a quick sanity check of the load
df.head()

Unnamed: 0,name,id,nametype,class,mass (g),fall,year,latitude,longitude,category
0,Aachen,1,Valid,L5,21.0,Fell,1880,50.775,6.08333,Chondrite
1,Aarhus,2,Valid,H6,720.0,Fell,1951,56.18333,10.23333,Chondrite
2,Abee,6,Valid,EH4,107000.0,Fell,1952,54.21667,-113.0,Chondrite
3,Acapulco,10,Valid,Acapulcoite,1914.0,Fell,1976,16.88333,-99.9,Achondrite
4,Achiras,370,Valid,L6,780.0,Fell,1902,-33.16667,-64.95,Chondrite


In [2]:
# Inspect the dtype pandas inferred for each column, e.g. to confirm that
# id and year are integers, that mass, latitude and longitude are floats,
# and that the remaining columns are object columns.
df.dtypes

name          object
id             int64
nametype      object
class         object
mass (g)     float64
fall          object
year           int64
latitude     float64
longitude    float64
category      object
dtype: object

In [3]:
import reverse_geocode as rg

# Add country column to the dataset
def get_country(lat, lon):
    """Return the country that reverse geocoding assigns to one coordinate.

    Args:
        lat: Latitude of the point.
        lon: Longitude of the point.

    Returns:
        The 'country' field of the first result returned by ``rg.search``
        for the single point ``(lat, lon)``.
    """
    # rg.search works on a list of (lat, lon) pairs, so wrap the single point
    # in a one-element list and read the first (only) result.
    return rg.search([(lat, lon)])[0]['country']

# Geocode every row in ONE batched rg.search call instead of one call per row
# through df.apply(axis=1). rg.search takes a list of (lat, lon) pairs and
# returns one result dict per pair, in input order, so the extracted
# 'country' values line up positionally with df's rows.
coordinates = list(zip(df['latitude'], df['longitude']))
df['country'] = [match['country'] for match in rg.search(coordinates)]
df.head()

Unnamed: 0,name,id,nametype,class,mass (g),fall,year,latitude,longitude,category,country
0,Aachen,1,Valid,L5,21.0,Fell,1880,50.775,6.08333,Chondrite,Germany
1,Aarhus,2,Valid,H6,720.0,Fell,1951,56.18333,10.23333,Chondrite,Denmark
2,Abee,6,Valid,EH4,107000.0,Fell,1952,54.21667,-113.0,Chondrite,Canada
3,Acapulco,10,Valid,Acapulcoite,1914.0,Fell,1976,16.88333,-99.9,Achondrite,Mexico
4,Achiras,370,Valid,L6,780.0,Fell,1902,-33.16667,-64.95,Chondrite,Argentina


In [4]:
# Add city column to the dataset
def get_city(lat, lon):
    """Return the city that reverse geocoding assigns to one coordinate.

    Args:
        lat: Latitude of the point.
        lon: Longitude of the point.

    Returns:
        The 'city' field of the first result returned by ``rg.search``
        for the single point ``(lat, lon)``.
    """
    # rg.search works on a list of (lat, lon) pairs, so wrap the single point
    # in a one-element list and read the first (only) result.
    return rg.search([(lat, lon)])[0]['city']

# Geocode every row in ONE batched rg.search call instead of one call per row
# through df.apply(axis=1). rg.search takes a list of (lat, lon) pairs and
# returns one result dict per pair, in input order, so the extracted
# 'city' values line up positionally with df's rows.
coordinates = list(zip(df['latitude'], df['longitude']))
df['city'] = [match['city'] for match in rg.search(coordinates)]
df.head()

Unnamed: 0,name,id,nametype,class,mass (g),fall,year,latitude,longitude,category,country,city
0,Aachen,1,Valid,L5,21.0,Fell,1880,50.775,6.08333,Chondrite,Germany,Aachen
1,Aarhus,2,Valid,H6,720.0,Fell,1951,56.18333,10.23333,Chondrite,Denmark,Århus
2,Abee,6,Valid,EH4,107000.0,Fell,1952,54.21667,-113.0,Chondrite,Canada,Lamont
3,Acapulco,10,Valid,Acapulcoite,1914.0,Fell,1976,16.88333,-99.9,Achondrite,Mexico,Acapulco de Juárez
4,Achiras,370,Valid,L6,780.0,Fell,1902,-33.16667,-64.95,Chondrite,Argentina,Achiras


In [5]:
# Override the geocoded country for every landing at or south of 60°S:
# those rows are labelled 'Antarctica' regardless of the lookup result.
is_south_of_60 = df['latitude'].le(-60.0)
df.loc[is_south_of_60, 'country'] = 'Antarctica'

In [6]:
# Clear the geocoded city (set it to None) for every row whose country is
# 'Antarctica' -- presumably because the city match is not meaningful there.
is_antarctic = df['country'].eq('Antarctica')
df.loc[is_antarctic, 'city'] = None

In [7]:
# Preview the first rows again after the Antarctica overrides. None of these
# five rows lies at or south of 60°S, so their country and city are unchanged.
df.head()

Unnamed: 0,name,id,nametype,class,mass (g),fall,year,latitude,longitude,category,country,city
0,Aachen,1,Valid,L5,21.0,Fell,1880,50.775,6.08333,Chondrite,Germany,Aachen
1,Aarhus,2,Valid,H6,720.0,Fell,1951,56.18333,10.23333,Chondrite,Denmark,Århus
2,Abee,6,Valid,EH4,107000.0,Fell,1952,54.21667,-113.0,Chondrite,Canada,Lamont
3,Acapulco,10,Valid,Acapulcoite,1914.0,Fell,1976,16.88333,-99.9,Achondrite,Mexico,Acapulco de Juárez
4,Achiras,370,Valid,L6,780.0,Fell,1902,-33.16667,-64.95,Chondrite,Argentina,Achiras


In [9]:
# Write the enriched table (now including the country and city columns) to
# CSV, leaving out the row index.
df.to_csv(path_or_buf='../data/Meteorite_Landings_20250108_addcountries.csv', index=False)