### Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split


# import warnings
# warnings.filterwarnings('ignore')


### Read and clean data

In [None]:
# Load the meteorite landings table.
df = pd.read_csv("data/meteorite-landings.csv")

# Target: the meteorite class label.
y = df["recclass"]
# Features: every column except the target. This must be a DataFrame with one
# row per sample (not `df.columns`, which is only the Index of column labels),
# otherwise X and y have different lengths and cannot be split together.
X = df.drop(columns=["recclass"])

# Hold-out split, currently disabled.
# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, test_size=0.2, random_state=42
# )

### High-level overview of data

In [None]:
# A year is only plotted when it has more than this many recorded meteorites.
MIN_METEORITES_PER_YEAR = 100

# Count the meteorites recorded for each year (non-null "name" entries per year).
# Selecting the column before counting avoids counting every other column too.
year_counts = df.groupby("year")["name"].count()
# Drop years with MIN_METEORITES_PER_YEAR meteorites or fewer.
year_counts = year_counts[year_counts > MIN_METEORITES_PER_YEAR]

# The counts are already integers; it is the year labels that need converting.
# If pandas parsed "year" as float, cast the index so the tick labels read
# "1979" rather than "1979.0". The guard leaves any other dtype untouched.
if pd.api.types.is_float_dtype(year_counts.index):
    year_counts.index = year_counts.index.astype(int)

# Plot the number of meteorites recorded each year.
fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(x=year_counts.index, y=year_counts.values, ax=ax)
ax.tick_params(axis="x", rotation=90)
ax.grid(True)
ax.set_xlabel("Year")
ax.set_ylabel("Number of meteorites")
ax.set_title(
    f"Number of meteorites found each year (only > {MIN_METEORITES_PER_YEAR} shown)"
)
plt.show()

In [None]:
# Plot the distribution of meteorite masses.
# A histogram is used so that the axes mean what their labels say:
# mass on the x-axis, number of meteorites on the y-axis.
masses = df["mass (g)"].dropna()
# Non-positive masses cannot be placed on a logarithmic axis, so they are left out.
masses = masses[masses > 0]

fig, ax = plt.subplots(figsize=(10, 5))
# Log-spaced bins keep the plot readable if masses span several orders of
# magnitude -- NOTE(review): confirm this matches the data's actual range.
sns.histplot(masses, log_scale=True, ax=ax)
ax.grid(True)
ax.set_xlabel("Mass (g)")
ax.set_ylabel("Number of meteorites")
ax.set_title("Mass of meteorites found")
plt.show()


In [None]:
# Build a world map with one red marker per meteorite that has coordinates.
import folium

# World view centred on latitude 0, longitude 0.
m = folium.Map(location=[0, 0], zoom_start=2)

# Keep only the rows where both the latitude and the longitude are present.
has_coords = df["reclat"].notna() & df["reclong"].notna()
located = df[has_coords]

# One marker per remaining meteorite; the popup shows its name.
for lat, lon, label in zip(located["reclat"], located["reclong"], located["name"]):
    marker = folium.Marker(
        location=[lat, lon],
        popup=label,
        icon=folium.Icon(color="red", icon="info-sign"),
    )
    marker.add_to(m)

# Show the map
# m