In [3]:
# Here we will be taking csv data at "./data/USA_meteorite_data.csv" 
# and binning the data into a new csv where each bin has a count of the number of meteorites that fell in that bin.
# we will also include the latitude and longitude of the center of the bin.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# import ./data/USA_meteorite_data.csv
orgignal_csv = pd.read_csv('../../data/USA_meteorite_data.csv', header=0)

# the first column of the csv is the index from the previous csv, lets remove it
orgignal_csv = orgignal_csv.drop(columns=['Unnamed: 0'])
orgignal_csv.head(n=1)

Unnamed: 0,name,id,nametype,recclass,mass (g),fall,year,reclat,reclong,GeoLocation
0,Allegan,2276,Valid,H5,32000.0,Fell,1899.0,42.53333,-85.88333,"(42.53333, -85.88333)"


In [25]:
# here we bin the data using np.histogram2d
# we will set the number of bins to 100
# we will also set the range of the bins to be the range of the data
# we will also set the density to true so that we get the number of meteorites in each bin
n_bins = 100
(lat_min, lat_max) = (orgignal_csv['reclat'].min(), orgignal_csv['reclat'].max())
(lon_min, lon_max) = (orgignal_csv['reclong'].min(), orgignal_csv['reclong'].max())

counts_hist, lat_edges, lon_edges = np.histogram2d(orgignal_csv['reclat'], orgignal_csv['reclong'], bins=n_bins, range=[[lat_min, lat_max], [lon_min, lon_max]])

# here we will create a new dataframe with the binned data
# we will also include the latitude and longitude of the center of the bin
binned_data = pd.DataFrame(columns=['lat', 'lon', 'count'])
# iterate through the bins and calculate the center of the bin
for i in range(n_bins):
    for j in range(n_bins):
        lat_center = (lat_edges[i] + lat_edges[i+1])/2
        lon_center = (lon_edges[j] + lon_edges[j+1])/2
        # append with pd.concat
        binned_data = pd.concat([binned_data, pd.DataFrame([[lat_center, lon_center, counts_hist[i][j]]], columns=['lat', 'lon', 'count'])], ignore_index=True)

# multiply the count by the number of meteorites in the original data
# binned_data['count'] = binned_data['count'] * len(orgignal_csv)

# print data to csv file
binned_data.to_csv(f'../../data/USA_meteorite_data_binned_bins{n_bins}.csv')

  binned_data = pd.concat([binned_data, pd.DataFrame([[lat_center, lon_center, counts_hist[i][j]]], columns=['lat', 'lon', 'count'])], ignore_index=True)


In [26]:
# Given the above binning code, generate bins for bins range(10:700:10)
# and generate a csv for each binning
for i in range(10, 700, 10):
    counts_hist, lat_edges, lon_edges = np.histogram2d(orgignal_csv['reclat'], orgignal_csv['reclong'], bins=i, range=[[lat_min, lat_max], [lon_min, lon_max]])
    binned_data = pd.DataFrame(columns=['lat', 'lon', 'count'])
    for j in range(i):
        for k in range(i):
            lat_center = (lat_edges[j] + lat_edges[j+1])/2
            lon_center = (lon_edges[k] + lon_edges[k+1])/2
            binned_data = pd.concat([binned_data, pd.DataFrame([[lat_center, lon_center, counts_hist[j][k]]], columns=['lat', 'lon', 'count'])], ignore_index=True)
    binned_data.to_csv(f'../../data/USA_meteorite_data_binned_bins{i}.csv')
    

  binned_data = pd.concat([binned_data, pd.DataFrame([[lat_center, lon_center, counts_hist[j][k]]], columns=['lat', 'lon', 'count'])], ignore_index=True)
  binned_data = pd.concat([binned_data, pd.DataFrame([[lat_center, lon_center, counts_hist[j][k]]], columns=['lat', 'lon', 'count'])], ignore_index=True)
  binned_data = pd.concat([binned_data, pd.DataFrame([[lat_center, lon_center, counts_hist[j][k]]], columns=['lat', 'lon', 'count'])], ignore_index=True)
  binned_data = pd.concat([binned_data, pd.DataFrame([[lat_center, lon_center, counts_hist[j][k]]], columns=['lat', 'lon', 'count'])], ignore_index=True)
  binned_data = pd.concat([binned_data, pd.DataFrame([[lat_center, lon_center, counts_hist[j][k]]], columns=['lat', 'lon', 'count'])], ignore_index=True)
  binned_data = pd.concat([binned_data, pd.DataFrame([[lat_center, lon_center, counts_hist[j][k]]], columns=['lat', 'lon', 'count'])], ignore_index=True)
  binned_data = pd.concat([binned_data, pd.DataFrame([[lat_center, lon_cente