# In [3]:
# Input data format:
#   lat, lon, count
# where count is the number of meteorites in that bin.

# This notebook creates Gaussian-blurred pseudo-images of the binned
# data. It finds the non-zero bins and then increases the count of each
# surrounding bin by a fraction of the source bin's count; the fraction
# is given by a Gaussian function of the distance between the two bins.

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

def blur_bins(bins, n_neighbors=10, sigma=0.1):
    """Spread the count of every non-zero bin onto its nearest bins.

    For each bin whose count is > 0, the ``n_neighbors`` nearest bins
    (Euclidean distance in lat/lon, the source bin itself included in that
    ranking, ties broken by row order) are located.  Every one of those
    bins other than the source receives ``count * exp(-d**2 / (2*sigma**2))``
    on top of whatever it already holds.

    The weight is 1 at distance 0 and decays with distance; it is
    peak-normalised, so the weights do NOT sum to 1 and the total count
    grows.  The source bin keeps its own count unchanged (it is not
    added to itself).

    Contributions from several sources that reach the same bin are summed,
    so the result has exactly one row per input bin, in input order.

    Args:
        bins: DataFrame with numeric "lat", "lon" and "count" columns,
            one row per bin.
        n_neighbors: how many nearest bins (source included) each non-zero
            bin reaches.  Must be >= 1.
        sigma: standard deviation of the Gaussian, in the same units as
            lat/lon.  Must be > 0.

    Returns:
        A new DataFrame with columns "lat", "lon", "count" (all float)
        holding the blurred counts.  ``bins`` is not modified.

    Raises:
        ValueError: if ``n_neighbors`` < 1 or ``sigma`` <= 0.
    """
    if n_neighbors < 1:
        raise ValueError("n_neighbors must be at least 1")
    if sigma <= 0:
        raise ValueError("sigma must be positive")

    lat = bins["lat"].to_numpy(dtype=float)
    lon = bins["lon"].to_numpy(dtype=float)
    counts = bins["count"].to_numpy(dtype=float)

    # Start from the original counts and accumulate contributions in place;
    # this is what guarantees one output row per bin.
    blurred = counts.copy()

    for src in np.flatnonzero(counts > 0):
        dist = np.hypot(lat - lat[src], lon - lon[src])
        # Stable sort so equidistant bins are ranked by row order.
        nearest = np.argsort(dist, kind="stable")[:n_neighbors]
        # The source is (normally) its own nearest bin; it must not be
        # added to itself, otherwise its count would be doubled.
        nearest = nearest[nearest != src]
        weights = np.exp(-dist[nearest] ** 2 / (2 * sigma ** 2))
        # `nearest` holds unique indices, so fancy-indexed += is safe.
        blurred[nearest] += counts[src] * weights

    return pd.DataFrame({"lat": lat, "lon": lon, "count": blurred})


# Guarded so that importing this file does not touch the filesystem; in a
# notebook or when run as a script __name__ is "__main__" and this executes,
# leaving df / non_zero / df_blurred available as globals.
if __name__ == "__main__":
    # import the data
    df = pd.read_csv("../../data/USA_meteorite_data_binned_bins300.csv", header=0)

    # all the non-zero bins (not needed by blur_bins; kept because later
    # cells may reference it)
    non_zero = df[df["count"] > 0]

    # gaussian blurred data: one row per bin, 10 nearest bins, sigma 0.1
    df_blurred = blur_bins(df, n_neighbors=10, sigma=0.1)

    # save the dataframe to a csv file
    # NOTE(review): the input file is "...bins300.csv" but this writes
    # "...bins30_blurred.csv" -- possibly a typo for bins300; confirm.
    df_blurred.to_csv("../../data/USA_meteorite_data_binned_bins30_blurred.csv", index=False)
