# Fireball and Bolide Reports

The following code imports the libraries used to fill in missing data in the table.

In [89]:
import pandas as pd
import numpy as np
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

In [90]:
data = pd.read_csv("data/Fireball_And_Bolide_Reports.csv")

The data listed in the csv file is shown below.

In [91]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 92 entries, 0 to 91
Data columns (total 10 columns):
Date/Time - Peak Brightness (UT)       92 non-null object
Latitude (Deg)                         92 non-null object
Longitude (Deg)                        92 non-null object
Altitude (km)                          68 non-null float64
Velocity (km/s)                        7 non-null float64
Velocity Components (km/s): vx         52 non-null float64
Velocity Components (km/s): vy         52 non-null float64
Velocity Components (km/s): vz         52 non-null float64
Total Radiated Energy (J)              92 non-null int64
Calculated Total Impact Energy (kt)    92 non-null float64
dtypes: float64(6), int64(1), object(3)
memory usage: 7.3+ KB


Converting Data Frame to a Numpy Array.

In [92]:
data1 = pd.DataFrame(data).to_numpy()

data1

array([['11/21/2009 08:53:00 PM', '22.0S', '29.2E', 38.0, 32.1, 3.0,
        -17.0, -27.0, 10000000000000, 18.0],
       ['01/09/2015 10:41:11 AM', '2.0N', '28.8E', 36.0, nan, -10.7,
        -7.6, 11.6, 139000000000, 0.41],
       ['05/16/2014 12:42:48 PM', '44.2S', '176.2W', 44.0, nan, 14.4,
        4.6, 6.5, 309000000000, 0.82],
       ['08/23/2014 06:29:41 AM', '61.7S', '132.6E', 22.2, 16.2, -2.3,
        5.7, 16.5, 3820000000000, 7.6],
       ['12/12/2014 06:48:11 AM', '33.5N', '144.9E', 26.3, nan, 11.5,
        -2.8, -2.2, 33000000000, 0.11],
       ['07/29/2014 07:38:07 AM', '49.2S', '172.2W', nan, nan, nan, nan,
        nan, 73000000000, 0.23],
       ['06/01/2013 10:49:48 PM', '65.6S', '138.4E', 28.9, nan, nan, nan,
        nan, 34000000000, 0.12],
       ['02/17/2015 01:19:50 PM', '8.0S', '11.2W', 39.0, nan, -28.2, 3.4,
        4.6, 33000000000, 0.11],
       ['10/17/2014 02:07:36 PM', '4.6S', '66.3W', 39.0, nan, nan, nan,
        nan, 72000000000, 0.23],
       ['12/08/2013 0

Create the ImperativeImputer model, a multivariate imputer that estimates each feature from all the others.
A strategy for imputing missing values by modeling each feature with missing values as a function of other features in a round-robin fashion.

In [93]:
imp = IterativeImputer(max_iter = 10, random_state = 0)

imp.fit(data1[:, 3:])

IterativeImputer(add_indicator=False, estimator=None,
                 imputation_order='ascending', initial_strategy='mean',
                 max_iter=10, max_value=None, min_value=None,
                 missing_values=nan, n_nearest_features=None, random_state=0,
                 sample_posterior=False, tol=0.001, verbose=0)

Defines a value for blank fields based on the analysis of filled-in fields.

In [68]:
X_test = data1[:, 3:]
data3 = np.round(imp.transform(X_test))
data3

array([[ 3.80e+01,  3.20e+01,  3.00e+00, -1.70e+01, -2.70e+01,  1.00e+13,
         1.80e+01],
       [ 3.60e+01,  2.50e+01, -1.10e+01, -8.00e+00,  1.20e+01,  1.39e+11,
         0.00e+00],
       [ 4.40e+01,  1.80e+01,  1.40e+01,  5.00e+00,  6.00e+00,  3.09e+11,
         1.00e+00],
       [ 2.20e+01,  1.60e+01, -2.00e+00,  6.00e+00,  1.60e+01,  3.82e+12,
         8.00e+00],
       [ 2.60e+01,  1.70e+01,  1.20e+01, -3.00e+00, -2.00e+00,  3.30e+10,
         0.00e+00],
       [ 3.20e+01,  2.10e+01,  1.00e+00, -3.00e+00, -1.00e+00,  7.30e+10,
         0.00e+00],
       [ 2.90e+01,  2.00e+01,  1.00e+00, -3.00e+00, -2.00e+00,  3.40e+10,
         0.00e+00],
       [ 3.90e+01,  2.90e+01, -2.80e+01,  3.00e+00,  5.00e+00,  3.30e+10,
         0.00e+00],
       [ 3.90e+01,  2.30e+01,  1.00e+00, -3.00e+00, -1.00e+00,  7.20e+10,
         0.00e+00],
       [ 2.40e+01,  1.80e+01,  2.00e+00,  2.00e+00, -1.10e+01,  6.40e+10,
         0.00e+00],
       [ 3.20e+01,  2.10e+01,  1.00e+00, -3.00e+00, -1.00e+0

In [60]:
data.columns

Index(['Date/Time - Peak Brightness (UT)', 'Latitude (Deg)', 'Longitude (Deg)',
       'Altitude (km)', 'Velocity (km/s)', 'Velocity Components (km/s): vx',
       'Velocity Components (km/s): vy', 'Velocity Components (km/s): vz',
       'Total Radiated Energy (J)', 'Calculated Total Impact Energy (kt)'],
      dtype='object')

Concatenation of the part with modified data and the part with data that was already completely filled.

In [77]:
data4 = np.concatenate((data1[:, :3], data3), axis=1)

data5 = pd.DataFrame(data=data4,
                    columns=data.columns)

data5.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 92 entries, 0 to 91
Data columns (total 10 columns):
Date/Time - Peak Brightness (UT)       92 non-null object
Latitude (Deg)                         92 non-null object
Longitude (Deg)                        92 non-null object
Altitude (km)                          92 non-null object
Velocity (km/s)                        92 non-null object
Velocity Components (km/s): vx         92 non-null object
Velocity Components (km/s): vy         92 non-null object
Velocity Components (km/s): vz         92 non-null object
Total Radiated Energy (J)              92 non-null object
Calculated Total Impact Energy (kt)    92 non-null object
dtypes: object(10)
memory usage: 7.3+ KB


As a consequence of the concatenation, the table columns have all been transformed into non-null object and thus it is necessary to convert back to the original types.

In [None]:
convert = ['Altitude (km)',
           'Velocity (km/s)',
           'Velocity Components (km/s): vx',
          'Velocity Components (km/s): vy',
          'Velocity Components (km/s): vz',
          'Total Radiated Energy (J)', 
          'Calculated Total Impact Energy (kt)']

data5[convert] = data5[convert].apply(pd.to_numeric)

data5.astype({'Total Radiated Energy (J)': 'int64'}).dtypes

data5.info()

In [87]:
data5.corr()

Unnamed: 0,Altitude (km),Velocity (km/s),Velocity Components (km/s): vx,Velocity Components (km/s): vy,Velocity Components (km/s): vz,Total Radiated Energy (J),Calculated Total Impact Energy (kt)
Altitude (km),1.0,0.628922,-0.279122,0.001983,0.010383,-0.146868,-0.155159
Velocity (km/s),0.628922,1.0,-0.598226,-0.550091,-0.259398,-0.056752,-0.059404
Velocity Components (km/s): vx,-0.279122,-0.598226,1.0,-0.071825,-0.130064,0.163155,0.17091
Velocity Components (km/s): vy,0.001983,-0.550091,-0.071825,1.0,0.228595,-0.127386,-0.128682
Velocity Components (km/s): vz,0.010383,-0.259398,-0.130064,0.228595,1.0,-0.024915,-0.03072
Total Radiated Energy (J),-0.146868,-0.056752,0.163155,-0.127386,-0.024915,1.0,0.999349
Calculated Total Impact Energy (kt),-0.155159,-0.059404,0.17091,-0.128682,-0.03072,0.999349,1.0
