In [1]:
%matplotlib inline

import nasapy
import pandas as pd
import numpy as np
# import folium
# from folium.plugins import HeatMap
import matplotlib.pyplot as plt


# Import API key
from api_keys import nasa_key


# Downloaded all the NASA NEO Earth Close Approaches data from:
## https://cneos.jpl.nasa.gov/ca/ 
## These data starts from 1901 up to the current date as well as predicting the future NEOs until 2200

In [2]:
#Import the 300 year csv file as DataFrame
TwoCenturies_df = pd.read_csv("cneos_closeapproach_data.csv", parse_dates=['Date']).dropna()

# Cleaning up the data, there are k and m for the diameters, need to convert the calues when they have 
TwoCenturies_df['Estimated Diameter (min)'] = TwoCenturies_df['Estimated Diameter (min)'].replace({' km': '*1e3', ' m': '*1'}, regex=True).map(pd.eval).astype(int)
TwoCenturies_df['Estimated Diameter (max)'] = TwoCenturies_df['Estimated Diameter (max)'].replace({' km': '*1e3', ' m': '*1'}, regex=True).map(pd.eval).astype(int)
TwoCenturies_df

Unnamed: 0,Object,Close-Approach (CA) Date,Date,Time,CA Distance Nominal (LD),CA Distance Nominal (au),CA Distance Minimum (LD),CA Distance Minimum (au),V relative (km/s),V infinity (km/s),H (mag),Estimated Diameter (min),Estimated Diameter (max),ObjectName
0,509352 (2007 AG),1900-Jan-04 22:25 ± 00:02,1900-01-04,22:25,3.75,0.00963,3.75,0.00962,8.69,8.65,20.1,250,570,a0509352
1,(2014 SC324),1900-Jan-11 01:03 ± 00:17,1900-01-11,01:03,15.55,0.03995,15.52,0.03989,10.65,10.65,24.3,37,82,bK14SW4C
2,4660 Nereus (1982 DB),1900-Jan-29 18:34 ± 00:14,1900-01-29,18:34,8.09,0.02078,8.08,0.02076,5.55,5.52,18.4,560,1200,a0004660
3,(2015 RW83),1900-Feb-04 02:31 ± 22:21,1900-02-04,02:31,12.63,0.03245,12.46,0.03201,3.13,3.11,24.1,40,90,bK15R83W
4,(2009 BW2),1900-Feb-04 11:05 ± 2_20:19,1900-02-04,11:05,6.86,0.01763,6.4,0.01644,4.27,4.23,25.1,25,57,bK09B02W
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24980,(2019 WT1),2200-Nov-21 01:44 ± 4_04:29,2200-11-21,01:44,9.60,0.02466,6.81,0.01749,9.08,9.06,26.9,11,25,bK19W01T
24981,413577 (2005 UL5),2200-Nov-23 02:16 ± 00:05,2200-11-23,02:16,5.84,0.01499,5.78,0.01486,18.16,18.15,20.3,230,520,a0413577
24982,(2014 WT202),2200-Nov-23 03:36 ± 00:04,2200-11-23,03:36,16.33,0.04197,16.23,0.04170,12.00,11.99,21.1,160,360,bK14WK2T
24983,163696 (2003 EB50),2200-Nov-26 06:30 ± < 00:01,2200-11-26,06:30,18.07,0.04643,18.07,0.04643,21.34,21.34,16.5,1300,3000,a0163696


In [3]:
# Create a clean DF to work with
CleanTwoCenturies_df = TwoCenturies_df
CleanTwoCenturies_df

Unnamed: 0,Object,Close-Approach (CA) Date,Date,Time,CA Distance Nominal (LD),CA Distance Nominal (au),CA Distance Minimum (LD),CA Distance Minimum (au),V relative (km/s),V infinity (km/s),H (mag),Estimated Diameter (min),Estimated Diameter (max),ObjectName
0,509352 (2007 AG),1900-Jan-04 22:25 ± 00:02,1900-01-04,22:25,3.75,0.00963,3.75,0.00962,8.69,8.65,20.1,250,570,a0509352
1,(2014 SC324),1900-Jan-11 01:03 ± 00:17,1900-01-11,01:03,15.55,0.03995,15.52,0.03989,10.65,10.65,24.3,37,82,bK14SW4C
2,4660 Nereus (1982 DB),1900-Jan-29 18:34 ± 00:14,1900-01-29,18:34,8.09,0.02078,8.08,0.02076,5.55,5.52,18.4,560,1200,a0004660
3,(2015 RW83),1900-Feb-04 02:31 ± 22:21,1900-02-04,02:31,12.63,0.03245,12.46,0.03201,3.13,3.11,24.1,40,90,bK15R83W
4,(2009 BW2),1900-Feb-04 11:05 ± 2_20:19,1900-02-04,11:05,6.86,0.01763,6.4,0.01644,4.27,4.23,25.1,25,57,bK09B02W
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24980,(2019 WT1),2200-Nov-21 01:44 ± 4_04:29,2200-11-21,01:44,9.60,0.02466,6.81,0.01749,9.08,9.06,26.9,11,25,bK19W01T
24981,413577 (2005 UL5),2200-Nov-23 02:16 ± 00:05,2200-11-23,02:16,5.84,0.01499,5.78,0.01486,18.16,18.15,20.3,230,520,a0413577
24982,(2014 WT202),2200-Nov-23 03:36 ± 00:04,2200-11-23,03:36,16.33,0.04197,16.23,0.04170,12.00,11.99,21.1,160,360,bK14WK2T
24983,163696 (2003 EB50),2200-Nov-26 06:30 ± < 00:01,2200-11-26,06:30,18.07,0.04643,18.07,0.04643,21.34,21.34,16.5,1300,3000,a0163696


In [4]:
# Calculate the amount of different objects in the total dataset
Objects_df = CleanTwoCenturies_df.groupby("Object").size()
len(Objects_df)

9545

In [5]:
# Seperate the year, month and day from the date column
CleanTwoCenturies_df['Year'] = pd.DatetimeIndex(TwoCenturies_df['Date']).year
CleanTwoCenturies_df['Month'] = pd.DatetimeIndex(CleanTwoCenturies_df['Date']).month
CleanTwoCenturies_df['Day'] = pd.DatetimeIndex(CleanTwoCenturies_df['Date']).day
CleanTwoCenturies_df

Unnamed: 0,Object,Close-Approach (CA) Date,Date,Time,CA Distance Nominal (LD),CA Distance Nominal (au),CA Distance Minimum (LD),CA Distance Minimum (au),V relative (km/s),V infinity (km/s),H (mag),Estimated Diameter (min),Estimated Diameter (max),ObjectName,Year,Month,Day
0,509352 (2007 AG),1900-Jan-04 22:25 ± 00:02,1900-01-04,22:25,3.75,0.00963,3.75,0.00962,8.69,8.65,20.1,250,570,a0509352,1900,1,4
1,(2014 SC324),1900-Jan-11 01:03 ± 00:17,1900-01-11,01:03,15.55,0.03995,15.52,0.03989,10.65,10.65,24.3,37,82,bK14SW4C,1900,1,11
2,4660 Nereus (1982 DB),1900-Jan-29 18:34 ± 00:14,1900-01-29,18:34,8.09,0.02078,8.08,0.02076,5.55,5.52,18.4,560,1200,a0004660,1900,1,29
3,(2015 RW83),1900-Feb-04 02:31 ± 22:21,1900-02-04,02:31,12.63,0.03245,12.46,0.03201,3.13,3.11,24.1,40,90,bK15R83W,1900,2,4
4,(2009 BW2),1900-Feb-04 11:05 ± 2_20:19,1900-02-04,11:05,6.86,0.01763,6.4,0.01644,4.27,4.23,25.1,25,57,bK09B02W,1900,2,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24980,(2019 WT1),2200-Nov-21 01:44 ± 4_04:29,2200-11-21,01:44,9.60,0.02466,6.81,0.01749,9.08,9.06,26.9,11,25,bK19W01T,2200,11,21
24981,413577 (2005 UL5),2200-Nov-23 02:16 ± 00:05,2200-11-23,02:16,5.84,0.01499,5.78,0.01486,18.16,18.15,20.3,230,520,a0413577,2200,11,23
24982,(2014 WT202),2200-Nov-23 03:36 ± 00:04,2200-11-23,03:36,16.33,0.04197,16.23,0.04170,12.00,11.99,21.1,160,360,bK14WK2T,2200,11,23
24983,163696 (2003 EB50),2200-Nov-26 06:30 ± < 00:01,2200-11-26,06:30,18.07,0.04643,18.07,0.04643,21.34,21.34,16.5,1300,3000,a0163696,2200,11,26


In [6]:
# Need to clean up some non numerical characters > and change column to a string
CleanTwoCenturies_df['CA Distance Minimum (LD) '] = CleanTwoCenturies_df['CA Distance Minimum (LD) '].str.replace('>', '')
CleanTwoCenturies_df['CA Distance Minimum (LD) '] = CleanTwoCenturies_df['CA Distance Minimum (LD) '].astype('float64')

In [7]:
CleanTwoCenturies_df.head()
CleanTwoCenturies_df.columns


Index(['Object', 'Close-Approach (CA) Date', 'Date', 'Time',
       'CA Distance Nominal (LD)', 'CA Distance Nominal (au)',
       'CA Distance Minimum (LD) ', 'CA Distance Minimum (au) ',
       'V relative (km/s)', 'V infinity (km/s)', 'H (mag)',
       'Estimated Diameter (min)', 'Estimated Diameter (max)', 'ObjectName',
       'Year', 'Month', 'Day'],
      dtype='object')

In [8]:
# CleanTwoCenturies_df['Estimated Diameter (min)'] = CleanTwoCenturies_df['Estimated Diameter (min)'].str.replace('m', '')
# CleanTwoCenturies_df['Estimated Diameter (max)'] = CleanTwoCenturies_df['Estimated Diameter (max)'].str.replace('m', '')

# # pd.to_numeric(CleanTwoCenturies_df['Estimated Diameter (max)'])

# CleanTwoCenturies_df

In [9]:
# Create a summary chart for all columns
# agg(regation)
Summary_df = CleanTwoCenturies_df.groupby('Year')\
\
.agg({
    'Object':'size',
    'CA Distance Nominal (LD)':['sum','mean','max','min','std', 'sem'],
    'CA Distance Nominal (au)':['sum','mean','max','min','std', 'sem'],
    'CA Distance Minimum (LD) ':['sum','mean','max','min','std', 'sem'],
    'CA Distance Minimum (au) ':['sum','mean','max','min','std', 'sem'],
    'V relative (km/s)':['sum','mean','max','min','std', 'sem'],
    'V infinity (km/s)':['sum','mean','max','min','std', 'sem'],
    'H (mag)':['sum','mean','max','min','std', 'sem'],
    'Estimated Diameter (min)':['sum','mean','max','min','std', 'sem'],
    'Estimated Diameter (max)':['sum','mean','max','min','std', 'sem'],
    'ObjectName':['size']    
})
Summary_df

Unnamed: 0_level_0,Object,CA Distance Nominal (LD),CA Distance Nominal (LD),CA Distance Nominal (LD),CA Distance Nominal (LD),CA Distance Nominal (LD),CA Distance Nominal (LD),CA Distance Nominal (au),CA Distance Nominal (au),CA Distance Nominal (au),...,Estimated Diameter (min),Estimated Diameter (min),Estimated Diameter (min),Estimated Diameter (max),Estimated Diameter (max),Estimated Diameter (max),Estimated Diameter (max),Estimated Diameter (max),Estimated Diameter (max),ObjectName
Unnamed: 0_level_1,size,sum,mean,max,min,std,sem,sum,mean,max,...,min,std,sem,sum,mean,max,min,std,sem,size
Year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1900,54,682.32,12.635556,19.41,3.75,4.416254,0.600976,1.75319,0.032466,0.04987,...,5,131.551274,17.901861,15435,285.833333,1200,12,291.314182,39.642839,54
1901,49,607.21,12.392041,19.44,4.09,4.509091,0.644156,1.56032,0.031843,0.04996,...,8,397.585149,56.797878,25587,522.183673,5200,19,897.659194,128.237028,49
1902,54,704.26,13.041852,19.42,1.84,4.600830,0.626094,1.80965,0.033512,0.04990,...,5,142.525392,19.395249,12681,234.833333,1600,12,313.933940,42.720998,54
1903,43,532.41,12.381628,18.96,2.96,4.668716,0.711972,1.36801,0.031814,0.04871,...,5,229.242880,34.959211,14038,326.465116,2600,12,504.596177,76.950196,43
1904,50,680.53,13.610600,19.37,2.30,4.604231,0.651137,1.74864,0.034973,0.04978,...,6,166.845944,23.595580,15168,303.360000,1400,14,374.363719,52.943025,50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2196,39,444.03,11.385385,18.92,2.04,4.816215,0.771212,1.14089,0.029254,0.04862,...,12,192.370439,30.803923,15407,395.051282,1700,27,424.966869,68.049160,39
2197,30,373.33,12.444333,19.18,1.94,5.653097,1.032110,0.95932,0.031977,0.04928,...,11,255.131465,46.580419,10269,342.300000,3000,25,582.678663,106.382082,30
2198,36,424.24,11.784444,18.77,1.97,4.765526,0.794254,1.09006,0.030279,0.04824,...,16,209.253109,34.875518,15226,422.944444,1900,36,469.642049,78.273675,36
2199,41,517.71,12.627073,18.98,1.11,4.975735,0.777079,1.33031,0.032447,0.04876,...,15,286.100025,44.681317,15165,369.878049,3900,33,620.518138,96.908652,41


In [10]:
# Reset index

Distance_df= CleanTwoCenturies_df.groupby('Year')['CA Distance Nominal (LD)'].agg(['count','mean','var', 'std', 'sum', 'sem']).reset_index()
MinDiameter_df= CleanTwoCenturies_df.groupby('Year')['Estimated Diameter (min)'].agg(['count','mean','var', 'std', 'sum', 'sem']).reset_index()
Vrelative_df= CleanTwoCenturies_df.groupby('Year')['V relative (km/s)'].agg(['count','mean','var', 'std', 'sum', 'sem']).reset_index()
Vinfinity_df= CleanTwoCenturies_df.groupby('Year')['V infinity (km/s)'].agg(['count','mean','var', 'std', 'sum', 'sem']).reset_index()

Distance_df

Unnamed: 0,Year,count,mean,var,std,sum,sem
0,1900,54,12.635556,19.503301,4.416254,682.32,0.600976
1,1901,49,12.392041,20.331900,4.509091,607.21,0.644156
2,1902,54,13.041852,21.167634,4.600830,704.26,0.626094
3,1903,43,12.381628,21.796904,4.668716,532.41,0.711972
4,1904,50,13.610600,21.198945,4.604231,680.53,0.651137
...,...,...,...,...,...,...,...
296,2196,39,11.385385,23.195926,4.816215,444.03,0.771212
297,2197,30,12.444333,31.957501,5.653097,373.33,1.032110
298,2198,36,11.784444,22.710237,4.765526,424.24,0.794254
299,2199,41,12.627073,24.757936,4.975735,517.71,0.777079


In [11]:
# Create different DFs for the the entire historical data with a full year, 1901-2019, 2000-2019, and from 2020 - 2200
Historical_distance_df = Distance_df.loc[(Distance_df["Year"] <2020)]
Historical_MinDiameter_df = MinDiameter_df.loc[(MinDiameter_df["Year"] <2020)]
Historical_Vrelative_df = Vrelative_df.loc[(Vrelative_df["Year"] <2020)]
Historical_Vinfinity_df = Vinfinity_df.loc[(Vinfinity_df["Year"] <2020)]

Historical_distance_df
Historical_Vinfinity_df

Unnamed: 0,Year,count,mean,var,std,sum,sem
0,1900,54,11.623148,31.600437,5.621427,627.65,0.764979
1,1901,49,13.086939,27.139051,5.209515,641.26,0.744216
2,1902,54,10.857407,32.405359,5.692571,586.30,0.774661
3,1903,43,10.283721,36.842800,6.069827,442.20,0.925640
4,1904,50,10.317200,40.402180,6.356271,515.86,0.898912
...,...,...,...,...,...,...,...
115,2015,569,10.599930,28.885605,5.374533,6031.36,0.225312
116,2016,749,10.674326,26.720396,5.169177,7995.07,0.188878
117,2017,876,10.438242,23.719190,4.870235,9143.90,0.164550
118,2018,891,10.415477,23.825721,4.881160,9280.19,0.163525


In [12]:
ObjectsBefore2000 = Distance_df.loc[(Distance_df["Year"] <2000)].mean()
MeanDistanceBefore2000 = Distance_df.loc[(Distance_df["Year"] <2000)].mean()
MeanDiameterBefore2000 = MinDiameter_df.loc[(MinDiameter_df["Year"] <2000)].mean()
MeanVrelBefore2000 = Vrelative_df.loc[(Vrelative_df["Year"] <2000)].mean()
MeanVinfBefore2000 = Vinfinity_df.loc[(Vinfinity_df["Year"] <2000)].mean()
    
MeanDistanceBefore2000 
MeanDiameterBefore2000 
# MeanVrelBefore2000 
# MeanVinfBefore2000 

Year      1949.500000
count       62.790000
mean       111.389665
var      43957.102460
std        191.716612
sum       6875.260000
sem         24.630170
dtype: float64

In [13]:
Distance_df.loc[(Distance_df["Year"].between(2000,2019))].mean()

MeanDistanceBetweem2000_2020 = Distance_df.loc[(Distance_df["Year"].between(2000,2019))].mean()
MeanDiameterBetweem2000_2020 = MinDiameter_df.loc[(MinDiameter_df["Year"].between(2000,2019))].mean()
MeanVrelBetweem2000_2020 = Vrelative_df.loc[(Vrelative_df["Year"].between(2000,2019))].mean()
MeanVinfBetweem2000_2020 = Vinfinity_df.loc[(Vinfinity_df["Year"].between(2000,2019))].mean()

MeanDistanceBetweem2000_2020 
MeanDiameterBetweem2000_2020 
MeanVrelBetweem2000_2020 
MeanVinfBetweem2000_2020 

Year     2009.500000
count     413.600000
mean       10.436646
var        25.991104
std         5.083648
sum      4340.015000
sem         0.294643
dtype: float64

In [14]:
# MeanDistanceBefore2000 = Distance_df.loc[(Distance_df["Year"] <2000)].mean()
# MeanDiameterBefore2000 = MinDiameter_df.loc[(MinDiameter_df["Year"] <2000)]
# MeanVrelBefore2000 = Vrelative_df.loc[(Vrelative_df["Year"] <2000)]
# MeanVinfBefore2000 = Vinfinity_df.loc[(Vinfinity_df["Year"] <2000)]

#  Historical_df[Historical_df['Estimated Diameter (min)'].between(150,1000)].groupby('Year')['Year'].size()

In [15]:
#Calculate the amount of objects/year and put tinto X, and Y to be graphed
Year_df = CleanTwoCenturies_df.groupby('Year').agg({'Object':'size'}).reset_index()
Year_df

x = Year_df['Year'].tolist()
y = Year_df['Object'].tolist()
# y

In [29]:
%matplotlib widget

# Generate a bar plot showing the total number of NEOs per year for 1901-2200

# Set x axis and tick locations
x_axis = np.arange(len(Year_df))
tick_locations = [value for value in x_axis]
tick_locations2 = np.arange(1900, 2210, step=10)
yrs = 1900 + x_axis
yrs
tick_locations2
# plt.scatter(Weights, yrs, marker="o")

# Create a bar graph that shows the total NEOs count/year from 1901-2200
plt.figure(figsize=(14,5))
plt.bar(x, y, color='r', alpha=0.5, align="center")
# plt.scatter(x, y, color='r', alpha=0.5)
plt.plot(x, y, color='r', alpha=0.5)

# Setup the tick locations
plt.xticks(tick_locations2, tick_locations2, color='midnightblue', rotation="vertical")
plt.yticks(color='midnightblue')
# plt.plot(x, yrs, color="blue")

# Set x and y limits
plt.xlim(1900, 2200)
plt.ylim(0, 1300)

# Set a Title and labels
plt.title("Asteroid Count vs Year", color='midnightblue')
plt.xlabel("Year", color='midnightblue')
plt.ylabel("Asteroid Count", color='midnightblue')
plt.tight_layout()
plt.savefig('1900-_2200.png', bbox_inches='tight')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [17]:
# Create DFs of the different periods (histotical, 2000-2019, and the Future 2019-2200)
Historical_df = Year_df.loc[(Year_df["Year"] <2019)].reset_index()
Future_df = Year_df.loc[(Year_df["Year"] >2020)].reset_index()
Before2000_df = Year_df.loc[(Year_df["Year"] <2000)].reset_index()
Spike20Years_df = Historical_df.loc[(Historical_df["Year"] >2000)].reset_index()
Historical_df
Spike20Years_df
# Before2000_df
# Future_df

Unnamed: 0,level_0,index,Year,Object
0,101,101,2001,114
1,102,102,2002,143
2,103,103,2003,146
3,104,104,2004,209
4,105,105,2005,221
5,106,106,2006,234
6,107,107,2007,250
7,108,108,2008,317
8,109,109,2009,339
9,110,110,2010,342


In [18]:
# Create a summary chart for all columns
# agg(regation)
Summary2_df = CleanTwoCenturies_df.groupby('Year')\
\
.agg({
    'Object':'size',
    'CA Distance Nominal (LD)':['sum','mean','max','min','std', 'sem'],
    'CA Distance Nominal (au)':['sum','mean','max','min','std', 'sem'],
    'CA Distance Minimum (LD) ':['sum','mean','max','min','std', 'sem'],
    'CA Distance Minimum (au) ':['sum','mean','max','min','std', 'sem'],
    'V relative (km/s)':['sum','mean','max','min','std', 'sem'],
    'V infinity (km/s)':['sum','mean','max','min','std', 'sem'],
    'H (mag)':['sum','mean','max','min','std', 'sem'],
    'Estimated Diameter (min)':['sum','mean','max','min','std', 'sem'],
    'Estimated Diameter (max)':['sum','mean','max','min','std', 'sem'],
    'ObjectName':['size']    
})
Summary_df

Unnamed: 0_level_0,Object,CA Distance Nominal (LD),CA Distance Nominal (LD),CA Distance Nominal (LD),CA Distance Nominal (LD),CA Distance Nominal (LD),CA Distance Nominal (LD),CA Distance Nominal (au),CA Distance Nominal (au),CA Distance Nominal (au),...,Estimated Diameter (min),Estimated Diameter (min),Estimated Diameter (min),Estimated Diameter (max),Estimated Diameter (max),Estimated Diameter (max),Estimated Diameter (max),Estimated Diameter (max),Estimated Diameter (max),ObjectName
Unnamed: 0_level_1,size,sum,mean,max,min,std,sem,sum,mean,max,...,min,std,sem,sum,mean,max,min,std,sem,size
Year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1900,54,682.32,12.635556,19.41,3.75,4.416254,0.600976,1.75319,0.032466,0.04987,...,5,131.551274,17.901861,15435,285.833333,1200,12,291.314182,39.642839,54
1901,49,607.21,12.392041,19.44,4.09,4.509091,0.644156,1.56032,0.031843,0.04996,...,8,397.585149,56.797878,25587,522.183673,5200,19,897.659194,128.237028,49
1902,54,704.26,13.041852,19.42,1.84,4.600830,0.626094,1.80965,0.033512,0.04990,...,5,142.525392,19.395249,12681,234.833333,1600,12,313.933940,42.720998,54
1903,43,532.41,12.381628,18.96,2.96,4.668716,0.711972,1.36801,0.031814,0.04871,...,5,229.242880,34.959211,14038,326.465116,2600,12,504.596177,76.950196,43
1904,50,680.53,13.610600,19.37,2.30,4.604231,0.651137,1.74864,0.034973,0.04978,...,6,166.845944,23.595580,15168,303.360000,1400,14,374.363719,52.943025,50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2196,39,444.03,11.385385,18.92,2.04,4.816215,0.771212,1.14089,0.029254,0.04862,...,12,192.370439,30.803923,15407,395.051282,1700,27,424.966869,68.049160,39
2197,30,373.33,12.444333,19.18,1.94,5.653097,1.032110,0.95932,0.031977,0.04928,...,11,255.131465,46.580419,10269,342.300000,3000,25,582.678663,106.382082,30
2198,36,424.24,11.784444,18.77,1.97,4.765526,0.794254,1.09006,0.030279,0.04824,...,16,209.253109,34.875518,15226,422.944444,1900,36,469.642049,78.273675,36
2199,41,517.71,12.627073,18.98,1.11,4.975735,0.777079,1.33031,0.032447,0.04876,...,15,286.100025,44.681317,15165,369.878049,3900,33,620.518138,96.908652,41


In [19]:
# Summary_df
# Historical_df = Summary_df.loc[(Year_df["Year"] <2020)].reset_index()

In [20]:
Historical_df.describe()


Unnamed: 0,index,Year,Object
count,119.0,119.0,119.0
mean,59.0,1959.0,112.722689
std,34.496377,34.496377,149.637637
min,0.0,1900.0,34.0
25%,29.5,1929.5,57.0
50%,59.0,1959.0,67.0
75%,88.5,1988.5,76.0
max,118.0,2018.0,891.0


In [21]:
Spike20Years_df.describe()

Unnamed: 0,level_0,index,Year,Object
count,18.0,18.0,18.0,18.0
mean,109.5,109.5,2009.5,390.777778
std,5.338539,5.338539,5.338539,241.143978
min,101.0,101.0,2001.0,114.0
25%,105.25,105.25,2005.25,224.25
50%,109.5,109.5,2009.5,328.0
75%,113.75,113.75,2013.75,504.5
max,118.0,118.0,2018.0,891.0


In [22]:
Future_df.describe()

Unnamed: 0,index,Year,Object
count,180.0,180.0,180.0
mean,210.5,2110.5,53.783333
std,52.105662,52.105662,16.482689
min,121.0,2021.0,22.0
25%,165.75,2065.75,40.75
50%,210.5,2110.5,53.0
75%,255.25,2155.25,67.0
max,300.0,2200.0,98.0


In [23]:
Historical_distance_df = Distance_df.loc[(Year_df["Year"] <2020)]
Historical_MinDiameter_df = MinDiameter_df.loc[(Year_df["Year"] <2020)]
Historical_Vrelative_df = Vrelative_df.loc[(Year_df["Year"] <2020)]
Historical_Vinfinity_df = Vinfinity_df.loc[(Year_df["Year"] <2020)]

Hist_Objects = CleanTwoCenturies_df.groupby("Object").size()
len(Hist_Objects)

Historical_distance_df

Unnamed: 0,Year,count,mean,var,std,sum,sem
0,1900,54,12.635556,19.503301,4.416254,682.32,0.600976
1,1901,49,12.392041,20.331900,4.509091,607.21,0.644156
2,1902,54,13.041852,21.167634,4.600830,704.26,0.626094
3,1903,43,12.381628,21.796904,4.668716,532.41,0.711972
4,1904,50,13.610600,21.198945,4.604231,680.53,0.651137
...,...,...,...,...,...,...,...
115,2015,569,9.066344,29.865328,5.464918,5158.75,0.229101
116,2016,749,8.595327,31.758841,5.635498,6437.90,0.205917
117,2017,876,8.481290,30.992925,5.567129,7429.61,0.188096
118,2018,891,8.319360,30.367530,5.510674,7412.55,0.184615


In [24]:
Historical_distance_df['mean'].mean()

11.718887504917932

In [25]:
# Pull out the objects that are smalelr annd larger than 150m diameter

Historical_df = CleanTwoCenturies_df.loc[(CleanTwoCenturies_df["Year"] <2020)]

SizeSmall_df = Historical_df[Historical_df['Estimated Diameter (min)'] <= 150].groupby('Year')['Year'].size()
SizeLarge_df = Historical_df[Historical_df['Estimated Diameter (min)'] >= 150].groupby('Year')['Year'].size()

SizeSmall_df
SizeLarge_df


Year
1900    20
1901    21
1902    15
1903    14
1904    15
        ..
2015    16
2016    17
2017    26
2018    20
2019    22
Name: Year, Length: 120, dtype: int64

In [26]:
# create X and Ys for all the data to be plot in the sublplot
y_H_dist = Historical_distance_df['mean']
y_H_diam = Historical_MinDiameter_df['mean']
y_H_Vrel = Historical_Vrelative_df['mean']
y_H_Vint = Historical_Vinfinity_df['mean']

x_Historical = Historical_distance_df['count']
# x_Historical
# y_H_diam
# y_H_Vrel
# y_H_Vint

In [28]:
%matplotlib widget


# Generate 4-row mulitple chart showing the historical count, avg diameter, Dstance, and NEO size

# Set x axis and tick locations
x_axis = np.arange(len(x_Historical))
tick_locations = [value for value in x_axis]
tick_locations2 = np.arange(1900, 2020, step=10)
yrs = 1900 + x_axis
yrs
tick_locations2



# Create a 4 row subplots that share an X axis 
fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, sharex='all', figsize=(14,14))

# # Create a list indicating where to write x labels and set figure size to adjust for space

# plot out the 
ax1.plot(yrs, x_Historical, color='r', alpha=0.5)
ax2.plot(yrs, y_H_diam, color='b', alpha=0.5)
ax3.plot(yrs, y_H_dist, color='black', alpha=0.5)


l1=ax4.plot(yrs, SizeSmall_df, color='black', alpha=0.5)
l2=ax4.plot(yrs, SizeLarge_df, color='red', alpha=0.5)



plt.xticks(tick_locations2, tick_locations2, color='white', rotation="vertical")

ax1.tick_params(labelcolor='midnightblue')
ax2.tick_params(labelcolor='midnightblue')
ax3.tick_params(labelcolor='midnightblue')
ax4.tick_params(labelcolor='midnightblue')


ax1.set_axis_on

# Set a Title and labels

plt.xlabel("Year", color='midnightblue')


ax1.set_ylabel("Asteroid Count", color='midnightblue')
ax2.set_ylabel("Average Minimum Diameter(m)", color='midnightblue')
ax3.set_ylabel("Average CA Distance Nominal (LD)", color='midnightblue')
ax4.set_ylabel("Asteroid Count", color='midnightblue')

ax1.set_title("Asteroid Count", color='midnightblue')
ax2.set_title("Average Minimum Diameter(m)", color='midnightblue')
ax3.set_title("Average CA Distance Nominal (LD)", color='midnightblue')
ax4.set_title("Small(<150m) and Larger (>150m) Asteroid Count", color='midnightblue')

# creae a legend on the
ax4.legend(["Small Asteroid (<150m)","Large Asteroid(>150m)"],loc="upper left")



plt.tight_layout()
# save graphic file
fig.savefig('AsteroidComparison.png', bbox_inches='tight')


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

We notice that between 2000 and 2020 there has been a large, almost exponential, increase in the amount of objects observed since 2000.  From 101 in 2000 to 1142 in 2019.  During this 19yr period, Average Distance Nominal was ~28% less in 2019, and average diameter size decreased from just over 76% smaller. So it would seem that it isn't that there were more NEOs in the last 19 years, but that more smaller NEOs were observed and the objects came closer. observed.