In [142]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn import metrics


data = pd.read_csv('merged_data.csv')


data = data.dropna()


data['x'] = np.cos(np.radians(data['latitude'])) * np.cos(np.radians(data['longitude']))
data['y'] = np.cos(np.radians(data['latitude'])) * np.sin(np.radians(data['longitude']))
data['z'] = np.sin(np.radians(data['latitude']))

In [143]:
# get dummies
unique_countries = data['country'].unique()
years = [str(year) for year in range(1950, 2024)]

# create a dataframe to store the cumulative number of meteors for each country and each year
cumulative_meteors = pd.DataFrame(0, index=unique_countries, columns=years)
cumulative_meteors.head()

Unnamed: 0,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
United States,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Argentina,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Mexico,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Russia,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Portugal,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# cumulatively count meteors for each country and each year
for index, row in data.iterrows():
    if str(int(row['year'])) in years:  
        #print(1)
        cumulative_meteors.loc[row['country'], str(int(row['year']))]+=1
cumulative_meteors

Unnamed: 0,1950,1951,1952,1953,1954,1955,1956,1957,1958,1959,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
United States,19,8,5,5,11,10,8,7,5,5,...,0,0,0,0,0,0,0,0,0,0
Argentina,1,1,0,1,1,0,1,2,0,0,...,0,0,0,0,0,0,0,0,0,0
Mexico,2,0,0,0,0,0,0,0,2,0,...,0,0,0,0,0,0,0,0,0,0
Russia,1,1,1,0,1,0,1,1,0,1,...,0,0,0,0,0,0,0,0,0,0
Portugal,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
United Arab Emirates,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Colombia,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Romania,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Ecuador,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [145]:
cumulative_meteors = cumulative_meteors.cumsum(axis=1).reset_index()
cumulative_meteors.rename(columns={'index': 'country'}, inplace=True)
cumulative_meteors

Unnamed: 0,country,1950,1951,1952,1953,1954,1955,1956,1957,1958,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,United States,19,27,32,37,48,58,66,73,78,...,1019,1019,1019,1019,1019,1019,1019,1019,1019,1019
1,Argentina,1,2,2,3,4,4,5,7,7,...,29,29,29,29,29,29,29,29,29,29
2,Mexico,2,2,2,2,2,2,2,2,4,...,49,49,49,49,49,49,49,49,49,49
3,Russia,1,2,3,3,4,4,5,6,6,...,63,63,63,63,63,63,63,63,63,63
4,Portugal,1,1,1,1,1,1,1,1,1,...,3,3,3,3,3,3,3,3,3,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,United Arab Emirates,0,0,0,0,0,0,0,0,0,...,28,28,28,28,28,28,28,28,28,28
92,Colombia,0,0,0,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,1,1
93,Romania,0,0,0,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,1,1
94,Ecuador,0,0,0,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,1,1


In [164]:
#show the data of China
cumulative_meteors.loc[cumulative_meteors['country'] == 'China']

Unnamed: 0,country,1950,1951,1952,1953,1954,1955,1956,1957,1958,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
24,China,0,0,1,1,2,2,4,5,6,...,72,72,72,72,72,72,72,72,72,72


In [146]:
population_density = data.groupby('country')[years].mean().reset_index()


population_density

Unnamed: 0,country,1950,1951,1952,1953,1954,1955,1956,1957,1958,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,Afghanistan,11.976319,12.135203,12.302222,12.469964,12.641306,12.824628,13.020720,13.227149,13.435169,...,50.504736,52.105301,53.443440,54.965585,56.589043,58.303333,60.171292,61.605819,62.496676,63.845705
1,Algeria,3.786487,3.892056,3.997585,4.103154,4.204649,4.302495,4.401061,4.499862,4.599007,...,16.460668,16.802644,17.151629,17.503715,17.846211,18.177696,18.491561,18.793445,19.094187,19.382560
2,Angola,3.651134,3.691796,3.731200,3.775053,3.823468,3.876445,3.933802,3.995061,4.059625,...,21.786131,22.585865,23.408254,24.251896,25.103999,25.969064,26.831742,27.699069,28.583484,29.477746
3,Argentina,6.095700,6.216923,6.339571,6.462224,6.583977,6.703905,6.823518,6.942820,7.061460,...,15.410816,15.573055,15.724678,15.863864,15.994957,16.109071,16.187335,16.230432,16.264683,16.311426
4,Australia,1.064362,1.095845,1.123453,1.147584,1.171444,1.198232,1.226802,1.254673,1.281860,...,3.071424,3.117296,3.167015,3.219223,3.269816,3.317068,3.351053,3.378730,3.410565,3.443126
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91,Uruguay,12.901057,13.028118,13.170677,13.326229,13.492627,13.666791,13.847710,14.034796,14.225903,...,19.378822,19.446275,19.511322,19.564179,19.599379,19.614803,19.624980,19.611853,19.578472,19.562117
92,Uzbekistan,14.668323,15.009419,15.365595,15.745455,16.149055,16.582122,17.047576,17.543632,18.075134,...,71.046059,72.283372,73.541533,74.798255,76.101294,77.491070,78.952449,80.497639,82.132006,83.808902
93,Venezuela,5.989285,6.255261,6.526720,6.803210,7.084781,7.371760,7.664746,7.964960,8.273251,...,33.012433,33.361491,33.570722,33.352054,32.525238,31.576470,31.037408,30.812352,30.785281,30.881127
94,Yemen,8.744398,8.871996,9.012201,9.162060,9.320860,9.488554,9.665510,9.852085,10.049309,...,57.250050,59.017329,60.815974,62.675761,64.558937,66.502656,68.441130,70.345343,72.395923,74.608025


In [165]:
merged_data = pd.merge(cumulative_meteors, population_density, on='country', suffixes=('_meteors', '_popdensity'))


numeric_columns = [col for col in merged_data.columns if col != 'country']
merged_data_china = merged_data.loc[merged_data['country'] == 'China']

correlation_results = merged_data_china[numeric_columns].corr().loc[
    [col for col in numeric_columns if 'meteors' in col], 
    [col for col in numeric_columns if 'popdensity' in col]
]

In [171]:
merged_data_china

Unnamed: 0,country,1950_meteors,1951_meteors,1952_meteors,1953_meteors,1954_meteors,1955_meteors,1956_meteors,1957_meteors,1958_meteors,...,2014_popdensity,2015_popdensity,2016_popdensity,2017_popdensity,2018_popdensity,2019_popdensity,2020_popdensity,2021_popdensity,2022_popdensity,2023_popdensity
24,China,0,0,1,1,2,2,4,5,6,...,144.57833,145.430643,146.255482,147.120284,147.813433,148.283371,148.552718,148.587215,148.456205,148.18593


In [172]:
correlation_results

Unnamed: 0,1950_popdensity,1951_popdensity,1952_popdensity,1953_popdensity,1954_popdensity,1955_popdensity,1956_popdensity,1957_popdensity,1958_popdensity,1959_popdensity,...,2014_popdensity,2015_popdensity,2016_popdensity,2017_popdensity,2018_popdensity,2019_popdensity,2020_popdensity,2021_popdensity,2022_popdensity,2023_popdensity
1950_meteors,,,,,,,,,,,...,,,,,,,,,,
1951_meteors,,,,,,,,,,,...,,,,,,,,,,
1952_meteors,,,,,,,,,,,...,,,,,,,,,,
1953_meteors,,,,,,,,,,,...,,,,,,,,,,
1954_meteors,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019_meteors,,,,,,,,,,,...,,,,,,,,,,
2020_meteors,,,,,,,,,,,...,,,,,,,,,,
2021_meteors,,,,,,,,,,,...,,,,,,,,,,
2022_meteors,,,,,,,,,,,...,,,,,,,,,,
