In [380]:
import pandas as pd
import plotly as py
import plotly.express as px
import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [381]:
df = pd.read_csv('data/Meteorite_Landings.csv')
df['count'] = pd.Series(np.ones(df.shape[0])).astype(int)
# Turning floats into integers
df['year'] = df['year'].fillna(0).apply(lambda x: int(x))

df['mass (g)'] = df['mass (g)'].apply(lambda x: round(x/1000, 3))
df['mass (g)'] = df['mass (g)'].replace(0, np.nan)
df.rename(columns={'mass (g)': 'mass (kg)', 'reclong': 'lon', 'reclat': 'lat'}, inplace=True)

df.drop(columns='GeoLocation', inplace=True)

In [382]:
df.head()

Unnamed: 0,name,id,nametype,recclass,mass (kg),fall,year,lat,lon,count
0,Aachen,1,Valid,L5,0.021,Fell,1880,50.775,6.08333,1
1,Aarhus,2,Valid,H6,0.72,Fell,1951,56.18333,10.23333,1
2,Abee,6,Valid,EH4,107.0,Fell,1952,54.21667,-113.0,1
3,Acapulco,10,Valid,Acapulcoite,1.914,Fell,1976,16.88333,-99.9,1
4,Achiras,370,Valid,L6,0.78,Fell,1902,-33.16667,-64.95,1


In [383]:
recclass_l = df[['recclass']].value_counts()[:8]
recclass_l

recclass
L6          8285
H5          7142
L5          4796
H6          4528
H4          4211
LL5         2766
LL6         2043
L4          1253
dtype: int64

In [384]:
# The most commont classes
dfclass = df[df['recclass'].isin(['L6', 'H5', 'L5', 'H6', 'H4', 'LL5', 'LL6', 'L4'])]
df_c = dfclass.groupby('recclass').sum()[['mass (kg)', 'count']].sort_values(by='count', ascending=False).reset_index()
df_c['c_mass (kg)'] = round(df_c['mass (kg)']/df_c['count'], 3)
df_c.drop(columns='mass (kg)', inplace=True)

In [385]:
df_c = df_c.groupby('recclass').sum()[['c_mass (kg)', 'count']].sort_values(by='count', ascending=False).reset_index()
df_c['mode'] = dfclass['mass (kg)'].mode()

In [386]:
df_c.head()

Unnamed: 0,recclass,c_mass (kg),count,mode
0,L6,1.449,8285,0.001
1,H5,2.165,7142,
2,L5,1.797,4796,
3,H6,0.862,4528,
4,H4,0.997,4211,


In [387]:
fig = px.bar(
    df_c,
    x='recclass',
    y='count',
    color='c_mass (kg)',
    title='Amount for the most common meteorite properties classified by mass'
)
fig.show()

In [388]:
df['lat'].replace(0, np.nan, inplace=True)
df['lon'].replace(0, np.nan, inplace=True)
df.sort_values(by='year', ascending=False)

Unnamed: 0,name,id,nametype,recclass,mass (kg),fall,year,lat,lon,count
30682,Northwest Africa 7701,57150,Valid,CK6,0.055,Found,2101,,,1
30776,Northwest Africa 7857,57422,Valid,LL6,0.246,Found,2013,,,1
30781,Northwest Africa 7863,57427,Valid,LL5,1.000,Found,2013,,,1
194,Chelyabinsk,57165,Valid,LL5,100.000,Fell,2013,54.81667,61.11667,1
30780,Northwest Africa 7862,57426,Valid,L4/5,0.317,Found,2013,,,1
...,...,...,...,...,...,...,...,...,...,...
17253,Jiddat al Harasis 792,56524,Valid,H6,0.353,Found,0,19.81889,55.93250,1
17254,Jiddat al Harasis 793,56525,Valid,L5,0.420,Found,0,19.92944,55.95778,1
28381,Northwest Africa 4137,34409,Valid,L6,0.121,Found,0,,,1
17255,Jiddat al Harasis 794,56526,Valid,L5,0.387,Found,0,19.96944,55.98778,1


In [400]:
df[df['recclass']=='H5']

Unnamed: 0,name,id,nametype,recclass,mass (kg),fall,year,lat,lon,count
7,Agen,392,Valid,H5,30.000,Fell,1814,44.21667,0.61667,1
25,Alessandria,463,Valid,H5,0.908,Fell,1860,44.88333,8.75000,1
28,Allegan,2276,Valid,H5,32.000,Fell,1899,42.53333,-85.88333,1
32,Ambapur Nagla,2290,Valid,H5,6.400,Fell,1895,27.66667,78.25000,1
41,Anlong,2305,Valid,H5,2.500,Fell,1971,25.15000,105.18333,1
...,...,...,...,...,...,...,...,...,...,...
45665,Yarle Lakes 001,30353,Valid,H5,0.913,Found,1990,-30.31667,131.46667,1
45682,Yorktown (Texas),30371,Valid,H5,3.500,Found,1957,28.95000,-97.40278,1
45688,Yucca 016,57158,Valid,H5,0.026,Found,2011,34.82658,-114.27763,1
45689,Yucca 017,57159,Valid,H5,0.200,Found,2011,34.81923,-114.27735,1


In [408]:
# df_2 = df[df['recclass'].isin(['L6', 'H5', 'L5', 'H6', 'H4', 'LL5', 'LL6', 'L4'])]
df_2 = df.dropna().groupby('recclass')[['mass (kg)']].describe()
df_2 = df_2['mass (kg)'].sort_values(by='mean', ascending=False)
df_2[:20]

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
recclass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"Iron, IVB",14.0,4322.832857,16025.035776,1.8,13.2,33.11,69.0,60000.0
"Iron, IIIE",12.0,2409.610417,8059.559334,2.5,9.98125,47.25,180.5,28000.0
"Iron, IAB-MG",74.0,1458.890338,6850.454356,0.117,3.813,13.8,72.0,50000.0
"Iron, IC",9.0,991.122222,1691.753504,3.6,54.0,683.0,825.0,5360.0
"Iron, IAB-ung",42.0,751.0245,3829.723709,0.011,0.32275,3.74,19.5,24000.0
Mesosiderite-A1,6.0,698.206333,1536.643943,0.188,11.7375,42.925,216.125,3828.0
"Pallasite, PMG-an",11.0,685.358909,1383.10881,0.074,0.845,17.0,424.0,4300.0
"Iron, IIIAB",272.0,509.123603,3972.394335,0.003,3.66875,17.295,79.0,58200.0
"Iron, IVA",61.0,508.347475,3340.591345,0.03,4.3,12.28,30.0,26000.0
"Iron, ungrouped",97.0,490.581629,2750.566215,0.001,0.86,8.74,39.0,22000.0


In [413]:
fig = make_subplots(rows=1, cols=2)

fig.add_trace(
    go.Scatter(x=df_2.index, y=df_2['mean']),
    row=1, col=1
)

fig.add_trace(
    go.Line(x=df_2.index, y=df_2['mean'])
)

fig.update_layout(height=800, width=1500, title_text="Side By Side Subplots")
fig.show()