In [1]:
import pandas as pd
import plotly as py
import plotly.express as px
import numpy as np
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [2]:
df = pd.read_csv('data/Meteorite_Landings.csv')
df['count'] = pd.Series(np.ones(df.shape[0])).astype(int)
# Turning floats into integers
df['year'] = df['year'].fillna(0).apply(lambda x: int(x))

df['mass (g)'] = df['mass (g)'].apply(lambda x: round(x/1000, 3))
df['mass (g)'] = df['mass (g)'].replace(0, np.nan)
df.rename(columns={'mass (g)': 'mass (kg)', 'reclong': 'lon', 'reclat': 'lat'}, inplace=True)

df.drop(columns='GeoLocation', inplace=True)

In [3]:
df.head()

Unnamed: 0,name,id,nametype,recclass,mass (kg),fall,year,lat,lon,count
0,Aachen,1,Valid,L5,0.021,Fell,1880,50.775,6.08333,1
1,Aarhus,2,Valid,H6,0.72,Fell,1951,56.18333,10.23333,1
2,Abee,6,Valid,EH4,107.0,Fell,1952,54.21667,-113.0,1
3,Acapulco,10,Valid,Acapulcoite,1.914,Fell,1976,16.88333,-99.9,1
4,Achiras,370,Valid,L6,0.78,Fell,1902,-33.16667,-64.95,1


In [4]:
recclass_l = df[['recclass']].value_counts()[:8]
recclass_l

recclass
L6          8285
H5          7142
L5          4796
H6          4528
H4          4211
LL5         2766
LL6         2043
L4          1253
dtype: int64

In [5]:
# The most commont classes
dfclass = df[df['recclass'].isin(['L6', 'H5', 'L5', 'H6', 'H4', 'LL5', 'LL6', 'L4'])]
df_c = dfclass.groupby('recclass').sum()[['mass (kg)', 'count']].sort_values(by='count', ascending=False).reset_index()
df_c['c_mass (kg)'] = round(df_c['mass (kg)']/df_c['count'], 3)
df_c.drop(columns='mass (kg)', inplace=True)

In [6]:
df_c = df_c.groupby('recclass').sum()[['c_mass (kg)', 'count']].sort_values(by='count', ascending=False).reset_index()
df_c['mode'] = dfclass['mass (kg)'].mode()

In [7]:
df_c.head()

Unnamed: 0,recclass,c_mass (kg),count,mode
0,L6,1.449,8285,0.001
1,H5,2.165,7142,
2,L5,1.797,4796,
3,H6,0.862,4528,
4,H4,0.997,4211,


In [8]:
fig = px.bar(
    df_c,
    x='recclass',
    y='count',
    color='c_mass (kg)',
    title='Amount for the most common meteorite properties classified by mass'
)
fig.show()

In [9]:
df['lat'].replace(0, np.nan, inplace=True)
df['lon'].replace(0, np.nan, inplace=True)
df.sort_values(by='year', ascending=False)

Unnamed: 0,name,id,nametype,recclass,mass (kg),fall,year,lat,lon,count
30682,Northwest Africa 7701,57150,Valid,CK6,0.055,Found,2101,,,1
30776,Northwest Africa 7857,57422,Valid,LL6,0.246,Found,2013,,,1
30781,Northwest Africa 7863,57427,Valid,LL5,1.000,Found,2013,,,1
194,Chelyabinsk,57165,Valid,LL5,100.000,Fell,2013,54.81667,61.11667,1
30780,Northwest Africa 7862,57426,Valid,L4/5,0.317,Found,2013,,,1
...,...,...,...,...,...,...,...,...,...,...
17253,Jiddat al Harasis 792,56524,Valid,H6,0.353,Found,0,19.81889,55.93250,1
17254,Jiddat al Harasis 793,56525,Valid,L5,0.420,Found,0,19.92944,55.95778,1
28381,Northwest Africa 4137,34409,Valid,L6,0.121,Found,0,,,1
17255,Jiddat al Harasis 794,56526,Valid,L5,0.387,Found,0,19.96944,55.98778,1


In [10]:
df[df['recclass']=='H5']

Unnamed: 0,name,id,nametype,recclass,mass (kg),fall,year,lat,lon,count
7,Agen,392,Valid,H5,30.000,Fell,1814,44.21667,0.61667,1
25,Alessandria,463,Valid,H5,0.908,Fell,1860,44.88333,8.75000,1
28,Allegan,2276,Valid,H5,32.000,Fell,1899,42.53333,-85.88333,1
32,Ambapur Nagla,2290,Valid,H5,6.400,Fell,1895,27.66667,78.25000,1
41,Anlong,2305,Valid,H5,2.500,Fell,1971,25.15000,105.18333,1
...,...,...,...,...,...,...,...,...,...,...
45665,Yarle Lakes 001,30353,Valid,H5,0.913,Found,1990,-30.31667,131.46667,1
45682,Yorktown (Texas),30371,Valid,H5,3.500,Found,1957,28.95000,-97.40278,1
45688,Yucca 016,57158,Valid,H5,0.026,Found,2011,34.82658,-114.27763,1
45689,Yucca 017,57159,Valid,H5,0.200,Found,2011,34.81923,-114.27735,1


In [11]:
# df_2 = df[df['recclass'].isin(['L6', 'H5', 'L5', 'H6', 'H4', 'LL5', 'LL6', 'L4'])]
df_2 = df.dropna().groupby('recclass')[['mass (kg)']].describe()
df_2 = df_2['mass (kg)'].sort_values(by='count', ascending=False)
df_2[:20]

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
recclass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
L6,6454.0,1.730408,15.786444,0.001,0.008,0.037,0.214,564.0
H5,5514.0,2.621751,58.066252,0.001,0.007,0.026,0.171,4000.0
H4,3298.0,1.107494,10.918188,0.001,0.004,0.019,0.11475,500.0
H6,3012.0,1.19322,10.381568,0.001,0.006,0.023,0.14325,295.0
L5,2699.0,2.860147,44.056703,0.001,0.007,0.037,0.1975,1750.0
LL5,1874.0,0.510585,10.476975,0.001,0.004,0.012,0.036,408.0
LL6,956.0,1.250256,10.878717,0.001,0.007,0.031,0.172,271.0
L4,825.0,2.122017,15.174913,0.001,0.01,0.061,0.303,257.0
H4/5,379.0,1.76338,14.555689,0.001,0.0035,0.008,0.0855,256.0
CM2,275.0,0.543884,6.102546,0.001,0.004,0.011,0.0375,100.0


In [12]:
fig = make_subplots(rows=1, cols=2)

fig.add_trace(
    go.Scatter(x=df_2.index, y=df_2['mean']),
    row=1, col=1
)

fig.add_trace(
    go.Line(x=df_2.index, y=df_2['mean'])
)

fig.update_layout(height=800, width=1500, title_text="Side By Side Subplots")
fig.show()


plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.




In [13]:
df[0:10]

Unnamed: 0,name,id,nametype,recclass,mass (kg),fall,year,lat,lon,count
0,Aachen,1,Valid,L5,0.021,Fell,1880,50.775,6.08333,1
1,Aarhus,2,Valid,H6,0.72,Fell,1951,56.18333,10.23333,1
2,Abee,6,Valid,EH4,107.0,Fell,1952,54.21667,-113.0,1
3,Acapulco,10,Valid,Acapulcoite,1.914,Fell,1976,16.88333,-99.9,1
4,Achiras,370,Valid,L6,0.78,Fell,1902,-33.16667,-64.95,1
5,Adhi Kot,379,Valid,EH4,4.239,Fell,1919,32.1,71.8,1
6,Adzhi-Bogdo (stone),390,Valid,LL3-6,0.91,Fell,1949,44.83333,95.16667,1
7,Agen,392,Valid,H5,30.0,Fell,1814,44.21667,0.61667,1
8,Aguada,398,Valid,L6,1.62,Fell,1930,-31.6,-65.23333,1
9,Aguila Blanca,417,Valid,L,1.44,Fell,1920,-30.86667,-64.55,1


In [14]:
list(np.arange(0, 110, 10))

[0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

In [15]:
df.shape

(45716, 10)

In [16]:
df_1 = df.dropna().groupby('recclass')[['mass (kg)']].describe()
df_1 = df_1['mass (kg)'].sort_values(by='count', ascending=False)
df_1.shape

(387, 8)

In [17]:
df_1

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
recclass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
L6,6454.0,1.730408,15.786444,0.001,0.008,0.037,0.21400,564.000
H5,5514.0,2.621751,58.066252,0.001,0.007,0.026,0.17100,4000.000
H4,3298.0,1.107494,10.918188,0.001,0.004,0.019,0.11475,500.000
H6,3012.0,1.193220,10.381568,0.001,0.006,0.023,0.14325,295.000
L5,2699.0,2.860147,44.056703,0.001,0.007,0.037,0.19750,1750.000
...,...,...,...,...,...,...,...,...
L/LL5-6,1.0,3.287000,,3.287,3.287,3.287,3.28700,3.287
L/LL5/6,1.0,0.339000,,0.339,0.339,0.339,0.33900,0.339
L4-an,1.0,0.057000,,0.057,0.057,0.057,0.05700,0.057
L/LL6-an,1.0,5.900000,,5.900,5.900,5.900,5.90000,5.900


In [18]:
data = []
data.append(go.Bar(name='Mean', x=df_1.index, y=df_1['mean']))

data.append(go.Bar(name='Std', x=df_1.index, y=df_1['std']))

data.append(go.Bar(name='Max', x=df_1.index, y=df_1['max']))

In [19]:
fig = go.Figure(data=data)
fig.update_layout(barmode='group')
fig.update_xaxes(title_text="Class")
fig.update_yaxes(title_text="A")
fig.show()

In [20]:
df.sort_values(by='year')
df.head()

Unnamed: 0,name,id,nametype,recclass,mass (kg),fall,year,lat,lon,count
0,Aachen,1,Valid,L5,0.021,Fell,1880,50.775,6.08333,1
1,Aarhus,2,Valid,H6,0.72,Fell,1951,56.18333,10.23333,1
2,Abee,6,Valid,EH4,107.0,Fell,1952,54.21667,-113.0,1
3,Acapulco,10,Valid,Acapulcoite,1.914,Fell,1976,16.88333,-99.9,1
4,Achiras,370,Valid,L6,0.78,Fell,1902,-33.16667,-64.95,1


In [31]:
df = df[(df['year']!=0) & (df['year']<2100)]