In [1]:
import pandas as pd

In [2]:
rock_samples=pd.read_csv(r'C:\Users\Devesh\Desktop\learnwithdrg-main\learnwithdrg-main\OverTheMoon\sample-return\data\rocksamples.csv')

In [3]:
rock_samples.head()

Unnamed: 0,ID,Mission,Type,Subtype,Weight(g),Pristine(%)
0,10001,Apollo11,Soil,Unsieved,125.8,88.36
1,10002,Apollo11,Soil,Unsieved,5629.0,93.73
2,10003,Apollo11,Basalt,Ilmenite,213.0,65.56
3,10004,Apollo11,Core,Unsieved,44.8,71.76
4,10005,Apollo11,Core,Unsieved,53.4,40.31


We need to fine out the weight of the rock samples brought back per mission. This will give us an idea of the weight of the rock sample that we can request the astronauts to bring back. 

An issue we face here is the weight of the samples is in grams(g) and the weight of the rockets is in Kilograms(Kg). Thus we need to convert the weight of the rock samples to Kg

In [4]:
rock_samples['Weight(g)']=rock_samples['Weight(g)'].apply(lambda x:int(x)*0.001)
rock_samples.rename(columns={'Weight(g)':'Weight(Kg)'}, inplace=True)
rock_samples.head()

Unnamed: 0,ID,Mission,Type,Subtype,Weight(Kg),Pristine(%)
0,10001,Apollo11,Soil,Unsieved,0.125,88.36
1,10002,Apollo11,Soil,Unsieved,5.629,93.73
2,10003,Apollo11,Basalt,Ilmenite,0.213,65.56
3,10004,Apollo11,Core,Unsieved,0.044,71.76
4,10005,Apollo11,Core,Unsieved,0.053,40.31


Now we create a new dataframe containing all the individual missions.

In [5]:
missions=pd.DataFrame()
missions['Mission']=rock_samples['Mission'].unique()
missions.head()

Unnamed: 0,Mission
0,Apollo11
1,Apollo12
2,Apollo14
3,Apollo15
4,Apollo16


In [6]:
#Calculating the weight of the samples collected per mission.
sample_total_weight=rock_samples.groupby('Mission')['Weight(Kg)'].sum()
missions=pd.merge(missions,sample_total_weight, on='Mission')
missions.rename(columns={'Weight(Kg)':'Sample Weight(Kg)'}, inplace=True)
missions

Unnamed: 0,Mission,Sample Weight(Kg)
0,Apollo11,21.54
1,Apollo12,34.331
2,Apollo14,41.744
3,Apollo15,75.252
4,Apollo16,92.114
5,Apollo17,109.089


In [7]:
#Difference in weight across missions
missions['Weight Diff(Kg)']=missions['Sample Weight(Kg)'].diff()
missions

Unnamed: 0,Mission,Sample Weight(Kg),Weight Diff(Kg)
0,Apollo11,21.54,
1,Apollo12,34.331,12.791
2,Apollo14,41.744,7.413
3,Apollo15,75.252,33.508
4,Apollo16,92.114,16.862
5,Apollo17,109.089,16.975


In [8]:
missions['Weight Diff(Kg)']=missions['Weight Diff(Kg)'].fillna(value=0)
missions

Unnamed: 0,Mission,Sample Weight(Kg),Weight Diff(Kg)
0,Apollo11,21.54,0.0
1,Apollo12,34.331,12.791
2,Apollo14,41.744,7.413
3,Apollo15,75.252,33.508
4,Apollo16,92.114,16.862
5,Apollo17,109.089,16.975


In [9]:
missions['Lunar Module (LM)'] = {'Eagle (LM-5)', 'Intrepid (LM-6)', 'Antares (LM-8)', 'Falcon (LM-10)', 'Orion (LM-11)', 'Challenger (LM-12)'}
missions['LM Mass (kg)'] = {15103, 15235, 15264, 16430, 16445, 16456}
missions['LM Mass Diff'] = missions['LM Mass (kg)'].diff()
missions['LM Mass Diff'] = missions['LM Mass Diff'].fillna(value=0)

missions['Command Module (CM)'] = {'Columbia (CSM-107)', 'Yankee Clipper (CM-108)', 'Kitty Hawk (CM-110)', 'Endeavor (CM-112)', 'Casper (CM-113)', 'America (CM-114)'}
missions['CM Mass (kg)'] = {5560, 5609, 5758, 5875, 5840, 5960}
missions['CM Mass Diff'] = missions['CM Mass (kg)'].diff()
missions['CM Mass Diff'] = missions['CM Mass Diff'].fillna(value=0)

missions

Unnamed: 0,Mission,Sample Weight(Kg),Weight Diff(Kg),Lunar Module (LM),LM Mass (kg),LM Mass Diff,Command Module (CM),CM Mass (kg),CM Mass Diff
0,Apollo11,21.54,0.0,Orion (LM-11),15264,0.0,Kitty Hawk (CM-110),5960,0.0
1,Apollo12,34.331,12.791,Antares (LM-8),15235,-29.0,Endeavor (CM-112),5609,-351.0
2,Apollo14,41.744,7.413,Falcon (LM-10),16456,1221.0,Yankee Clipper (CM-108),5840,231.0
3,Apollo15,75.252,33.508,Intrepid (LM-6),16430,-26.0,America (CM-114),5875,35.0
4,Apollo16,92.114,16.862,Eagle (LM-5),16445,15.0,Columbia (CSM-107),5560,-315.0
5,Apollo17,109.089,16.975,Challenger (LM-12),15103,-1342.0,Casper (CM-113),5758,198.0


In [10]:
missions['Total Weight (kg)'] = missions['LM Mass (kg)'] + missions['CM Mass (kg)']
missions['Total Weight Diff'] = missions['LM Mass Diff'] + missions['CM Mass Diff']
missions

Unnamed: 0,Mission,Sample Weight(Kg),Weight Diff(Kg),Lunar Module (LM),LM Mass (kg),LM Mass Diff,Command Module (CM),CM Mass (kg),CM Mass Diff,Total Weight (kg),Total Weight Diff
0,Apollo11,21.54,0.0,Orion (LM-11),15264,0.0,Kitty Hawk (CM-110),5960,0.0,21224,0.0
1,Apollo12,34.331,12.791,Antares (LM-8),15235,-29.0,Endeavor (CM-112),5609,-351.0,20844,-380.0
2,Apollo14,41.744,7.413,Falcon (LM-10),16456,1221.0,Yankee Clipper (CM-108),5840,231.0,22296,1452.0
3,Apollo15,75.252,33.508,Intrepid (LM-6),16430,-26.0,America (CM-114),5875,35.0,22305,9.0
4,Apollo16,92.114,16.862,Eagle (LM-5),16445,15.0,Columbia (CSM-107),5560,-315.0,22005,-300.0
5,Apollo17,109.089,16.975,Challenger (LM-12),15103,-1342.0,Casper (CM-113),5758,198.0,20861,-1144.0


In [11]:
# Sample-to-weight ratio
saturnVPayload = 43500
missions['Crewed Area : Payload'] = missions['Total Weight (kg)'] / saturnVPayload
missions['Sample : Crewed Area'] = missions['Sample Weight(Kg)'] / missions['Total Weight (kg)']
missions['Sample : Payload'] = missions['Sample Weight(Kg)'] / saturnVPayload
missions

Unnamed: 0,Mission,Sample Weight(Kg),Weight Diff(Kg),Lunar Module (LM),LM Mass (kg),LM Mass Diff,Command Module (CM),CM Mass (kg),CM Mass Diff,Total Weight (kg),Total Weight Diff,Crewed Area : Payload,Sample : Crewed Area,Sample : Payload
0,Apollo11,21.54,0.0,Orion (LM-11),15264,0.0,Kitty Hawk (CM-110),5960,0.0,21224,0.0,0.487908,0.001015,0.000495
1,Apollo12,34.331,12.791,Antares (LM-8),15235,-29.0,Endeavor (CM-112),5609,-351.0,20844,-380.0,0.479172,0.001647,0.000789
2,Apollo14,41.744,7.413,Falcon (LM-10),16456,1221.0,Yankee Clipper (CM-108),5840,231.0,22296,1452.0,0.512552,0.001872,0.00096
3,Apollo15,75.252,33.508,Intrepid (LM-6),16430,-26.0,America (CM-114),5875,35.0,22305,9.0,0.512759,0.003374,0.00173
4,Apollo16,92.114,16.862,Eagle (LM-5),16445,15.0,Columbia (CSM-107),5560,-315.0,22005,-300.0,0.505862,0.004186,0.002118
5,Apollo17,109.089,16.975,Challenger (LM-12),15103,-1342.0,Casper (CM-113),5758,198.0,20861,-1144.0,0.479563,0.005229,0.002508


In [12]:
crewedArea_payload_ratio = missions['Crewed Area : Payload'].mean()
sample_crewedArea_ratio = missions['Sample : Crewed Area'].mean()
sample_payload_ratio = missions['Sample : Payload'].mean()
print(crewedArea_payload_ratio)
print(sample_crewedArea_ratio)
print(sample_payload_ratio)

0.49630268199233724
0.0028872243961060398
0.001433218390804599


We don't have all the details about the Artemis mission, but we do know currently that three iterations of the rocket will be cycled through for each mission. Each rocket will have one version meant to sustain a crew and one meant only for cargo. For the purposes of this module, we will focus only on the three rockets meant to house crew, to be more aligned with the Apollo missions. We also know that the expected payload of the Space Launch System (SLS) is expected to grow with each iteration, but that the current weight of Orion (the command and lunar modules combined) has one estimated weight today.

Again, we will call the command and lunar modules the crewed area, and we can create a dataframe with the information we have about the three crewed missions

In [13]:
artemis_crewedArea = 26520
artemis_mission = pd.DataFrame({'Mission':['artemis1','artemis1b','artemis2'],
                                 'Total Weight (kg)':[artemis_crewedArea,artemis_crewedArea,artemis_crewedArea],
                                 'Payload (kg)':[26988, 37965, 42955]})
artemis_mission

Unnamed: 0,Mission,Total Weight (kg),Payload (kg)
0,artemis1,26520,26988
1,artemis1b,26520,37965
2,artemis2,26520,42955


In [14]:
#And we can estimate the weight of samples based on the ratios we determined from the Artemis missions
artemis_mission['Sample Weight from Total (kg)'] = artemis_mission['Total Weight (kg)'] * sample_crewedArea_ratio
artemis_mission['Sample Weight from Payload (kg)'] = artemis_mission['Payload (kg)'] * sample_payload_ratio
artemis_mission

Unnamed: 0,Mission,Total Weight (kg),Payload (kg),Sample Weight from Total (kg),Sample Weight from Payload (kg)
0,artemis1,26520,26988,76.569191,38.679698
1,artemis1b,26520,37965,76.569191,54.412136
2,artemis2,26520,42955,76.569191,61.563896


In [15]:
#Finally, we can get the average of the two predictions:
artemis_mission['Estimated Sample Weight (kg)'] = (artemis_mission['Sample Weight from Payload (kg)'] + artemis_mission['Sample Weight from Total (kg)'])/2
artemis_mission

Unnamed: 0,Mission,Total Weight (kg),Payload (kg),Sample Weight from Total (kg),Sample Weight from Payload (kg),Estimated Sample Weight (kg)
0,artemis1,26520,26988,76.569191,38.679698,57.624444
1,artemis1b,26520,37965,76.569191,54.412136,65.490664
2,artemis2,26520,42955,76.569191,61.563896,69.066543


In [16]:
#First, we can determine how much remains of each sample that was returned from the Apollo missions, given the amount that was originally collected and the percentage of remaining pristine sample.
rock_samples['Remaining(kg)'] = rock_samples['Weight(Kg)'] * (rock_samples['Pristine(%)'] * .01)
rock_samples.describe()

Unnamed: 0,ID,Weight(Kg),Pristine(%),Remaining(kg)
count,2229.0,2229.0,2229.0,2229.0
mean,52058.432032,0.16782,84.512764,0.137732
std,26207.651471,0.637353,22.057299,0.526009
min,10001.0,0.0,0.0,0.0
25%,15437.0,0.003,80.01,0.002
50%,65527.0,0.01,92.3,0.008
75%,72142.0,0.093,98.14,0.078
max,79537.0,11.729,180.0,11.169527


In [17]:
#This helps us see that, on average, each sample weighs about .16 kg and has about 84% of the 
#original amount remaining. We can use this knowledge to extract only the samples that are 
#likely running low, which means that they have been used a lot by researchers.
low_samples = rock_samples.loc[(rock_samples['Weight(Kg)'] >= .16) & (rock_samples['Pristine(%)'] <= 50)]
low_samples.head()

Unnamed: 0,ID,Mission,Type,Subtype,Weight(Kg),Pristine(%),Remaining(kg)
11,10017,Apollo11,Basalt,Ilmenite,0.973,43.71,0.425298
14,10020,Apollo11,Basalt,Ilmenite,0.425,27.88,0.11849
15,10021,Apollo11,Breccia,Regolith,0.25,30.21,0.075525
29,10045,Apollo11,Basalt,Olivine,0.185,12.13,0.022441
37,10057,Apollo11,Basalt,Ilmenite,0.919,35.15,0.323028


In [18]:
low_samples.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 27 entries, 11 to 2183
Data columns (total 7 columns):
ID               27 non-null int64
Mission          27 non-null object
Type             27 non-null object
Subtype          27 non-null object
Weight(Kg)       27 non-null float64
Pristine(%)      27 non-null float64
Remaining(kg)    27 non-null float64
dtypes: float64(3), int64(1), object(3)
memory usage: 1.7+ KB


Twenty-seven samples seems like a small amount to base a recommendation on. We can probably find some other samples that are needed for more research here on Earth. To discover them, we can use the unique() function to see how many unique types we have across the low_samples and rock_samples dataframes

In [19]:
low_samples.Type.unique()

array(['Basalt', 'Breccia', 'Soil', 'Core'], dtype=object)

In [20]:
rock_samples.Type.unique()

array(['Soil', 'Basalt', 'Core', 'Breccia', 'Special', 'Crustal'],
      dtype=object)

We can see that, although six unique types were collected across all samples, the samples that are running low are from only four unique types. But this doesn't tell us everything about the samples we might want to focus on. For example, in our low_samples dataframe, how many of each type are actually considered low?

In [22]:
low_samples.groupby('Type')['Weight(Kg)'].count()

Type
Basalt     14
Breccia     8
Core        1
Soil        4
Name: Weight(Kg), dtype: int64

Notice that there are more Basalt and Breccia type rocks with low samples than those of Core and Soil. Additionally, because the likelihood is high that every mission has some Core and Soil collection requirements, we can focus on the Basalt and Breccia rock types for the samples that we need to have collected:

In [23]:
needed_samples = low_samples[low_samples['Type'].isin(['Basalt', 'Breccia'])]
needed_samples.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 22 entries, 11 to 2183
Data columns (total 7 columns):
ID               22 non-null int64
Mission          22 non-null object
Type             22 non-null object
Subtype          22 non-null object
Weight(Kg)       22 non-null float64
Pristine(%)      22 non-null float64
Remaining(kg)    22 non-null float64
dtypes: float64(3), int64(1), object(3)
memory usage: 1.4+ KB


Let's take a step back and see how the number of samples compares to the amount of sample. We can compare the total weight from the needed_samples dataframe to the rock_samples dataframe. That is, we'll compare the samples we've identified as running low to all the samples collected on Apollo missions.

In [24]:
needed_samples.groupby('Type')['Weight(Kg)'].sum()

Type
Basalt     17.421
Breccia    10.118
Name: Weight(Kg), dtype: float64

In [25]:
rock_samples.groupby('Type')['Weight(Kg)'].sum()

Type
Basalt      93.005
Breccia    168.460
Core        19.910
Crustal      4.729
Soil        87.223
Special      0.743
Name: Weight(Kg), dtype: float64

One bit of information really stands out: we've never had a lot of Crustal rocks in the first place.

We can add Crustal rocks to the set of needed samples:

In [26]:
needed_samples = needed_samples.append(rock_samples.loc[rock_samples['Type'] == 'Crustal'])
needed_samples.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 68 entries, 11 to 2189
Data columns (total 7 columns):
ID               68 non-null int64
Mission          68 non-null object
Type             68 non-null object
Subtype          68 non-null object
Weight(Kg)       68 non-null float64
Pristine(%)      68 non-null float64
Remaining(kg)    68 non-null float64
dtypes: float64(3), int64(1), object(3)
memory usage: 4.2+ KB


The final step is to consolidate everything we know into one table that can be shared with the astronauts. First, we need a column for each type of rock that we have already identified as rocks we want more samples of:

In [27]:
needed_samples_overview = pd.DataFrame()
needed_samples_overview['Type'] = needed_samples.Type.unique()
needed_samples_overview

Unnamed: 0,Type
0,Basalt
1,Breccia
2,Crustal


In [29]:
needed_sample_weights = needed_samples.groupby('Type')['Weight(Kg)'].sum().reset_index()
needed_samples_overview = pd.merge(needed_samples_overview, needed_sample_weights, on='Type')
needed_samples_overview.rename(columns={'Weight(Kg)':'Total Weight(kg)'}, inplace=True)
needed_samples_overview

Unnamed: 0,Type,Total Weight(kg)
0,Basalt,17.421
1,Breccia,10.118
2,Crustal,4.729


When astronauts are up on the Moon, one way they can identify rocks is by their size. If we can give them an estimated size of each type of rock, that might make their collection process easier.

In [33]:
needed_sample_ave_weights = needed_samples.groupby('Type')['Weight(Kg)'].mean().reset_index()
needed_samples_overview = pd.merge(needed_samples_overview, needed_sample_ave_weights, on='Type')
needed_samples_overview.rename(columns={'Weight(Kg)':'Ave Weight(kg)'}, inplace=True)
needed_samples_overview

Unnamed: 0,Type,Total Weight(kg),Weight(Kg)_x,Weight(Kg)_y,Ave Weight(kg)
0,Basalt,17.421,1.244357,1.244357,1.244357
1,Breccia,10.118,1.26475,1.26475,1.26475
2,Crustal,4.729,0.102804,0.102804,0.102804


In [34]:
total_rock_count = rock_samples.groupby('Type')['ID'].count().reset_index()
needed_samples_overview = pd.merge(needed_samples_overview, total_rock_count, on='Type')
needed_samples_overview.rename(columns={'ID':'Number of Samples'}, inplace=True)
total_rocks = needed_samples_overview['Number of Samples'].sum()
needed_samples_overview['Percentage of Rocks'] = needed_samples_overview['Number of Samples'] / total_rocks
needed_samples_overview

Unnamed: 0,Type,Total Weight(kg),Weight(Kg)_x,Weight(Kg)_y,Ave Weight(kg),Number of Samples,Percentage of Rocks
0,Basalt,17.421,1.244357,1.244357,1.244357,351,0.25885
1,Breccia,10.118,1.26475,1.26475,1.26475,959,0.707227
2,Crustal,4.729,0.102804,0.102804,0.102804,46,0.033923


In [35]:
artemis_ave_weight = artemis_mission['Estimated Sample Weight (kg)'].mean()
artemis_ave_weight

64.0605505115232

In [36]:
needed_samples_overview['Weight to Collect'] = needed_samples_overview['Percentage of Rocks'] * artemis_ave_weight

needed_samples_overview['Rocks to Collect'] = needed_samples_overview['Weight to Collect'] / needed_samples_overview['Ave Weight(kg)']

needed_samples_overview

Unnamed: 0,Type,Total Weight(kg),Weight(Kg)_x,Weight(Kg)_y,Ave Weight(kg),Number of Samples,Percentage of Rocks,Weight to Collect,Rocks to Collect
0,Basalt,17.421,1.244357,1.244357,1.244357,351,0.25885,16.582045,13.325793
1,Breccia,10.118,1.26475,1.26475,1.26475,959,0.707227,45.30536,35.821593
2,Crustal,4.729,0.102804,0.102804,0.102804,46,0.033923,2.173146,21.138654
