In [None]:
!pip install foundry-ml

In [None]:
from foundry import Foundry
# Create the Foundry client used to fetch the dataset.
f = Foundry()

# Load the dataset identified by '10.18126/524z-vd6m' (looks like a DOI) and
# read its contents into `res`.
# NOTE(review): presumably globus=False selects a non-Globus download path —
# confirm against the foundry-ml documentation.
f.load('10.18126/524z-vd6m', globus=False)
res = f.load_data()

In [None]:
import pandas as pd

In [None]:
# Unpack the training split: entry 0 is used below as the feature table and
# entry 1 as the target table.
X_train = res['train'][0]
y_train = res['train'][1]

In [None]:
# Display the training feature table.
X_train

In [None]:
from sklearn.neighbors import KNeighborsRegressor

In [None]:
# k-nearest-neighbours regressor constructed with its default arguments.
neigh = KNeighborsRegressor()

# Features: every input column except "formula" and "cr_amount".
X_just_numbers = X_train.drop(columns=["formula", "cr_amount"])

# Target: every output column except "elongation" and "yield strength"
# (going by the variable name, what remains is the tensile strength — confirm).
y_just_tensile = y_train.drop(columns=["elongation", "yield strength"])

In [None]:
# Fit the regressor on the selected feature columns and the remaining target column(s).
neigh.fit(X_just_numbers, y_just_tensile)

In [None]:
import pandas as pd
import re

def composition_to_weight_percent(composition_string):
    """Convert a formula string of atomic amounts into alloying-element weight percents.

    The string is a run of element symbols, each followed by a numeric amount
    (for example ``'Fe0.62Ni0.19Co0.15'``). Each amount is multiplied by the
    element's atomic weight and normalised by the total so the result is in
    weight percent of the whole composition, iron included.

    Parameters
    ----------
    composition_string : str
        Element symbols (capitalised, one or two letters) each followed by an
        integer or decimal amount. An element that appears more than once has
        its amounts summed.

    Returns
    -------
    pandas.DataFrame
        A single row (index 0) with one float column per alloying element, in
        the fixed order ``c, mn, si, cr, ni, mo, v, n, nb, co, w, al, ti``.
        Elements absent from the string are reported as 0.0. Iron contributes
        to the total weight but has no column of its own.

    Raises
    ------
    ValueError
        If no ``<symbol><amount>`` pair can be found in the string.
    KeyError
        If the string contains an element with no entry in the atomic-weight table.
    ZeroDivisionError
        If every amount is zero, so the total weight is zero.
    """
    atomic_weights = {
        'Fe': 55.845, 'C': 12.011, 'Mn': 54.938045, 'Si': 28.0855, 'Cr': 51.9961,
        'Ni': 58.6934, 'Mo': 95.96, 'V': 50.9415, 'Nb': 92.90638, 'Co': 58.933195,
        # Atomic nitrogen, not the N2 molecule (which would be ~28.014).
        'W': 183.84, 'Al': 26.9815386, 'Ti': 47.867, 'N': 14.0067
    }
    elements_to_show = ['c', 'mn', 'si', 'cr', 'ni', 'mo', 'v', 'n', 'nb', 'co', 'w', 'al', 'ti']

    # Accept both integer ("Fe1") and decimal ("Fe0.62") amounts; requiring a
    # decimal point would silently drop integer-valued elements from the total.
    parsed = re.findall(r'([A-Z][a-z]?)(\d+(?:\.\d+)?)', composition_string)
    if not parsed:
        raise ValueError(
            f"No element/amount pairs found in composition string: {composition_string!r}"
        )

    unknown = sorted({symbol for symbol, _ in parsed if symbol not in atomic_weights})
    if unknown:
        raise KeyError(f"No atomic weight available for element(s): {', '.join(unknown)}")

    # Weight contributed by each element, keyed by lowercase symbol so the keys
    # line up with the output column names. Repeated symbols accumulate.
    weights = {}
    for symbol, amount in parsed:
        column = symbol.lower()
        weights[column] = weights.get(column, 0.0) + float(amount) * atomic_weights[symbol]
    total_weight = sum(weights.values())

    # Build the row directly from the requested columns: missing elements fall
    # back to 0.0 and iron is simply never selected, so compositions without
    # iron work as well.
    row = {
        column: weights.get(column, 0.0) / total_weight * 100
        for column in elements_to_show
    }
    return pd.DataFrame([row], columns=elements_to_show)

# Example formula: element symbols, each followed by a decimal amount.
composition_string = 'Fe0.620C0.000953Mn0.000521Si0.00102Cr0.000110Ni0.192Mo0.0176V0.000112Nb0.0000616Co0.146Al0.00318Ti0.0185'
# Print the one-row table of weight percentages computed for the example.
print(composition_to_weight_percent(composition_string))

          c        mn        si        cr         ni        mo         v    n  \
0  0.019996  0.050002  0.050045  0.009992  19.686351  2.950378  0.009967  0.0   

         nb         co    w        al        ti  
0  0.009998  15.030989  0.0  0.149889  1.546973  


In [None]:
# Keep the weight-percent table for the example formula so it can be passed to the model.
preprocessed = composition_to_weight_percent(composition_string)

In [None]:
# Display the model input built from the example formula.
preprocessed

Unnamed: 0,c,mn,si,cr,ni,mo,v,n,nb,co,w,al,ti
0,0.019996,0.050002,0.050045,0.009992,19.686351,2.950378,0.009967,0.0,0.009998,15.030989,0.0,0.149889,1.546973


In [None]:
# Predict the target for the example alloy with the fitted regressor.
# NOTE(review): the units of the prediction are those of the training target — confirm against the dataset.
neigh.predict(preprocessed)

array([[2435.38]])

In [None]:
import pickle

# Save the fitted model to disk.
# The file handle is deliberately not named `f`: that name already holds the
# Foundry client, and rebinding it here would leave `f` pointing at a closed
# file for any cell that uses the client afterwards.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(neigh, model_file)

In [None]:
!pip freeze

absl-py==1.4.0
alabaster==0.7.13
albumentations==1.2.1
altair==4.2.2
anyio==3.6.2
appdirs==1.4.4
argon2-cffi==21.3.0
argon2-cffi-bindings==21.2.0
array-record==0.2.0
arviz==0.15.1
astropy==5.2.2
astunparse==1.6.3
attrs==23.1.0
audioread==3.0.0
autograd==1.5
Babel==2.12.1
backcall==0.2.0
beautifulsoup4==4.11.2
bleach==6.0.0
blis==0.7.9
blosc2==2.0.0
bokeh==2.4.3
branca==0.6.0
build==0.10.0
CacheControl==0.12.11
cached-property==1.5.2
cachetools==5.3.0
catalogue==2.0.8
certifi==2022.12.7
cffi==1.15.1
chardet==4.0.0
charset-normalizer==2.0.12
chex==0.1.7
click==8.1.3
cloudpickle==2.2.1
cmake==3.25.2
cmdstanpy==1.1.0
colorcet==3.0.1
colorlover==0.3.0
community==1.0.0b1
confection==0.0.4
cons==0.4.5
contextlib2==0.6.0.post1
contourpy==1.0.7
convertdate==2.4.0
cryptography==40.0.2
cufflinks==0.17.3
cvxopt==1.3.0
cvxpy==1.3.1
cycler==0.11.0
cymem==2.0.7
Cython==0.29.34
dask==2022.12.1
datascience==0.17.6
db-dtypes==1.1.1
dbus-python==1.2.16
debugpy==1.6.6
decorator==4.4.2
defusedxml==0.7.1
di