In [1]:
!pip install polars beautifulsoup4 splinter selenium



In [2]:
!pip install pint



In [3]:
import polars as pl
import numpy as np
import pathlib
import re
from bs4 import BeautifulSoup
# Will use firefox browser
from splinter import Browser
import time
import pint

## Prepping the data. Do not include in main

In [4]:
cwd = pathlib.Path.cwd()

if cwd.name == 'Mild-Steel-Tempering':
    print("Path is project root")
else:
    print("Please correct current working directory to the project root")

Path is project root


In [5]:
resources_path = pathlib.PurePath(pathlib.PurePath(cwd), 'resources')
resources_path

PurePosixPath('/home/mox/Documents/coding_projects/bootcamp_local/Homeworks/Mild-Steel-Tempering/resources')

In [6]:
data_path = f"{resources_path}/MIF_search.csv"
df_data = pl.read_csv(data_path)
df_data.glimpse()

Rows: 8
Columns: 1
$ steel <str> 'AISI-SAE 4037', 'AISI 4047', 'AISI-SAE 1335', 'AISI 1049', 'AISI 52100', 'AISI-SAE 1042', 'AISI-SAE 9254', 'AISI-SAE 1080'



## Visit the Website

In [7]:
browser = Browser('firefox')
# https://www.makeitfrom.com/material-group/Wrought-Alloy-Steel-SAE-AISI
# OR
# https://www.makeitfrom.com/material-group/Wrought-Carbon-Or-Non-Alloy-Steel
base_url = "https://www.makeitfrom.com"
search_url = "https://duckduckgo.com/?hps=1&q="
search_suffix = "+site%3Amakeitfrom.com&atb=v427-1&ia=web"

### Rename some steel present in MIF that were not searching correctly

In [8]:
df_data

steel
str
"""AISI-SAE 4037"""
"""AISI 4047"""
"""AISI-SAE 1335"""
"""AISI 1049"""
"""AISI 52100"""
"""AISI-SAE 1042"""
"""AISI-SAE 9254"""
"""AISI-SAE 1080"""


In [9]:
len([print(steel) for steel in df_data['steel']])

AISI-SAE 4037
AISI 4047
AISI-SAE 1335
AISI 1049
AISI 52100
AISI-SAE 1042
AISI-SAE 9254
AISI-SAE 1080


8

In [10]:
steels_to_search_MIF = df_data['steel']

# scrape a single page
grab cold drawn/rolled steel. The data better matches the AZoM data

In [11]:
steel = "AISI 1049"
steel_url = "/material-properties/SAE-AISI-1049-G10490-Carbon-Steel"

test vs https://www.makeitfrom.com/material-properties/SAE-AISI-4047-G40470-Molybdenum-Steel

Do not need to nav deeper for this one

In [12]:
steel = "AISI-SAE 4027"

In [13]:
def handle_ranges(values_list):
    if values_list[1] == 'to':
        # map the range of values to a list of integers, then take the mean of it.
        value = np.mean(list(map(int, values_list[0:3:2])))
        units = values_list[3]
    else:
        value = np.float64(values_list[0])
        units = values_list[1]
    return value, units

In [14]:
def get_row(soup):
    mech_props = soup.find_all('div', class_='mech')
    # -2 is a magic number
    # the last 2 values are always tensile strength
    uts = 'no_label'
    uts_units = 'no_units'
    ys = 'no_label'
    ys_units = 'no_units'

    for div in mech_props[-2:]:
        ps = div.find_all('p')
        # get the property name
        label = ps[0].text
        # get the string of values and units and split it
        values = ps[1].text.split(' ')
        # Grab the metric value 
        value, units = handle_ranges(values)
        if 'uts' in label.lower():
            uts = value
            uts_units = units
        elif 'yield' in label.lower():
            ys = value
            ys_units = units
    row = (steel, ys, ys_units, uts, uts_units)
    return row


In [15]:
def handle_cold_drawn():
    # Browser is already on steel page
    soup = BeautifulSoup(browser.html, 'html.parser')
    cold_drawn_present = True
    steel_page = soup.find('div', class_='split links break-mid')
    # https://stackoverflow.com/questions/33404049/navigation-with-beautifulsoup
    
    cold_drawn = steel_page.find('a', string= lambda text: text \
                            #cold drawn matches values from AZoM for steels that are on both
                                and "Cold Drawn" in text\
                                # not "and" removed other processing in addtional to cold drawn if applicable
                                and not "and" in text) 
    try: 
        cold_drawn_page = cold_drawn.get('href')
        time.sleep(0.15)
        browser.visit(f'{base_url}{cold_drawn_page}')
    except AttributeError:
        # do not go to the cold drawn page if not present
        pass    

In [18]:

list_of_scraped = [("steel", "ys", "ys_unit", "uts", "uts_unit")] 
for steel in df_data['steel']:
    print("Working on:", steel)
    #search MIF for the steel
    time.sleep(0.15)
    browser.visit(f"{search_url}{steel}{search_suffix}")
    soup_search = BeautifulSoup(browser.html, 'html.parser')
    steel_link = soup_search.find('a',{'data-testid': "result-extras-url-link"}).get('href')
    
    time.sleep(0.15)
    browser.visit(steel_link)
    # handle cold drawn will land us on the desired steel page
    handle_cold_drawn()
    soup_results = BeautifulSoup(browser.html, "html.parser")
    list_of_scraped.append(get_row(soup_results))
list_of_scraped


Working on: AISI-SAE 4037
Working on: AISI 4047
Working on: AISI-SAE 1335
Working on: AISI 1049
Working on: AISI 52100
Working on: AISI-SAE 1042
Working on: AISI-SAE 9254
Working on: AISI-SAE 1080


[('steel', 'ys', 'ys_unit', 'uts', 'uts_unit'),
 ('AISI-SAE 4037', np.float64(290.0), 'MPa', np.float64(540.0), 'MPa'),
 ('AISI 4047', np.float64(310.0), 'MPa', np.float64(580.0), 'MPa'),
 ('AISI-SAE 1335', np.float64(300.0), 'MPa', np.float64(550.0), 'MPa'),
 ('AISI 1049', np.float64(640.0), 'MPa', np.float64(750.0), 'MPa'),
 ('AISI 52100', np.float64(460.0), 'MPa', np.float64(1300.0), 'MPa'),
 ('AISI-SAE 1042', np.float64(580.0), 'MPa', np.float64(700.0), 'MPa'),
 ('AISI-SAE 9254', np.float64(410.0), 'MPa', np.float64(660.0), 'MPa'),
 ('AISI-SAE 1080', np.float64(535.0), 'MPa', np.float64(820.0), 'MPa')]

In [22]:
df_MIF = pl.DataFrame(list_of_scraped, orient='row')
df_MIF = df_MIF.rename(df_MIF.head(1).to_dicts().pop())[1:]
df_MIF

steel,ys,ys_unit,uts,uts_unit
str,str,str,str,str
"""AISI-SAE 4037""","""290""","""MPa""","""540""","""MPa"""
"""AISI 4047""","""310""","""MPa""","""580""","""MPa"""
"""AISI-SAE 1335""","""300""","""MPa""","""550""","""MPa"""
"""AISI 1049""","""640""","""MPa""","""750""","""MPa"""
"""AISI 52100""","""460""","""MPa""","""1300""","""MPa"""
"""AISI-SAE 1042""","""580""","""MPa""","""700""","""MPa"""
"""AISI-SAE 9254""","""410""","""MPa""","""660""","""MPa"""
"""AISI-SAE 1080""","""535""","""MPa""","""820""","""MPa"""


In [24]:
df_MIF_clean = df_MIF.select(['steel', 'ys', 'uts']).rename({'ys' : 'yield_strength_MPa',
                                                             'uts' : 'ultimate_strength_MPA'})
df_MIF_clean

steel,yield_strength_MPa,ultimate_strength_MPA
str,str,str
"""AISI-SAE 4037""","""290""","""540"""
"""AISI 4047""","""310""","""580"""
"""AISI-SAE 1335""","""300""","""550"""
"""AISI 1049""","""640""","""750"""
"""AISI 52100""","""460""","""1300"""
"""AISI-SAE 1042""","""580""","""700"""
"""AISI-SAE 9254""","""410""","""660"""
"""AISI-SAE 1080""","""535""","""820"""


# Itterations

In [17]:
# Code to force an error so this doesn't run when run all
=

SyntaxError: invalid syntax (2918180382.py, line 2)

In [109]:
def handle_ranges(values_list):
    if values_list[1] == 'to':
        # map the range of values to a list of integers, then take the mean of it.
        value = np.mean(list(map(int, values_list[0:3:2])))
        units = values_list[3]
    else:
        value = np.float64(values_list[0])
        units = values_list[1]
    return value, units

In [110]:

soup = BeautifulSoup(browser.html, "html.parser")
mech_props = soup.find_all('div', class_='mech')
# -2 is a magic number
# the last 2 values are always tensile strength
uts = 'no_label'
uts_units = 'no_units'
ys = 'no_label'
ys_units = 'no_units'

for div in mech_props[-2:]:
    ps = div.find_all('p')
    # get the property name
    label = ps[0].text
    # get the string of values and units and split it
    values = ps[1].text.split(' ')
    # Grab the metric value 
    value, units = handle_ranges(values)
    if 'uts' in label.lower():
        uts = value
        uts_units = units
    elif 'yield' in label.lower():
        ys = value
        ys_units = units
row = (steel, ys, ys_units, uts, uts_units)
print(row)

('AISI-SAE 1080', np.float64(505.0), 'MPa', np.float64(715.0), 'MPa')
