In [1]:
!pip install polars beautifulsoup4 splinter selenium



In [2]:
!pip install pint



In [3]:
import polars as pl
import numpy as np
import pathlib
import re
from bs4 import BeautifulSoup
# Will use firefox browser
from splinter import Browser
import time
import pint

## Prepping the data. Do not include in main

In [4]:
cwd = pathlib.Path.cwd()

if cwd.name == 'Mild-Steel-Tempering':
    print("Path is project root")
else:
    print("Please correct current working directory to the project root")

Path is project root


In [5]:
resources_path = pathlib.PurePath(pathlib.PurePath(cwd), 'resources')
resources_path

PurePosixPath('/home/mox/Documents/coding_projects/bootcamp_local/Homeworks/Mild-Steel-Tempering/resources')

In [6]:
data_path = f"{resources_path}/searchable_steels.csv"
df_data = pl.read_csv(data_path)
df_data.glimpse()

Rows: 29
Columns: 1
$ searchable <str> 'AISI 1090', 'AISI-SAE 4037', 'AISI-SAE 5140', 'AISI-SAE 1042', 'AISI-SAE 4140', 'AISI-SAE 1080', 'AISI-SAE 1045', 'AISI-SAE 1050', 'AISI-SAE 1035', 'AISI 6145'



## Visit the Website

In [7]:
browser = Browser('firefox')
base_url = "https://www.azom.com"
search_path = "/search.aspx?q="

### Rename some steel present in AZoM that were not searching correctly

In [8]:
df_data

searchable
str
"""AISI 1090"""
"""AISI-SAE 4037"""
"""AISI-SAE 5140"""
"""AISI-SAE 1042"""
"""AISI-SAE 4140"""
…
"""AISI-SAE 9254"""
"""AISI-SAE 1065"""
"""AISI-SAE 1040"""
"""AISI-SAE 6150"""


In [9]:
len([print(steel) for steel in df_data['searchable']])

AISI 1090
AISI-SAE 4037
AISI-SAE 5140
AISI-SAE 1042
AISI-SAE 4140
AISI-SAE 1080
AISI-SAE 1045
AISI-SAE 1050
AISI-SAE 1035
AISI 6145
AISI-SAE 1335
AISI-SAE 1038
AISI-SAE 5160
AISI 4640
AISI 1095
AISI 4047
AISI 1055
AISI 1074 Carbon Steel
AISI-SAE 4340
AISI-SAE 4027
AISI 52100
AISI-SAE 1030
AISI 1030
AISI 1049
AISI-SAE 9254
AISI-SAE 1065
AISI-SAE 1040
AISI-SAE 6150
AISI-SAE 1026


29

In [10]:
# Search the steel 
def get_soup(soup):
    search_result = soup.find('div', class_='resultsContainer')
    first_item = search_result.find('a')
    # https://pytutorial.com/get-element-href-beautifulsoup/
    steel_link = first_item.get('href')
    time.sleep(0.15)
    browser.visit(f"{base_url}{steel_link}")
    soup2 = BeautifulSoup(browser.html, 'html.parser')
    return soup2



In [11]:
# already have elemental composition
# property - metric - imperial
# table_elements = tables[0]
# table_phys_props = tables[1]
# table_mecha_props = tables[2]
# table_therm_props = tables[3]
# table_other_desigs = tables[4]
#parse tables
def get_tables(soup):
    tables = []
    # parse all tables in the
    for html_table in soup.find_all('table'):
        table = []
        # parse rows in the table
        for tr in html_table.find_all('tr'):
            row = []
            #parse data cells in the row
            for t in tr.find_all(['th', 'td']):
                text = t.get_text(strip=True)
                #add data to row
                row.append(text)
            #add row to table
            table.append(row)
    #returns a list of list of lists
        tables.append(table)
    return tables
    

In [12]:
def make_dfs(lolols):
    tables = []
    # convert the list of lists to a dataframe
    for lol in lolols:
        try:
            df = pl.DataFrame(lol, orient='row')
            # rename columns using first row
            # https://stackoverflow.com/questions/75187317/how-to-rename-column-names-with-first-row-in-polars
            # [1:] removes the first row that replaced the column names 
            df = df.rename(df.head(1).to_dicts().pop())[1:]
            # add the dataframe to the list of tables on for this steel
            tables.append(df)
        except:
            tables.append(lol)
    return tables

In [13]:
dict_steel_tables = {}
try:
    for steel in df_data['searchable']:
        print("Working on:", steel)
        #search AZoM for the steel
        browser.visit(f"{base_url}{search_path}{steel}")
        soup_search = BeautifulSoup(browser.html, 'html.parser')
    
        #takes soup html and returns list of lists
        soup = get_soup(soup_search)
        lolols = get_tables(soup)
        dict_steel_tables[steel] = make_dfs(lolols)
        # tables list of lists and returns a dictionary
        # robots.txt indicates Crawl-delay: 120
        time.sleep(0.15)
except: 
    dict_steel_tables[steel] = []

Working on: AISI 1090
Working on: AISI-SAE 4037
Working on: AISI-SAE 5140
Working on: AISI-SAE 1042
Working on: AISI-SAE 4140
Working on: AISI-SAE 1080
Working on: AISI-SAE 1045
Working on: AISI-SAE 1050
Working on: AISI-SAE 1035
Working on: AISI 6145
Working on: AISI-SAE 1335
Working on: AISI-SAE 1038
Working on: AISI-SAE 5160
Working on: AISI 4640
Working on: AISI 1095
Working on: AISI 4047
Working on: AISI 1055
Working on: AISI 1074 Carbon Steel
Working on: AISI-SAE 4340
Working on: AISI-SAE 4027
Working on: AISI 52100
Working on: AISI-SAE 1030
Working on: AISI 1030
Working on: AISI 1049
Working on: AISI-SAE 9254


In [14]:
dict_steel_tables

{'AISI 1090': [shape: (5, 2)
  ┌────────────────┬───────────────┐
  │ Element        ┆ Content (%)   │
  │ ---            ┆ ---           │
  │ str            ┆ str           │
  ╞════════════════╪═══════════════╡
  │ Iron, Fe       ┆ 98.03 - 98.55 │
  │ Carbon, C      ┆ 0.85 - 0.980  │
  │ Manganese, Mn  ┆ 0.60 - 0.90   │
  │ Sulfur, S      ┆ ≤ 0.050       │
  │ Phosphorous, P ┆ ≤ 0.040       │
  └────────────────┴───────────────┘,
  shape: (1, 3)
  ┌────────────┬────────────┬──────────────┐
  │ Properties ┆ Metric     ┆ Imperial     │
  │ ---        ┆ ---        ┆ ---          │
  │ str        ┆ str        ┆ str          │
  ╞════════════╪════════════╪══════════════╡
  │ Density    ┆ 7.85 g/cm3 ┆ 0.284 lb/in³ │
  └────────────┴────────────┴──────────────┘,
  shape: (13, 3)
  ┌─────────────────────────────────┬─────────────┬─────────────────┐
  │ Properties                      ┆ Metric      ┆ Imperial        │
  │ ---                             ┆ ---         ┆ ---             │
  │ 

# Working with list of tables 
dict_steel_tables

Filter out "Other Designations" tables that did not get sucessfully converted to a polars dataframe (table 4)

Remove tables with properties that are not commonly present (thermal propterties table 3)

Remove elemental compositions, already present in the data (table 0)

Physical and mechanical properties are desired (tables 1,2)

In [15]:
dict_steel_tables_keep = {}
bool_check = True
polars_check = True 
for steel, tables in dict_steel_tables.items():
    all_polars = True
    count_polars = 0
    for table in tables:
        if isinstance(table, pl.DataFrame): 
            count_polars += 1 
    if count_polars != len(tables):
        all_polars = False
    if not (bool_check and all_polars):
        bool_check = False
    print("All polars?", all_polars, '|', len(tables), "Tables | Steel:", steel)
    dict_steel_tables_keep[steel] = tables[1:3]
print(polars_check)

All polars? True | 4 Tables | Steel: AISI 1090
All polars? True | 4 Tables | Steel: AISI-SAE 4037
All polars? True | 5 Tables | Steel: AISI-SAE 5140
All polars? True | 4 Tables | Steel: AISI-SAE 1042
All polars? True | 5 Tables | Steel: AISI-SAE 4140
All polars? True | 4 Tables | Steel: AISI-SAE 1080
All polars? True | 5 Tables | Steel: AISI-SAE 1045
All polars? True | 4 Tables | Steel: AISI-SAE 1050
All polars? False | 5 Tables | Steel: AISI-SAE 1035
All polars? True | 4 Tables | Steel: AISI 6145
All polars? True | 4 Tables | Steel: AISI-SAE 1335
All polars? True | 4 Tables | Steel: AISI-SAE 1038
All polars? True | 4 Tables | Steel: AISI-SAE 5160
All polars? True | 4 Tables | Steel: AISI 4640
All polars? True | 5 Tables | Steel: AISI 1095
All polars? True | 4 Tables | Steel: AISI 4047
All polars? True | 4 Tables | Steel: AISI 1055
All polars? True | 3 Tables | Steel: AISI 1074 Carbon Steel
All polars? False | 5 Tables | Steel: AISI-SAE 4340
All polars? True | 4 Tables | Steel: AISI-SA

# 2 "tables" are not polars dataframes
Check and remove if necessary

In [16]:
dict_steel_tables_keep['AISI-SAE 1035']

[shape: (1, 3)
 ┌────────────┬────────────┬──────────────┐
 │ Properties ┆ Metric     ┆ Imperial     │
 │ ---        ┆ ---        ┆ ---          │
 │ str        ┆ str        ┆ str          │
 ╞════════════╪════════════╪══════════════╡
 │ Density    ┆ 7.85 g/cm3 ┆ 0.284 lb/in3 │
 └────────────┴────────────┴──────────────┘,
 shape: (13, 3)
 ┌─────────────────────────────────┬─────────────┬─────────────────┐
 │ Properties                      ┆ Metric      ┆ Imperial        │
 │ ---                             ┆ ---         ┆ ---             │
 │ str                             ┆ str         ┆ str             │
 ╞═════════════════════════════════╪═════════════╪═════════════════╡
 │ Tensile strength, ultimate      ┆ 585 MPa     ┆ 84800 psi       │
 │ Tensile strength, yield         ┆ 370 MPa     ┆ 53700 psi       │
 │ Modulus of elasticity           ┆ 190-210 GPa ┆ 29700-30458 ksi │
 │ Bulk modulus (typical for stee… ┆ 140 GPa     ┆ 20300 ksi       │
 │ Shear modulus (typical for ste… ┆ 80

## Check polars desired tables were kept

Split desired tables into 2 dictionaries of tables

In [17]:
dict_phys_props = {}
dict_mech_props = {}

polars_check = True 
try:
    for steel, tables in dict_steel_tables_keep.items():
        all_polars = True
        count_polars = 0
        for table in tables:
            if isinstance(table, pl.DataFrame): 
                count_polars += 1 
        if count_polars != len(tables):
            all_polars = False
        if not (bool_check and all_polars):
            bool_check = False
        print("All polars?", all_polars, '|', len(tables), "Tables | Steel:", steel)

        phys_props = tables[0]
        mech_props = tables[1]
        print("Physical properties:", phys_props.shape, "| Properties head(1)", phys_props.select(pl.col('Properties').head(1)) )
        print("Mechanical propsshape:", mech_props.shape, "| Properties head(1)", mech_props.select(pl.col('Properties').head(1)) )
        dict_phys_props[steel] = phys_props
        dict_mech_props[steel] = mech_props
except:
    dict_phys_props[steel] = pl.DataFrame({'Properties' : [], 'Metric' : []})
    dict_mech_props[steel] = pl.DataFrame({'Properties' : [], 'Metric' : []})

print("All Polars?", polars_check)

All polars? True | 2 Tables | Steel: AISI 1090
Physical properties: (1, 3) | Properties head(1) shape: (1, 1)
┌────────────┐
│ Properties │
│ ---        │
│ str        │
╞════════════╡
│ Density    │
└────────────┘
Mechanical propsshape: (13, 3) | Properties head(1) shape: (1, 1)
┌──────────────────┐
│ Properties       │
│ ---              │
│ str              │
╞══════════════════╡
│ Tensile strength │
└──────────────────┘
All polars? True | 2 Tables | Steel: AISI-SAE 4037
Physical properties: (1, 3) | Properties head(1) shape: (1, 1)
┌────────────┐
│ Properties │
│ ---        │
│ str        │
╞════════════╡
│ Density    │
└────────────┘
Mechanical propsshape: (10, 3) | Properties head(1) shape: (1, 1)
┌─────────────────┐
│ Properties      │
│ ---             │
│ str             │
╞═════════════════╡
│ Elastic modulus │
└─────────────────┘
All polars? True | 2 Tables | Steel: AISI-SAE 5140
Physical properties: (1, 3) | Properties head(1) shape: (1, 1)
┌────────────┐
│ Properties │
│ -

# Filter for complete data, and select unit standard

Kpep only property and metric column from both sets of tables

Find which properties are present in all tables of each type

In [18]:
def remove_inperial(property_dict):
    new_dict = {}
    for key, table in property_dict.items():
        new_dict[key] = table.select(["Properties", "Metric"])
    return new_dict
dict_phys_props =  remove_inperial(dict_phys_props)
dict_mech_props = remove_inperial(dict_mech_props)


dict_phys_props['AISI 52100']

Properties,Metric
str,str
"""Density""","""7.81 g/cm3"""
"""Melting point""","""1424°C"""


In [19]:
dict_mech_props['AISI 52100']

Properties,Metric
str,str
"""Bulk modulus (typical for stee…","""140 GPa"""
"""Shear modulus (typical for ste…","""80 GPa"""
"""Elastic modulus""","""190-210 GPa"""
"""Poisson's ratio""","""0.27-0.30"""
"""Hardness, Brinell""","""-"""
…,…
"""Hardness, Rockwell C (quenched…","""64"""
"""Hardness, Rockwell C (quenched…","""64"""
"""Hardness, Rockwell C (quenched…","""66"""
"""Hardness, Vickers (converted f…","""848"""


# Properties to keep

Only properties present in all metals are the following:

Physical
* Density

Mechanical
* Elastic modulus
* Poisson's ratio
* Ultimate Tensile Strength
* Yield Strength

## Process Density into a df

# Convert to single dataframe

Save as intermediate working copy

Steel|property|property_units|property2|property2_units|etc|
---|---|---|---|---|--
|||||


In [20]:
#fix formatting in single cell. Did not incude a space
dict_phys_props['AISI 1074 Carbon Steel'][0, "Metric"] = '7.7-8.03 g/cm3'

In [21]:
s_steel = pl.Series(name = 'steel', dtype= pl.String)
s_density = pl.Series(name = 'density', dtype= pl.String)
s_unit = pl.Series(name = 'units_density', dtype= pl.String)

for steel, table in dict_phys_props.items():
    try:
        #select only the  cell of interest
        df_density = table.filter(pl.col('Properties').str.contains("Density"))['Metric'].str.split(' ')
        density = df_density.list.get(0)
        unit = df_density.list.get(1)
        # Make a series of all 3
        s_steel.extend(pl.Series(name = 'steel', values = [steel]))
        s_density = s_density.extend(density)
        s_unit = s_unit.extend(unit)
    except:
        print("Failed on", steel)

Failed on AISI-SAE 9254


In [22]:
df_steel_properties = pl.DataFrame([s_steel, s_density, s_unit])
df_steel_properties

steel,density,units_density
str,str,str
"""AISI 1090""","""7.85""","""g/cm3"""
"""AISI-SAE 4037""","""7.85""","""g/cm3"""
"""AISI-SAE 5140""","""7.85""","""g/cm3"""
"""AISI-SAE 1042""","""7.844""","""g/cm3"""
"""AISI-SAE 4140""","""7.85""","""g/cm3"""
…,…,…
"""AISI-SAE 4027""","""7.85""","""g/cm3"""
"""AISI 52100""","""7.81""","""g/cm3"""
"""AISI-SAE 1030""","""7.85""","""g/cc"""
"""AISI 1030""","""7.85""","""g/cc"""


## Process mechanical properties

In [76]:
steel = "AISI-SAE 1030"
table = dict_mech_props[steel]
table
table.filter(pl.col('Properties').str.to_lowercase().str.contains("tensile") & 
             pl.col('Properties').str.to_lowercase().str.contains("yield").not_())\
             ['Metric'].str.split(' ')

Metric
list[str]
"[""525"", ""MPa""]"


In [79]:
azom_sucessful = {'steel':[],'AZoM-Successful':[]}
list_of_scraped = [("steel", "pr", "em", "em_unit", "ys", "ys_unit", "uts", "uts_unit")]

pr = None
em = None
em_unit = None
ys = None
ys_unit = None
uts = None
uts_unit = None

# Grab values and split into units where necessary
pr = table.filter(pl.col('Properties')\
                  .str.to_lowercase()\
                    .str.contains("poisson"))['Metric']

df_em = table.filter(pl.col('Properties')\
                     .str.to_lowercase()\
                     .str.contains("elastic"))\
                      ['Metric'].str.split(' ')
em = df_em.list.get(0)
em_unit = df_em.list.get(1)

# grab yield strength
df_ys = table.filter(pl.col('Properties')\
                     .str.to_lowercase()\
                      .str.contains("yield"))\
                        ['Metric'].str.split(' ')
ys = df_ys.list.get(0)
ys_unit = df_ys.list.get(1)

# grab strength value that is not yield strength.
# Tensile strength or ultimate yield strength or ultimate tensile strength. All the same measurement
df_uts = table.filter(pl.col('Properties').str.to_lowercase().str.contains("strength") & 
                      pl.col('Properties').str.to_lowercase().str.contains("yield").not_())\
                        ['Metric'].str.split(' ')
uts = df_uts.list.get(0)
uts_unit = df_uts.list.get(1)

# Make a tuple for each row
try: 
  row = (steel, pr[0], em[0], em_unit[0], ys[0], ys_unit[0], uts[0], uts_unit[0])
except:
  # throw exception if values were not present on AZOM 
  raise ValueError("null value present")

list_of_scraped.append(row)
azom_sucessful['steel'].append(steel)
azom_sucessful['AZoM-Successful'].append(True)

print(azom_sucessful)
list_of_scraped


ValueError: null value present

In [80]:
# list of tuples if data was sucessfully scraped from AZoM
azom_sucessful = {'steel':[],'AZoM-Successful':[]}
list_of_scraped = [("steel", "pr", "em", "em_unit", "ys", "ys_unit", "uts", "uts_unit")]

for steel, table in dict_mech_props.items():
    pr = None
    em = None
    em_unit = None
    ys = None
    ys_unit = None
    uts = None
    uts_unit = None

    try:
        # Grab values and split into units where necessary
        pr = table.filter(pl.col('Properties')\
                        .str.to_lowercase()\
                            .str.contains("poisson"))['Metric']

        df_em = table.filter(pl.col('Properties')\
                            .str.to_lowercase()\
                            .str.contains("elastic"))\
                            ['Metric'].str.split(' ')
        em = df_em.list.get(0)
        em_unit = df_em.list.get(1)

        # grab yield strength
        df_ys = table.filter(pl.col('Properties')\
                            .str.to_lowercase()\
                            .str.contains("yield"))\
                                ['Metric'].str.split(' ')
        ys = df_ys.list.get(0)
        ys_unit = df_ys.list.get(1)

        # grab strength value that is not yield strength.
        # Tensile strength or ultimate yield strength or ultimate tensile strength. All the same measurement
        df_uts = table.filter(pl.col('Properties').str.to_lowercase().str.contains("strength") & 
                            pl.col('Properties').str.to_lowercase().str.contains("yield").not_())\
                                ['Metric'].str.split(' ')
        uts = df_uts.list.get(0)
        uts_unit = df_uts.list.get(1)

        # Make a tuple for each row
        try: 
            row = (steel, pr[0], em[0], em_unit[0], ys[0], ys_unit[0], uts[0], uts_unit[0])
        except:
        # throw exception if values were not present on AZOM 
            raise ValueError("null value present")

        list_of_scraped.append(row)
        azom_sucessful['steel'].append(steel)
        azom_sucessful['AZoM-Successful'].append(True)
        
    except:
        print("Failed on", steel)
        azom_sucessful['steel'].append(steel)
        azom_sucessful['AZoM-Successful'].append(False)

df_AZoM_successfull = pl.DataFrame(azom_sucessful)
df_AZoM_successfull.filter(pl.col('AZoM-Successful')==False)

Failed on AISI-SAE 4037
Failed on AISI-SAE 1042
Failed on AISI-SAE 1080
Failed on AISI-SAE 1335
Failed on AISI 4047
Failed on AISI 52100
Failed on AISI 1049
Failed on AISI-SAE 9254


steel,AZoM-Successful
str,bool
"""AISI-SAE 4037""",False
"""AISI-SAE 1042""",False
"""AISI-SAE 1080""",False
"""AISI-SAE 1335""",False
"""AISI 4047""",False
"""AISI 52100""",False
"""AISI 1049""",False
"""AISI-SAE 9254""",False


In [52]:
df_temp = pl.DataFrame([s_steel, s_pr, s_em, s_unit_em, s_uts, s_unit_uts, s_ys, s_unit_ys])
df_temp

ShapeError: could not create a new DataFrame: series 24 has length 24 while series "elastic_modulus" has length 15

In [25]:

df_steel_properties = df_steel_properties.join(df_temp, on='steel', how='inner')
df_steel_properties.write_csv(f'{resources_path}/AZoM_scraped_properties.csv')

In [None]:
df_AZoM_successfull

# Itteration and troubleshooting code

In [26]:
#Force and error so this code doesn't run with a run all
=

SyntaxError: invalid syntax (866440967.py, line 2)

In [16]:
tables = []
# parse all tables in the
for html_table in soup.find_all('table'):
    table = []
    # parse rows in the table
    for tr in html_table.find_all('tr'):
        row = []
        #parse data cells in the row
        for t in tr.find_all(['th', 'td']):
            text = t.get_text(strip=True)
            #add data to row
            row.append(text)
        #add row to table
        table.append(row)
        
    #returns a list of list of lists

In [15]:
browser = Browser('firefox')
browser.visit("https://www.azom.com/article.aspx?ArticleID=6769")
soup = BeautifulSoup(browser.html, 'html.parser')
#takes soup html and returns list of lists
# tables list of lists and returns a dictionary
# robots.txt indicates Crawl-delay: 120
time.sleep(0.15)

In [17]:
#  Parsing html tables - https://stackoverflow.com/questions/45843025/parsing-html-tables-with-beautifulsoup-in-python
# html_tables = soup2.find_all('tbody')
tables = [ # list of tables
    [ # list of rows in table
        # parse table data including headers
        [t.get_text(strip=True) for t in tr.find_all(['th', 'td'])]
        # for  each row of data
        for tr in table.find_all('tr')
    ] 
    for table in soup.find_all('table')
]
tables

[[['Element', 'Content (%)'],
  ['Chromium, Cr', '0.80 - 1.10'],
  ['Manganese, Mn', '0.75 - 1.0'],
  ['Carbon, C', '0.380 - 0.430'],
  ['Silicon, Si', '0.15 - 0.30'],
  ['Molybdenum, Mo', '0.15 - 0.25'],
  ['Sulfur, S', '0.040'],
  ['Phosphorous, P', '0.035'],
  ['Iron, Fe', 'Balance']],
 [['Properties', 'Metric', 'Imperial'],
  ['Density', '7.85 g/cm3', '0.284 lb/in³'],
  ['Melting point', '1416°C', '2580°F']],
 [['Properties', 'Metric', 'Imperial'],
  ['Tensile strength', '655 MPa', '95000 psi'],
  ['Yield strength', '415 MPa', '60200 psi'],
  ['Bulk modulus (typical for steel)', '140 GPa', '20300 ksi'],
  ['Shear modulus (typical for steel)', '80 GPa', '11600 ksi'],
  ['Elastic modulus', '190-210 GPa', '27557-30458 ksi'],
  ["Poisson's ratio", '0.27-0.30', '0.27-0.30'],
  ['Elongation at break (in 50 mm)', '25.70%', '25.70%'],
  ['Hardness, Brinell', '197', '197'],
  ['Hardness, Knoop (converted from Brinell hardness)', '219', '219'],
  ['Hardness, Rockwell B (converted from Brinel

In [18]:
from selenium.webdriver.common.keys import Keys

browser = Browser('firefox')
i = 0

for steel in df_data['steel_type']:
    browser.visit(f"{base_url}{search_path}{steel}")
    soup = BeautifulSoup(browser.html, 'html.parser')
    try:
        search_result = soup.find('div', class_='resultsContainer')
        first_item = search_result.find('a')
        # https://pytutorial.com/get-element-href-beautifulsoup/
        steel_link = first_item.get('href')
        time.sleep(0.15)
        browser.visit(f"{base_url}{steel_link}")
        soup2 = BeautifulSoup(browser.html, 'html.parser')
        time.sleep(0.15)
    except:
        i += 1
        print("Failed : ", i, " : ", steel)
        
    

    

In [19]:
# property - metric - imperial
table_elements = tables[0]
table_phys_props = tables[1]
table_mecha_props = tables[2]
table_therm_props = tables[3]
table_other_desigs = tables[4]

In [20]:
dict_props = {}
table_mecha_props = tables[2]
for row in table_mecha_props:
    try:
        #fail if empty list
        to_parse_metric = row[1]
        # parse numeric value as string
        value = re.search(r"[\d+\.\d+]+", to_parse_metric).group(0)
        # parse space seperated value units that may end in numbers example: '7.858 g/ cm3'
        unit = ''.join(to_parse_metric.split(' ')[1:])
        if unit == '':
            # parse unit if not space separated example: "40.00%"
            unit = re.search(r"[\D]+$", to_parse_metric).group(0)

        #add values if no errors in parsing
        # dict_props['metric'].append(value)
        # dict_props['unit'].append(unit)
        # dict_props["property"].append(row[0].lower())
        dict_props['steel_type'] = steel
        dict_props[row[0].lower()] = (value, unit)
    except:
        pass
    df = pl.DataFrame(dict_props)

In [21]:
#Test Case
list_to_parse  = ['7.858 g/ cm3', "40.00% ", "29%", "163"]
for item in list_to_parse:
    value = re.search(r"[\d+\.\d+]+", item).group(0)
    unit = ''.join(item.split(' ')[1:])
    if unit == '':
        unit = re.search(r"[\D]+$", item).group(0)
    print('value', value)
    print('unit ', unit)

value 7.858
unit  g/cm3
value 40.00
unit  % 
value 29
unit  %


AttributeError: 'NoneType' object has no attribute 'group'

In [15]:
dict_steel_tables.keys()

dict_keys(['AISI 52100', 'AISI 1095', 'AISI-SAE 4140', 'AISI-SAE 1038', 'AISI-SAE 1080', 'AISI-SAE 1040', 'AISI-SAE 4027', 'AISI-SAE 4340', 'AISI 1055', 'AISI 1074 Carbon Steel', 'AISI-SAE 1065', 'AISI-SAE 1045', 'AISI-SAE 5140', 'AISI 1090', 'AISI 4640', 'AISI-SAE 1335', 'AISI-SAE 4037', 'AISI-SAE 1042', 'AISI-SAE 1030', 'AISI-SAE 1035', 'AISI 1049', 'AISI-SAE 1050', 'AISI 4047', 'AISI-SAE 1026', 'AISI 6145', 'AISI 1030', 'AISI-SAE 6150', 'AISI-SAE 5160'])

In [18]:
pl.DataFrame(properties_dict)

property,metric,unit
str,str,str
"""density (chemical composition …","""7.845""","""g/cc"""
"""melting point""","""1521""","""°C"""
"""tensile strength""","""620""","""MPa"""
"""yield strength""","""415""","""MPa"""
"""bulk modulus (typical for stee…","""140""","""GPa"""
…,…,…
"""izod impact (as rolled)""","""49""","""J"""
"""izod impact (normalized at 900…","""65""","""J"""
"""thermal expansion co-efficient…","""11.3""","""µm/m°C"""
"""thermal conductivity (@ 100°c/…","""50.7""","""W/mK"""


In [21]:
properties_dict['property']

['density (chemical composition of 0.435% c, 0.69% mn, 0.20% si, annealed at 860°c (1580°f))',
 'melting point',
 'tensile strength',
 'yield strength',
 'bulk modulus (typical for steels)',
 'shear modulus (typical for steels)',
 'elastic modulus',
 'elongation at break (in 50 mm)',
 'reduction of area',
 'izod impact (annealed at 790°c (1450°f))',
 'izod impact (as rolled)',
 'izod impact (normalized at 900°c (1650°f)',
 'thermal expansion co-efficient (@ 20-100°c/68-212°f, composition of 0.40% c, 0.11% mn, 0.01% p, 0.03% s, 0.03% si, 0.03% cu)',
 'thermal conductivity (@ 100°c/212°f)',
 'thermal conductivity (@ 0°c)']

# Revisiting properties 


In [30]:
pl.Config.set_tbl_rows(30)

s_steel = pl.Series(name = 'steel', dtype= pl.String)
s_prop_s = pl.Series(name = 'prop_shape', dtype= pl.String)
s_mech_s = pl.Series(name = 'mech_shape', dtype= pl.String)

polars_check = True 
for steel, tables in dict_steel_tables_keep.items():
    all_polars = True
    count_polars = 0
    for table in tables:
        if isinstance(table, pl.DataFrame): 
            count_polars += 1 
    if count_polars != len(tables):
        all_polars = False
    if not (bool_check and all_polars):
        bool_check = False
    print("All polars?", all_polars, '|', len(tables), "Tables | Steel:", steel)

    phys_props_shape = str(tables[0].shape)
    mech_props_shape = str(tables[1].shape)
    s_steel.extend(pl.Series(name = 'steel', values = [steel]))
    s_prop_s.extend(pl.Series(name = 'prop_shape', values = [phys_props_shape]))
    s_mech_s.extend(pl.Series(name = 'nech_shape', values = [mech_props_shape]))

print("All Polars?", polars_check)
df_shapes = pl.DataFrame([s_steel, s_prop_s, s_mech_s])
df_shapes

All polars? True | 2 Tables | Steel: AISI 6145
All polars? True | 2 Tables | Steel: AISI-SAE 4027
All polars? True | 2 Tables | Steel: AISI 1090
All polars? True | 2 Tables | Steel: AISI 4640
All polars? True | 2 Tables | Steel: AISI-SAE 1080
All polars? True | 2 Tables | Steel: AISI 4047
All polars? True | 2 Tables | Steel: AISI-SAE 1030
All polars? True | 2 Tables | Steel: AISI-SAE 5140
All polars? True | 2 Tables | Steel: AISI 1095
All polars? True | 2 Tables | Steel: AISI-SAE 1050
All polars? True | 2 Tables | Steel: AISI-SAE 4140
All polars? True | 2 Tables | Steel: AISI-SAE 5160
All polars? True | 2 Tables | Steel: AISI-SAE 1035
All polars? True | 2 Tables | Steel: AISI-SAE 1026
All polars? True | 2 Tables | Steel: AISI 1049
All polars? True | 2 Tables | Steel: AISI-SAE 6150
All polars? True | 2 Tables | Steel: AISI 52100
All polars? True | 2 Tables | Steel: AISI 1074 Carbon Steel
All polars? True | 2 Tables | Steel: AISI-SAE 1065
All polars? True | 2 Tables | Steel: AISI-SAE 104

steel,prop_shape,mech_shape
str,str,str
"""AISI 6145""","""(1, 3)""","""(8, 3)"""
"""AISI-SAE 4027""","""(1, 3)""","""(13, 3)"""
"""AISI 1090""","""(1, 3)""","""(13, 3)"""
"""AISI 4640""","""(1, 3)""","""(8, 3)"""
"""AISI-SAE 1080""","""(1, 3)""","""(2, 3)"""
"""AISI 4047""","""(1, 3)""","""(10, 3)"""
"""AISI-SAE 1030""","""(2, 3)""","""(13, 3)"""
"""AISI-SAE 5140""","""(1, 3)""","""(12, 3)"""
"""AISI 1095""","""(2, 3)""","""(14, 3)"""
"""AISI-SAE 1050""","""(1, 3)""","""(14, 3)"""
