# Cleaning of BIODYN-100 BOM
Python 3.10.16

Associates each BOM component with its Digikey product link, then uses the Digikey API to fetch additional data for each part.


In [2]:
# Install libraries
%pip install python-dotenv git+https://github.com/hurricaneJoef/digikey-api.git

Collecting git+https://github.com/hurricaneJoef/digikey-api.git
  Cloning https://github.com/hurricaneJoef/digikey-api.git to /private/var/folders/0l/865rdbcs6jsb1367y_c84g0h0000gn/T/pip-req-build-ubecnhin
  Running command git clone --filter=blob:none --quiet https://github.com/hurricaneJoef/digikey-api.git /private/var/folders/0l/865rdbcs6jsb1367y_c84g0h0000gn/T/pip-req-build-ubecnhin
  Resolved https://github.com/hurricaneJoef/digikey-api.git to commit e4418f4aeb1c49c1e9b6c11f07e0b14567a43099
  Preparing metadata (setup.py) ... [?25ldone

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.10 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


## Read In BOM and Links

In [3]:
import csv

bom_file_name = "data/BIODYN-100 v58 BOM.csv"
links_file_name = "data/BIODYN-100 v58 Links.txt"

# Read the semicolon-delimited BOM: one dict per data row, keyed by the header names.
# newline='' hands line-ending handling to the csv module, so quoted fields that
# contain embedded newlines are parsed as a single field.
with open(bom_file_name, 'r', newline='') as file:
	reader = csv.DictReader(file, delimiter=';')
	# fieldnames is None when the file has no header line; keep colnames a list
	colnames = reader.fieldnames or []
	rows = list(reader)

# Read the links file: one link per line, surrounding whitespace removed.
# Blank lines are skipped so they do not end up as empty links.
with open(links_file_name, 'r') as file:
	links = [line.strip() for line in file if line.strip()]


## Keep Only the Columns We Want

In [4]:
print("Column Names:")

# Split the header names into groups of `lencolsets` so each printed line stays short
lencolsets = 4
colsets = []
for start in range(0, len(colnames), lencolsets):
	colsets.append(colnames[start:start + lencolsets])

# One tab-indented, comma-separated line per group
for colset in colsets:
	print("\t", ", ".join(colset))

Column Names:
	 Part, Value, Device, Footprint Name
	 Detailed Description, AVAILABILITY, CATEGORY, COPYRIGHT
	 DATASHEET, DESCRIPTION, DIGI-KEY_PART_NUMBER_1, DIGI-KEY_PART_NUMBER_2
	 DIGI-KEY_PART_NUMBER_3, MANUFACTURER, MANUFACTURER_NAME, MANUFACTURER_PART_NUMBER
	 MAXIMUM_PACKAGE_HEIGHT, MF, MP, MPN
	 OPERATING_TEMPERATURE, PACKAGE, PACKAGE_SIZE, PACKAGE_TYPE
	 PARTREV, PART_STATUS, POPULARITY, PRICE
	 PROD_ID, PURCHASE-URL, REFDES, ROHS
	 SERIES, SNAPEDA_PACKAGE_ID, SPICEPREFIX, STANDARD
	 SUBCATEGORY, THERMALLOSS, TYPE, VALUE
	 


In [5]:
# Trim unused columns
keep_columns = ["Part", "Value", "Device", "Footprint Name", "Detailed Description"]

# Work out which header names are unwanted once, then remove them from every row in place.
# pop(..., None) tolerates rows that do not have the column.
drop_columns = [col for col in colnames if col not in keep_columns]
for row in rows:
	for col in drop_columns:
		row.pop(col, None)

## Match Parts to Digikey Links

In [6]:
# Map devices to links

# BOM names that appear under a different spelling in the links file.
# Key: the value as written in the BOM column; value: the text to search for in the links instead.
# NOTE(review): unlike the other entries, "WE-PD_7332" maps to a string that does not
# resemble its key ("CL10B475KQ8NQNC") — confirm this pairing is intended.
altnames = {
	"TRIMPOTTC33X": "TC33X",
	"ERA6AEB134V": "ERA-6AEB134V",
	"ERA6AEB124V": "ERA-6AEB124V",
	"WE-PD_7332": "CL10B475KQ8NQNC",
}

# Find link from column key
def findlink(key: str, min_prefix_len: int = 5) -> None:
	"""Attach a 'Link' entry to every BOM row whose `key` column matches a link.

	For each row in the global `rows`, the value of column `key` (replaced by its
	`altnames` entry when one exists) is compared against the global `links`.
	The full value is tried first, then progressively shorter prefixes; the first
	link that contains the current prefix is stored in row['Link'] and the search
	for that row stops. Rows with no match are left without a 'Link' key. An
	existing row['Link'] is overwritten when a match is found.

	Args:
		key: Name of the column whose value is searched for in the links.
		min_prefix_len: Shortest prefix that may be used for matching. Values
			shorter than this never match. Defaults to 5.

	Raises:
		KeyError: If a row has no `key` column.
	"""
	# A zero-length prefix is contained in every link, so never go below 1
	shortest = max(min_prefix_len, 1)
	for row in rows:
		name = row[key]
		# csv.DictReader pads short rows with None; there is nothing to match on
		if not name:
			continue
		name = altnames.get(name, name)
		# Longest prefix first, so the most specific match wins
		for length in range(len(name), shortest - 1, -1):
			prefix = name[:length]
			match = next((link for link in links if prefix in link), None)
			if match is not None:
				row['Link'] = match
				break
	
# First try to find link with 'Device' column
findlink('Device')

# Check for ones without link: collect the rows that still have no 'Link' entry
# and print one line for each so they can be fixed by hand
nolink = list(filter(lambda candidate: 'Link' not in candidate, rows))
for r in nolink:
	print("No link found for device:", r['Device'], "in", r["Part"], "with value", r["Value"])



No link found for device: BATTERYLIPO-TABS in PWR1:BAT1:1S-1C-LIPO with value 


In [7]:
# Extract info from links

base_link_1 = "digikey.ca/en/products/detail/"
base_link_2 = "digikey.ca/product-detail/"


def _part_number_from_link(link, base_links=(base_link_1, base_link_2)):
	"""Return the part number embedded in a product link, or None.

	The part number is the second '/'-separated segment after the first base
	in `base_links` that occurs in `link` (presumably the first segment is the
	manufacturer — verify against the links file). None is returned when no
	base occurs in the link, or when the link is too short to contain that
	segment.
	"""
	for base in base_links:
		if base in link:
			segments = link.split(base)[1].split('/')
			# A truncated link has no second segment; report it instead of raising IndexError
			if len(segments) > 1 and segments[1]:
				return segments[1]
			return None
	return None


for row in rows:
	if 'Link' not in row:
		continue
	# Extract manufacturer part number from link
	dpkn = _part_number_from_link(row['Link'])
	if dpkn is None:
		print("Unknown link format:", row['Link'])
		continue
	row['Manuf Part Number'] = dpkn

## Fetch Data from Digikey About Items

In [None]:
# Add Digikey Data from links to the dataset
# Ensure that .env contains DIGIKEY_CLIENT_ID, DIGIKEY_CLIENT_SECRET, and DIGIKEY_STORAGE_PATH

%load_ext dotenv
%dotenv

import digikey
from digikey.v4.productinformation import KeywordRequest
import os
import time

# Load secrets from .env
api_id = os.environ.get("DIGIKEY_CLIENT_ID")
api_secret = os.environ.get("DIGIKEY_CLIENT_SECRET")
storage_path = os.environ.get("DIGIKEY_STORAGE_PATH") # Make sure this isn't commited and pushed
os.environ['DIGIKEY_CLIENT_SANDBOX'] = 'False'

if not api_id or not api_secret or not storage_path:
	raise ValueError("DIGIKEY_CLIENT_ID, DIGIKEY_CLIENT_SECRET, and DIGIKEY_STORAGE_PATH must be set in environment variables")

CACHE_DIR = storage_path
if not os.path.exists(CACHE_DIR):
	os.mkdir(CACHE_DIR)

# Part numbers taken from links that must be rewritten before searching
# (presumably because the link replaces '/' in the real part number — confirm)
alternatives = {
	"INA106U-2K5": "INA106U/2K5"
}
# Part number -> product already obtained during this run, so each part is requested once
memo = {}

cached = 0
fetched = 0
failed = 0
for row in rows:
	if 'Manuf Part Number' not in row:
		continue
	dpkn = row['Manuf Part Number']
	dpkn = alternatives.get(dpkn, dpkn)

	# Same part already seen in this run: reuse its product
	if dpkn in memo:
		cached += 1
		row['Raw Response'] = memo[dpkn]
		continue
	# Row still carries a product from an earlier run of this cell: keep it and remember it
	if 'Raw Response' in row:
		cached += 1
		memo[dpkn] = row['Raw Response']
		continue

	try:
		request = KeywordRequest(keywords=dpkn, limit=1, offset=0)
		response = digikey.keyword_search(body=request)
		if response.products:
			fetched += 1
			product = response.products[0]
			row['Raw Response'] = product
			memo[dpkn] = product
		else:
			failed += 1
			print("\nNo product found for", dpkn)
	except Exception as e:
		# A request that raised is a failure too; count it so the summary below is accurate
		failed += 1
		print("Error fetching data for", dpkn, ":", e)
	time.sleep(0.3)  # To avoid hitting rate limits

print(f"Cached: {cached}, Fetched: {fetched}, Failed: {failed}")


The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [None]:
# Extract useful fields from Digikey data to the dataset
for row in rows:
	if 'Raw Response' not in row:
		print("No DigiKey data for", row['Device'])
		continue

	product = row['Raw Response']
	row['Datasheet URL'] = product.datasheet_url
	# Prefer the product description; fall back to the detailed one when it is empty
	description = product.description
	row['Description'] = description.product_description or description.detailed_description
	row['Manufacturer'] = product.manufacturer.name

	# Prefer the variation whose package type name contains "Cut Tape", else the first one listed
	variations = product.product_variations or []
	variant = next((x for x in variations if "Cut Tape" in x.package_type.name), None)
	if not variant and variations:
		variant = variations[0]

	if not variant:
		# No variations at all: leave the per-variation fields empty instead of raising IndexError
		print("No product variations for", row['Device'])
		row['DigiKey Part Number'] = None
		row['Pricing'] = None
		row['Stock'] = None
		continue

	row['DigiKey Part Number'] = variant.digi_key_product_number
	row['Pricing'] = variant.standard_pricing
	row['Stock'] = variant.quantity_availablefor_package_type


## Save Cleaned BOM to File

In [None]:
# Save to file
output_file_name = bom_file_name.replace(".csv", " Tidy.csv")

# Check before opening: mode 'w' truncates an existing output file
if not rows:
	raise ValueError("No BOM rows to save")

# Rows do not all carry the same keys (e.g. a row without a link has no 'Link' entry),
# so collect the column names across ALL rows, in first-seen order, rather than from rows[0] only.
# 'Raw Response' is left out to avoid clutter. None is the key csv.DictReader uses for
# surplus fields and cannot be a column name.
fieldnames = []
for row in rows:
	for key in row:
		if key is not None and key != "Raw Response" and key not in fieldnames:
			fieldnames.append(key)

with open(output_file_name, 'w', newline='') as file:
	# extrasaction='ignore' drops the excluded keys; columns missing from a row are written as empty
	writer = csv.DictWriter(file, fieldnames=fieldnames, delimiter=';', extrasaction='ignore')
	writer.writeheader()
	writer.writerows(rows)

Copyright © 2025 Callum Mackenzie 