## Create Poetry TOML

In [2]:
import re

def parse_package_list(package_list_text):
    """Parse a package list text into a list of (package_name, version) tuples."""
    packages = []
    lines = package_list_text.strip().split('\n')
    
    # Skip header lines
    for line in lines[2:]:  # Skip the first two lines (header and separator)
        # Use regex to extract package name and version
        match = re.match(r'(\S+)\s+(.+)', line.strip())
        if match:
            package_name, version = match.groups()
            packages.append((package_name.strip(), version.strip()))
    
    return packages

def generate_pyproject_toml(package_tuples, python_version="3.9"):
    """Generate a pyproject.toml file from a list of (package_name, version) tuples."""
    toml = f"""[tool.poetry]
    name = "encoding_music_libraries"
    version = "0.1.0"
    description = "Your project description"
    authors = ["Richard Freedman <rfreedma@haverford.edu>"]
    
    [tool.poetry.dependencies]
    python = "^{python_version}"
    """
    
    # Add packages to dependencies section
    for package, version in package_tuples:
        # Skip Python itself and some system packages
        if package.lower() in ['python', 'pip', 'setuptools', 'wheel']:
            continue
        
        # Clean version string and add to TOML
        if version:
            toml += f'{package} = "^{version}"\n'
        else:
            toml += f'{package} = "*"\n'
    
    # Add build system section
    toml += """
[tool.poetry.dev-dependencies]
pytest = "*"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
"""
    return toml

# Example usage with your package list
package_list_text = """Package                   Version
------------------------- --------------
alembic                   1.13.1
altair                    5.2.0
annotated-types           0.7.0
anyio                     4.8.0
argon2-cffi               23.1.0
argon2-cffi-bindings      21.2.0
arrow                     1.3.0
asttokens                 2.4.1
async-generator           1.10
async-lru                 2.0.4
async-timeout             4.0.3
attrs                     25.1.0
Babel                     2.14.0
backcall                  0.2.0
beautifulsoup4            4.12.3
bleach                    6.1.0
blis                      1.3.0
brotlipy                  0.7.0
catalogue                 2.0.10
certifi                   2023.11.17
certipy                   0.1.3
cffi                      1.15.1
chardet                   4.0.0
charset-normalizer        3.4.1
click                     8.1.8
cloudpathlib              0.21.1
colorama                  0.4.6
comm                      0.2.2
conda                     23.1.0
conda-package-handling    2.0.2
conda_package_streaming   0.7.0
confection                0.1.5
contourpy                 1.3.1
crim_intervals            2.0.47
cryptography              40.0.1
cycler                    0.12.1
cymem                     2.0.11
debugpy                   1.8.1
decorator                 5.1.1
defusedxml                0.7.1
docx                      0.2.4
exceptiongroup            1.2.2
executing                 2.0.1
fastjsonschema            2.19.1
fonttools                 4.56.0
fqdn                      1.5.1
greenlet                  3.0.3
h11                       0.14.0
httpcore                  1.0.2
httpx                     0.26.0
idna                      3.1
intervals_mei_tools       1.0.0
ipykernel                 6.29.4
ipython                   7.34.0
ipywidgets                8.1.2
isoduration               20.11.0
jedi                      0.19.2
Jinja2                    3.1.5
joblib                    1.0.1
json5                     0.9.25
jsonpickle                4.0.1
jsonpointer               2.4
jsonschema                4.23.0
jsonschema-specifications 2024.10.1
jupyter_client            8.6.1
jupyter_core              5.7.2
jupyter-events            0.10.0
jupyter-lsp               2.2.5
jupyter-resource-usage    1.0.2
jupyter_server            2.14.0
jupyter_server_terminals  0.5.3
jupyter-telemetry         0.1.0
jupyterhub                4.1.5
jupyterlab                4.1.8
jupyterlab_pygments       0.3.0
jupyterlab-quarto         0.3.3
jupyterlab_server         2.27.1
jupyterlab_widgets        3.0.10
kiwisolver                1.4.8
langcodes                 3.5.0
language_data             1.3.0
libmambapy                1.4.1
lxml                      5.1.0
Mako                      1.3.3
mamba                     1.4.1
marisa-trie               1.2.1
markdown-it-py            3.0.0
MarkupSafe                3.0.2
matplotlib                3.10.0
matplotlib-inline         0.1.7
mdurl                     0.1.2
mei_import_tools          1.0.1
mei_tools                 1.0.0
mistune                   3.0.2
more-itertools            8.7.0
murmurhash                1.0.12
music21                   8.3.0
nbclient                  0.10.0
nbconvert                 7.16.4
nbformat                  5.10.4
nbgitpuller               1.2.1
nest-asyncio              1.6.0
networkx                  3.4.2
notebook                  7.1.3
notebook_shim             0.2.4
numpy                     2.2.5
oauthlib                  3.2.2
overrides                 7.7.0
packaging                 24.2
pamela                    1.1.0
pandas                    2.2.3
pandocfilters             1.5.1
parso                     0.8.4
patsy                     1.0.1
pexpect                   4.9.0
pickleshare               0.7.5
pillow                    11.1.0
pip                       24.0
platformdirs              4.2.1
plotly                    5.19.0
plotly-express            0.4.1
pluggy                    1.0.0
preshed                   3.0.9
prometheus_client         0.20.0
prompt_toolkit            3.0.50
psutil                    5.9.8
ptyprocess                0.7.0
pure-eval                 0.2.2
pyarrow                   19.0.0
pycosat                   0.6.4
pycparser                 2.21
pydantic                  2.11.4
pydantic_core             2.33.2
Pygments                  2.19.1
pyOpenSSL                 23.1.1
pyparsing                 3.2.1
PyPDF2                    3.0.1
PySocks                   1.7.1
python-dateutil           2.8.2
python-docx               1.1.2
python-json-logger        2.0.7
python-louvain            0.16
pytz                      2021.1
pyvis                     0.1.9
PyYAML                    6.0.1
pyzmq                     26.0.2
redis                     5.1.1
referencing               0.36.2
requests                  2.31.0
rfc3339-validator         0.1.4
rfc3986                   1.4.0
rfc3986-validator         0.1.1
rich                      14.0.0
rpds-py                   0.22.3
ruamel.yaml               0.17.21
ruamel.yaml.clib          0.2.7
scipy                     1.15.1
seaborn                   0.13.0
Send2Trash                1.8.3
setuptools                75.8.0"""

# Parse the package list into tuples
package_tuples = parse_package_list(package_list_text)

# Print the first 10 tuples to verify
print("First 10 package tuples:")
for i, (package, version) in enumerate(package_tuples[:10]):
    print(f"{i+1}. ({package}, {version})")

# Generate the pyproject.toml file
pyproject_toml = generate_pyproject_toml(package_tuples)

# Write to file
with open("pyproject.toml", "w") as f:
    f.write(pyproject_toml)

print("\nGenerated pyproject.toml file successfully!")

# # Optionally, print a snippet of the generated TOML
# print("\nSnippet of generated pyproject.toml:")
# print("\n".join(pyproject_toml.split("\n")[:20]))

First 10 package tuples:
1. (alembic, 1.13.1)
2. (altair, 5.2.0)
3. (annotated-types, 0.7.0)
4. (anyio, 4.8.0)
5. (argon2-cffi, 23.1.0)
6. (argon2-cffi-bindings, 21.2.0)
7. (arrow, 1.3.0)
8. (asttokens, 2.4.1)
9. (async-generator, 1.10)
10. (async-lru, 2.0.4)

Generated pyproject.toml file successfully!


In [6]:
import re
import subprocess
import os

def parse_package_list(package_list_text):
    """Parse a package list text into a list of (package_name, version) tuples."""
    packages = []
    lines = package_list_text.strip().split('\n')
    
    # Skip header lines
    for line in lines[2:]:  # Skip the first two lines (header and separator)
        # Use regex to extract package name and version
        match = re.match(r'(\S+)\s+(.+)', line.strip())
        if match:
            package_name, version = match.groups()
            packages.append((package_name.strip(), version.strip()))
    
    return packages

def generate_pyproject_toml(package_tuples, python_version="3.9"):
    """Generate a pyproject.toml file from a list of (package_name, version) tuples."""
    toml = f"""[tool.poetry]
name = "encoding_music_libraries"
version = "0.1.0"
description = "Your project description"
authors = ["Richard Freedman <rfreedma@haverford.edu>"]

[tool.poetry.dependencies]
python = "^{python_version}"
"""
    
    # Add packages to dependencies section
    excluded_packages = ['python', 'pip', 'setuptools', 'wheel', 'conda', 'conda-package-handling', 
                         'conda_package_streaming', 'libmambapy', 'mamba', 'poetry-core']
    
    # Use a dictionary to track packages and prevent duplicates
    package_dict = {}
    
    for package, version in package_tuples:
        # Convert package name to lowercase for case-insensitive comparison
        package_lower = package.lower()
        
        # Skip excluded packages
        if package_lower in excluded_packages:
            continue
        
        # Skip if we've already added this package (prevents duplicates)
        if package_lower in package_dict:
            continue
            
        # Add to our tracking dictionary
        package_dict[package_lower] = version
        
        # Clean version string and add to TOML
        if version:
            # Convert version to Poetry's preferred format
            toml += f'{package} = "^{version}"\n'
        else:
            toml += f'{package} = "*"\n'
    
    # Add build system section
    toml += """
[tool.poetry.dev-dependencies]
pytest = "*"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
"""
    return toml

def validate_toml(toml_content):
    """Basic validation to check for obvious TOML errors."""
    try:
        import tomli
        tomli.loads(toml_content)
        return True, None
    except Exception as e:
        return False, str(e)

# Example usage with your package list
package_list_text = """Package                   Version
------------------------- --------------
alembic                   1.13.1
altair                    5.2.0
annotated-types           0.7.0
anyio                     4.8.0
argon2-cffi               23.1.0
argon2-cffi-bindings      21.2.0
arrow                     1.3.0
asttokens                 2.4.1
async-generator           1.10
async-lru                 2.0.4
async-timeout             4.0.3
attrs                     25.1.0
Babel                     2.14.0
backcall                  0.2.0
beautifulsoup4            4.12.3
bleach                    6.1.0
blis                      1.3.0
brotlipy                  0.7.0
catalogue                 2.0.10
certifi                   2023.11.17
certipy                   0.1.3
cffi                      1.15.1
chardet                   4.0.0
charset-normalizer        3.4.1
click                     8.1.8
cloudpathlib              0.21.1
colorama                  0.4.6
comm                      0.2.2
conda                     23.1.0
conda-package-handling    2.0.2
conda_package_streaming   0.7.0
confection                0.1.5
contourpy                 1.3.1
crim_intervals            2.0.47
cryptography              40.0.1
cycler                    0.12.1
cymem                     2.0.11
debugpy                   1.8.1
decorator                 5.1.1
defusedxml                0.7.1
docx                      0.2.4
exceptiongroup            1.2.2
executing                 2.0.1
fastjsonschema            2.19.1
fonttools                 4.56.0
fqdn                      1.5.1
greenlet                  3.0.3
h11                       0.14.0
httpcore                  1.0.2
httpx                     0.26.0
idna                      3.1
intervals_mei_tools       1.0.0
ipykernel                 6.29.4
ipython                   7.34.0
ipywidgets                8.1.2
isoduration               20.11.0
jedi                      0.19.2
Jinja2                    3.1.5
joblib                    1.0.1
json5                     0.9.25
jsonpickle                4.0.1
jsonpointer               2.4
jsonschema                4.23.0
jsonschema-specifications 2024.10.1
jupyter_client            8.6.1
jupyter_core              5.7.2
jupyter-events            0.10.0
jupyter-lsp               2.2.5
jupyter-resource-usage    1.0.2
jupyter_server            2.14.0
jupyter_server_terminals  0.5.3
jupyter-telemetry         0.1.0
jupyterhub                4.1.5
jupyterlab                4.1.8
jupyterlab_pygments       0.3.0
jupyterlab-quarto         0.3.3
jupyterlab_server         2.27.1
jupyterlab_widgets        3.0.10
kiwisolver                1.4.8
langcodes                 3.5.0
language_data             1.3.0
libmambapy                1.4.1
lxml                      5.1.0
Mako                      1.3.3
mamba                     1.4.1
marisa-trie               1.2.1
markdown-it-py            3.0.0
MarkupSafe                3.0.2
matplotlib                3.10.0
matplotlib-inline         0.1.7
mdurl                     0.1.2
mei_import_tools          1.0.1
mei_tools                 1.0.0
mistune                   3.0.2
more-itertools            8.7.0
murmurhash                1.0.12
music21                   8.3.0
nbclient                  0.10.0
nbconvert                 7.16.4
nbformat                  5.10.4
nbgitpuller               1.2.1
nest-asyncio              1.6.0
networkx                  3.4.2
notebook                  7.1.3
notebook_shim             0.2.4
numpy                     2.2.5
oauthlib                  3.2.2
overrides                 7.7.0
packaging                 24.2
pamela                    1.1.0
pandas                    2.2.3
pandocfilters             1.5.1
parso                     0.8.4
patsy                     1.0.1
pexpect                   4.9.0
pickleshare               0.7.5
pillow                    11.1.0
pip                       24.0
platformdirs              4.2.1
plotly                    5.19.0
plotly-express            0.4.1
pluggy                    1.0.0
preshed                   3.0.9
prometheus_client         0.20.0
prompt_toolkit            3.0.50
psutil                    5.9.8
ptyprocess                0.7.0
pure-eval                 0.2.2
pyarrow                   19.0.0
pycosat                   0.6.4
pycparser                 2.21
pydantic                  2.11.4
pydantic_core             2.33.2
Pygments                  2.19.1
pyOpenSSL                 23.1.1
pyparsing                 3.2.1
PyPDF2                    3.0.1
PySocks                   1.7.1
python-dateutil           2.8.2
python-docx               1.1.2
python-json-logger        2.0.7
python-louvain            0.16
pytz                      2021.1
pyvis                     0.1.9
PyYAML                    6.0.1
pyzmq                     26.0.2
redis                     5.1.1
referencing               0.36.2
requests                  2.31.0
rfc3339-validator         0.1.4
rfc3986                   1.4.0
rfc3986-validator         0.1.1
rich                      14.0.0
rpds-py                   0.22.3
ruamel.yaml               0.17.21
ruamel.yaml.clib          0.2.7
scipy                     1.15.1
seaborn                   0.13.0
Send2Trash                1.8.3
setuptools                75.8.0"""

# Parse the package list into tuples
package_tuples = parse_package_list(package_list_text)

# Generate the pyproject.toml file
pyproject_toml = generate_pyproject_toml(package_tuples)

# Validate the TOML content before writing
is_valid, error_message = validate_toml(pyproject_toml)
if not is_valid:
    print(f"Generated TOML is invalid: {error_message}")
    # Try to find the line with the error
    lines = pyproject_toml.split('\n')
    if "line" in error_message:
        try:
            line_num = int(re.search(r'line (\d+)', error_message).group(1))
            if 0 <= line_num < len(lines):
                print(f"Problematic line ({line_num}): {lines[line_num-1]}")
        except (AttributeError, ValueError):
            pass
else:
    # Write to file
    with open("pyproject.toml", "w") as f:
        f.write(pyproject_toml)
    
    print("\nGenerated pyproject.toml file successfully!")
    
    # Try to generate the lock file
    try:
        print("Generating poetry.lock file...")
        result = subprocess.run(["poetry", "lock"], check=True, capture_output=True, text=True)
        print("Generated poetry.lock file successfully!")
    except subprocess.CalledProcessError as e:
        print(f"Error generating poetry.lock file: {e}")
        print(f"Poetry output: {e.stdout}\n{e.stderr}")
        
        # If there's a TOML error, try to provide more context
        if "TOMLDecodeError" in e.stderr:
            error_line_match = re.search(r'at line (\d+), column (\d+)', e.stderr)
            if error_line_match:
                line_num = int(error_line_match.group(1))
                col_num = int(error_line_match.group(2))
                
                # Print the problematic line and surrounding lines
                lines = pyproject_toml.split('\n')
                start = max(0, line_num - 3)
                end = min(len(lines), line_num + 2)
                
                print("\nProblematic section in pyproject.toml:")
                for i in range(start, end):
                    prefix = ">>> " if i+1 == line_num else "    "
                    print(f"{prefix}Line {i+1}: {lines[i]}")
                
                # Suggest a fix
                print("\nPossible fix: Check for duplicate package entries or invalid characters in the TOML file.")

Generated TOML is invalid: Cannot overwrite a value (at line 158, column 28)
Problematic line (158): ruamel.yaml.clib = "^0.2.7"
