# Project Workflow: Automated Conda Environment Builder

In [None]:

## Define Detailed Steps:

### Search for Pre-existing Packages:
Leverage existing packages to avoid reinventing the wheel. Look for packages that meet the project requirements.

In [None]:
# Workflow Diagram For CondaQuickBuilder:

In [None]:
from IPython.display import display, HTML

# Mermaid flowchart of the CondaQuickBuilder workflow, embedded as HTML.
# Node J's label is wrapped in double quotes: Mermaid reads bare parentheses
# inside a [...] label as shape syntax and fails to parse the diagram.
mermaid_code = """
<div class="mermaid">
graph TB
    A[Start] --> B{Unzip or Clone the starter project repository}
    B --> C{Determine whether there is a environments.yml file or a requirements.txt file or a Poetry spec}
    C -->|environments.yml| D[Use environments.yml to create the environment]
    C -->|requirements.txt| E[Use requirements.txt to create the environment]
    C -->|Poetry spec| F[Ask the user what to do next, or use the Poetry spec to create the environment]
    D --> G[Parse the requirements file]
    E --> G
    F --> G
    G --> H{Plan to use latest version of Python BY DEFAULT, UNLESS the new project package requires an older version}
    H --> I[Alert the user that latest version of Python will be used. Ask them if they want to use a different version, if it is not specified in the requirements file]
    I --> J["Get the list of channels used in the conda configuration (if available)"]
    J --> K[Get the list of environments available locally]
    K --> L[Create a new environment]
    L --> M[Install the packages in the environment]
    M --> N[Save the versions of the packages to a file]
    N --> O[End]
</div>
"""

# Loads Mermaid from the jsDelivr CDN and initialises it with startOnLoad enabled.
mermaid_js = """
<script src="https://cdn.jsdelivr.net/npm/mermaid/dist/mermaid.min.js"></script>
<script>mermaid.initialize({startOnLoad:true});</script>
"""

# Emit the loader script followed by the diagram markup as one HTML output.
display(HTML(mermaid_js + mermaid_code))

: 

In [None]:
graph TB
    A[Start] --> B{Unzip or Clone the starter project repository}
    B --> C{Determine whether there is an environments.yml file or a requirements.txt file or a Poetry spec}
    C -->|environments.yml| D[Use environments.yml to create the environment]
    C -->|requirements.txt| E[Use requirements.txt to create the environment]
    C -->|Poetry spec| F[Ask the user what to do next, or use the Poetry spec to create the environment]
    D --> G[Parse the requirements file]
    E --> G
    F --> G
    G --> H{Plan to use latest version of Python BY DEFAULT, UNLESS the new project package requires an older version}
    H --> I[Alert the user that latest version of Python will be used. Ask them if they want to use a different version, if it is not specified in the requirements file]
    I --> J["Get the list of channels used in the conda configuration (if available)"]
    J --> K[Get the list of environments available locally]
    K --> L[Create a new environment]
    L --> M[Install the packages in the environment]
    M --> N[Save the versions of the packages to a file]
    N --> O[End]
    

In [4]:
# Code to search for pre-existing packages in a conda environment and install them if they are not present
import subprocess
import json

def check_package_installed(env_name, package):
    """Report whether ``package`` is listed in the conda environment ``env_name``.

    Runs ``conda list --name <env_name> <package> --json`` and looks for an
    entry whose ``name`` field equals ``package`` exactly.

    Args:
        env_name: Name of the conda environment to inspect.
        package: Package name to look for.

    Returns:
        bool: True if an entry with that exact name is present, else False.

    Raises:
        subprocess.CalledProcessError: If ``conda list`` exits with a non-zero
            status.
    """
    listing_cmd = ["conda", "list", "--name", env_name, package, "--json"]
    completed = subprocess.run(listing_cmd, stdout=subprocess.PIPE, check=True)
    for entry in json.loads(completed.stdout):
        if entry['name'] == package:
            return True
    return False

def create_conda_env(env_name, packages):
    """Create the conda environment ``env_name`` and install ``packages`` into it.

    The environment is created first; only afterwards is each package checked
    with ``check_package_installed`` and installed if it is missing. Checking
    before creation would query an environment that does not exist yet.
    Commands are passed as argument lists, so package specs such as
    ``"numpy>=1.20"`` are never interpreted by a shell.

    Args:
        env_name: Name of the environment to create.
        packages: Iterable of package specs to install.

    Raises:
        subprocess.CalledProcessError: If any conda command exits with a
            non-zero status.
    """
    subprocess.run(["conda", "create", "--name", env_name, "--yes"], check=True)

    for package in packages:
        if not check_package_installed(env_name, package):
            subprocess.run(
                ["conda", "install", "--name", env_name, "--yes", package],
                check=True,
            )
    


In [None]:
### Select the Best-Fitting Package:
Choose an existing package that aligns with the initial requirements. Consider factors like popularity, community support, and reliability.

In [None]:
# Code to create a conda environment and install the selected packages (skipping any that are already present)
def create_conda_env(env_name, packages):
    """Create the conda environment ``env_name`` and install ``packages`` into it.

    Each entry of ``packages`` may be a bare name (``"pandas"``) or a pinned
    spec (``"numpy==1.18.5"``). A package is installed only when
    ``check_package_installed`` reports that it is not already present.

    The environment is not activated here: ``conda activate`` is a shell
    function, so running it through ``subprocess`` fails and could not change
    the calling process anyway. An activation hint is printed instead.

    Args:
        env_name: Name of the environment to create.
        packages: Iterable of package specs.

    Raises:
        subprocess.CalledProcessError: If a conda command exits with a
            non-zero status.
    """
    # Create a new conda environment
    subprocess.run(["conda", "create", "--name", env_name, "--yes"], check=True)

    # Install packages
    for package in packages:
        # The installed-check needs the bare name, without any "==version" pin.
        name = package.split('==', 1)[0]
        if not check_package_installed(env_name, name):
            subprocess.run(["conda", "install", "--name", env_name, "--yes", package], check=True)

    print(f"Environment '{env_name}' is ready. Activate it in your shell with: conda activate {env_name}")

# Usage
create_conda_env("my_env", ["numpy==1.18.5", "pandas", "scikit-learn"])

In [None]:
### Download the Best-Fitting Package:
Retrieve the selected package. Ensure it is readily available for use.

In [None]:
# Code to look up the latest available version of each unpinned package and install it
import re

def get_latest_package_version(package):
    """Return the highest version of ``package`` reported by ``conda search``.

    Versions are ranked by the sequence of integers found in the version
    string (``"1.21.0"`` -> ``[1, 21, 0]``), so ``1.10`` ranks above ``1.9``.

    Args:
        package: Package name to search for.

    Returns:
        str | None: The highest-ranked version string, or None when the search
        result has no entry for ``package``.

    Raises:
        subprocess.CalledProcessError: If ``conda search`` exits with a
            non-zero status.
    """
    result = subprocess.run(["conda", "search", "--json", package], stdout=subprocess.PIPE, check=True)
    search_results = json.loads(result.stdout)

    # The result is keyed by package name; a missing key means "no versions"
    # rather than an error, matching the None return below.
    records = search_results.get(package, []) if isinstance(search_results, dict) else []
    versions = [record['version'] for record in records]
    if not versions:
        return None

    def numeric_key(version):
        return [int(part) for part in re.findall(r'\d+', version)]

    return max(versions, key=numeric_key)

def create_conda_env(env_name, packages):
    """Create ``env_name`` and install ``packages``, pinning unpinned ones.

    A spec without ``==version`` is pinned to the version returned by
    ``get_latest_package_version``. If no version can be resolved, the bare
    name is installed instead of the invalid spec ``name==None``.

    The environment is not activated here: ``conda activate`` is a shell
    function, so running it through ``subprocess`` fails and could not change
    the calling process anyway. An activation hint is printed instead.

    Args:
        env_name: Name of the environment to create.
        packages: Iterable of package specs (``"name"`` or ``"name==version"``).

    Raises:
        subprocess.CalledProcessError: If a conda command exits with a
            non-zero status.
    """
    # Create a new conda environment
    subprocess.run(["conda", "create", "--name", env_name, "--yes"], check=True)

    # Install packages
    for package in packages:
        name, _, version = package.partition('==')
        if check_package_installed(env_name, name):
            continue
        # Resolve the version only for packages that will actually be installed.
        if not version:
            version = get_latest_package_version(name)
        spec = f"{name}=={version}" if version else name
        subprocess.run(["conda", "install", "--name", env_name, "--yes", spec], check=True)

    print(f"Environment '{env_name}' is ready. Activate it in your shell with: conda activate {env_name}")

# Usage
create_conda_env("my_env", ["numpy", "pandas", "scikit-learn"])

In [None]:
### User-Specify Project Folder Name:
Create a function (get_new_project_name()) to prompt the user for a project folder name. Use this name for the new project directory.

In [None]:
import os

def get_new_project_name():
    """Prompt for a project folder name until a new directory can be created.

    Empty input and names that already exist are rejected with a message, and
    the prompt repeats. The directory is created with ``os.makedirs``.

    Returns:
        str: The name of the directory that was created.
    """
    while True:
        project_name = input("Enter a name for the new project directory: ").strip()
        if not project_name:
            # os.makedirs("") would raise, so ask again instead.
            print("The project name cannot be empty. Please enter a name.")
            continue
        try:
            # Creating directly, rather than checking first, avoids a race
            # between the existence check and the creation.
            os.makedirs(project_name)
        except FileExistsError:
            print("A directory with this name already exists. Please enter a different name.")
            continue
        print(f"Created new project directory: {project_name}")
        return project_name

# Usage (prompts interactively for the directory name)
get_new_project_name()

In [None]:
### Unzip the Best-Fitting Package into New Project Folder:
Implement a function (unzip_starter_project_repo()) to extract the package contents. Place them in the user-specified project folder.

In [None]:
# Code to extract the package contents into the user-specified project folder

import zipfile

def unzip_starter_project_repo(project_name, zip_path='best_fitting_package.zip'):
    """Extract a starter-project archive into the project folder.

    Args:
        project_name: Directory that receives the archive contents.
        zip_path: Path of the zip archive to extract. Defaults to
            ``'best_fitting_package.zip'`` in the current working directory.

    Raises:
        FileNotFoundError: If ``zip_path`` does not exist.
        zipfile.BadZipFile: If ``zip_path`` is not a valid zip archive.
    """
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(project_name)

# Usage
# Assume that the project_name is obtained from the get_new_project_name() function
project_name = get_new_project_name()
unzip_starter_project_repo(project_name)

In [None]:
### Scan Project Folder for Required Python Package Dependencies:
Develop a function (scan_project_modules()) to identify the unique set of required Python packages. Store these dependencies in a set (py_dependencies_set).

In [None]:
# Scan Project Folder for Required Python Package Dependencies:
import os
import re

def _top_level_imports(source):
    """Return the top-level module names imported by the Python text ``source``."""
    import ast  # local import so this block does not depend on other cells

    try:
        tree = ast.parse(source)
    except (SyntaxError, ValueError):
        # The file cannot be parsed by this interpreter: fall back to a
        # line-based scan of unindented import statements.
        found = set(re.findall(r'^import\s+(\w+)', source, re.MULTILINE))
        found.update(re.findall(r'^from\s+(\w+)', source, re.MULTILINE))
        return found

    modules = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            # Covers "import a, b.c as d" -> {"a", "b"}.
            modules.update(alias.name.split('.')[0] for alias in node.names)
        elif isinstance(node, ast.ImportFrom) and node.level == 0 and node.module:
            # level > 0 is a relative import, i.e. part of the project itself.
            modules.add(node.module.split('.')[0])
    return modules


def scan_project_modules(project_name):
    """Collect the top-level module names imported by ``.py`` files in a project.

    Walks ``project_name`` recursively and parses every ``.py`` file, so
    imports nested in functions and comma-separated imports are found, while
    text inside strings or docstrings is ignored. Files are read as UTF-8 with
    undecodable bytes replaced, so one badly encoded file does not abort the scan.

    Args:
        project_name: Root directory of the project to scan.

    Returns:
        set[str]: Unique top-level module names. These are import names as
        written in the code, with relative imports excluded.
    """
    py_dependencies_set = set()

    for root, _dirs, files in os.walk(project_name):
        for file in files:
            if not file.endswith('.py'):
                continue
            with open(os.path.join(root, file), 'r', encoding='utf-8', errors='replace') as f:
                py_dependencies_set |= _top_level_imports(f.read())

    return py_dependencies_set

# Usage
# Assume that the project_name is obtained from the get_new_project_name() function
# (calling it here prompts for a directory name before the scan runs).
project_name = get_new_project_name()
py_dependencies_set = scan_project_modules(project_name)
# Show the set of module names collected from the project's .py files.
print(py_dependencies_set)

In [None]:
### Scan Local Pre-existing Virtual Environments:
Create a function (scan_existing_env()) to analyze our existing virtual environments. Determine which environment best fits the dependency tree (current_env_dependencies_set).

In [None]:
# Scan Local Pre-existing Virtual Environments:
def scan_existing_env(current_env_dependencies_set):
    """Find the local conda environment sharing the most packages with a set.

    Parses the text output of ``conda env list``, then counts for each named
    environment how many of its installed package names (from
    ``conda list --json``) appear in ``current_env_dependencies_set``.

    Args:
        current_env_dependencies_set: Set of required package names.

    Returns:
        str | None: Name of the environment with the highest overlap (the
        first one listed wins ties), or None if no environment shares a
        package with the set.

    Raises:
        subprocess.CalledProcessError: If a conda command exits with a
            non-zero status.
    """
    result = subprocess.run(["conda", "env", "list"], stdout=subprocess.PIPE, check=True)

    env_names = []
    for line in result.stdout.decode().splitlines():
        tokens = line.split()
        # Skip blank lines and "#" header lines by content, not by a fixed
        # line offset, so no environment row is dropped.
        if not tokens or tokens[0].startswith('#'):
            continue
        name = tokens[0]
        # A row whose first column is the "*" marker or a filesystem path has
        # no environment name that could be passed to --name.
        if name == '*' or '/' in name or '\\' in name:
            continue
        env_names.append(name)

    best_env = None
    best_match_count = 0

    for env_name in env_names:
        result = subprocess.run(["conda", "list", "--name", env_name, "--json"], stdout=subprocess.PIPE, check=True)
        packages = json.loads(result.stdout)
        package_names = {pkg['name'] for pkg in packages}

        match_count = len(current_env_dependencies_set & package_names)
        if match_count > best_match_count:
            best_env = env_name
            best_match_count = match_count

    return best_env

# Usage
# Assume that current_env_dependencies_set is obtained from the scan_project_modules() function
# NOTE: relies on `project_name` having been assigned by an earlier cell.
current_env_dependencies_set = scan_project_modules(project_name)
best_env = scan_existing_env(current_env_dependencies_set)
# Prints the best-matching environment name, or None when no environment shares a package.
print(best_env)

In [None]:
### Decide on Environment Strategy:
Evaluate whether to use or clone an existing virtual environment or create a new one. Consider whether existing environments have the required up-to-date packages.

In [None]:
# Code to decide on environment strategy
def decide_environment_strategy(project_name):
    """Print a recommendation for which conda environment the project should use.

    Scans the project for imported modules, finds the existing environment
    with the largest overlap, and prints whether that environment can be used
    as-is, should be cloned and extended, or whether a new one is needed.

    Args:
        project_name: Root directory of the project to analyse.

    Returns:
        None. The recommendation is only printed.
    """
    required = scan_project_modules(project_name)
    best_env = scan_existing_env(required)

    # Guard clause: nothing local overlaps with the project's imports.
    if best_env is None:
        print("A new environment can be created.")
        return

    listing = subprocess.run(
        ["conda", "list", "--name", best_env, "--json"],
        stdout=subprocess.PIPE,
        check=True,
    )
    installed = set()
    for entry in json.loads(listing.stdout):
        installed.add(entry['name'])

    if not required.issubset(installed):
        missing_packages = required - installed
        print(f"The existing environment '{best_env}' can be cloned and the following packages can be installed: {missing_packages}")
    else:
        print(f"The existing environment '{best_env}' can be used directly.")

# Usage
# Assume that the project_name is obtained from the get_new_project_name() function
project_name = get_new_project_name()
decide_environment_strategy(project_name)

In [None]:
### Decide on Environment Strategy:
Evaluate whether to use or clone an existing virtual environment or create a new one. Consider whether existing environments have the required up-to-date packages.

In [None]:
# Decide on Environment Strategy:

import subprocess
import json

def decide_environment_strategy(project_dependencies):
    """Choose between reusing, cloning, or creating a conda environment.

    Args:
        project_dependencies: Set of package names the project needs.

    Returns:
        tuple: ``(env_name, 'use')`` when the best-matching environment already
        contains every dependency, ``(env_name, 'clone')`` when it is missing
        some, or ``(None, 'create')`` when no environment matches at all.

    Raises:
        subprocess.CalledProcessError: If ``conda list`` exits with a non-zero
            status.
    """
    best_env = scan_existing_env(project_dependencies)

    # Guard clause: no candidate environment, so a fresh one is needed.
    if best_env is None:
        print("A new environment can be created.")
        return None, 'create'

    listing = subprocess.run(
        ["conda", "list", "--name", best_env, "--json"],
        stdout=subprocess.PIPE,
        check=True,
    )
    best_env_packages = set()
    for entry in json.loads(listing.stdout):
        best_env_packages.add(entry['name'])

    if not project_dependencies.issubset(best_env_packages):
        missing_packages = project_dependencies - best_env_packages
        print(f"The existing environment '{best_env}' can be cloned and the following packages can be installed: {missing_packages}")
        return best_env, 'clone'

    print(f"The existing environment '{best_env}' can be used directly.")
    return best_env, 'use'

# Usage
# Assume that project_dependencies is obtained from the scan_project_modules() function
project_dependencies = scan_project_modules(project_name)
best_env, strategy = decide_environment_strategy(project_dependencies)

In [None]:
### Iterate Until All Packages and Versions Are Found:
Continuously search and verify package availability. Build lists of repositories (conda-forge, anaconda, CONDA-OTHER, or pip) containing required versions. Ensure all dependencies are covered.

In [None]:
# Code to iterate until all packages and versions are found

import subprocess
import json

def search_packages(package_versions, repositories=('conda-forge', 'anaconda')):
    """Report which channels offer each requested package version.

    Each channel is searched on its own with ``--override-channels``, so a hit
    is attributed only to the channel that actually provides the package.
    Without that flag conda also searches the configured default channels.

    Args:
        package_versions: Mapping of package name to required version. An
            empty or None version searches for the package name alone.
        repositories: Channel names to search, in order.

    Returns:
        dict[str, list[str]]: For each channel, the package names found there.
        A failed search or unparsable output counts as "not found".
    """
    available_packages = {repo: [] for repo in repositories}

    for package, version in package_versions.items():
        spec = f"{package}={version}" if version else package
        for repo in repositories:
            command = ["conda", "search", "--override-channels", "--channel", repo, spec, "--json"]
            result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

            if result.returncode != 0:
                continue
            try:
                data = json.loads(result.stdout)
            except ValueError:
                # json.JSONDecodeError subclasses ValueError.
                continue
            if data:
                available_packages[repo].append(package)

    return available_packages

# Usage
# Assume that package_versions is a dictionary where the keys are package names and the values are the required versions
package_versions = {"numpy": "1.21.0", "pandas": "1.3.0"}
# The result maps each searched channel name to the packages found there.
available_packages = search_packages(package_versions)
print(available_packages)

In [None]:
### Create Python Functions for Environment Execution:
Develop functions or separate script commands needed to set up and activate the environment. Handle installation of packages from the identified repositories.

In [None]:
# Code to create Python functions for environment execution
import subprocess

def create_and_setup_environment(env_name, packages, repo='conda-forge'):
    """Create a conda environment and install ``packages`` into it from ``repo``.

    Failures are reported through printed messages rather than exceptions, as
    the ``returncode`` checks intend. Packages are installed with
    ``--name env_name`` so they land in the new environment instead of the
    currently active one. No ``conda activate`` is attempted: it is a shell
    function and cannot be run through ``subprocess``.

    Args:
        env_name: Name of the environment to create.
        packages: Iterable of package specs to install.
        repo: Channel to install from.

    Returns:
        None.
    """
    # Create a new environment
    result = subprocess.run(["conda", "create", "--name", env_name, "--yes"])
    if result.returncode != 0:
        print(f"Failed to create environment '{env_name}'.")
        return

    # Install packages directly into the new environment
    for package in packages:
        result = subprocess.run(
            ["conda", "install", "--name", env_name, "--channel", repo, package, "--yes"]
        )
        if result.returncode != 0:
            print(f"Failed to install package '{package}'.")

# Usage
# Assume that env_name is the name of the new environment, and packages is a list of package names
# Running this cell calls conda through subprocess, so conda must be on PATH.
env_name = "new_env"
packages = ["numpy", "pandas"]
create_and_setup_environment(env_name, packages)


In [None]:
### Create Python Functions (or Separate Script Commands) for Environment Build Steps:
Implement the following steps as functions or separate script commands:
- a) Create “Core” Environment: Use as many conda-forge packages as possible. Example: conda create -n new_env -c conda-forge python conda ...
- b) Activate the New Environment: Use conda activate new_env.
- c) Install Packages from Conda Channels: Install all other required packages from conda-forge, anaconda, or other specified channels. Example: conda install -c conda-forge package1 package2 ...
- d) Install Remaining Packages via Pip: Install any remaining packages that are not available in conda format. Example: pip install package3 package4 ...

In [None]:
# Create Python Functions (or Separate Script Commands) for Environment Build Steps:

import subprocess

def create_core_environment(env_name):
    """Create the "core" environment with python and conda from conda-forge.

    The command runs without ``check=True`` so that a non-zero exit status
    reaches the ``returncode`` test below instead of raising.

    Args:
        env_name: Name of the environment to create.

    Returns:
        bool: True if the environment was created, False otherwise.
    """
    result = subprocess.run(
        ["conda", "create", "--name", env_name, "-c", "conda-forge", "python", "conda", "--yes"]
    )
    if result.returncode != 0:
        print(f"Failed to create environment '{env_name}'.")
        return False
    return True

def activate_environment(env_name):
    """Confirm that ``env_name`` exists and tell the user how to activate it.

    ``conda activate`` is a shell function, so running it through
    ``subprocess`` fails and could not change the calling process anyway.
    This function instead verifies that the environment can be listed and
    prints the activation command. The other build steps address the
    environment explicitly (``--name`` / ``conda run -n``), so they do not
    need an activated shell.

    Args:
        env_name: Name of the environment to verify.

    Returns:
        bool: True if the environment exists, False otherwise.
    """
    result = subprocess.run(
        ["conda", "list", "--name", env_name, "--json"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    if result.returncode != 0:
        print(f"Failed to activate environment '{env_name}'.")
        return False
    print(f"Environment '{env_name}' is available. To use it in a shell, run: conda activate {env_name}")
    return True

def install_packages_from_conda(env_name, packages, channel="conda-forge"):
    """Install ``packages`` into ``env_name`` from a conda channel, one by one.

    The commands run without ``check=True`` so that a failed install reaches
    the ``returncode`` test and is reported instead of raising.

    Args:
        env_name: Target environment name.
        packages: Iterable of package specs.
        channel: Channel to install from.

    Returns:
        bool: True if every package installed (also for an empty list);
        False as soon as one install fails. Remaining packages are skipped.
    """
    for package in packages:
        result = subprocess.run(
            ["conda", "install", "--name", env_name, "-c", channel, package, "--yes"]
        )
        if result.returncode != 0:
            print(f"Failed to install package '{package}' from channel '{channel}'.")
            return False
    return True

def install_remaining_packages_via_pip(env_name, packages):
    """Install ``packages`` with pip inside ``env_name`` via ``conda run``.

    The commands run without ``check=True`` so that a failed install reaches
    the ``returncode`` test and is reported instead of raising.

    Args:
        env_name: Target environment name.
        packages: Iterable of pip requirement specs.

    Returns:
        bool: True if every package installed (also for an empty list);
        False as soon as one install fails. Remaining packages are skipped.
    """
    for package in packages:
        result = subprocess.run(["conda", "run", "-n", env_name, "pip", "install", package])
        if result.returncode != 0:
            print(f"Failed to install package '{package}' via pip.")
            return False
    return True

# Usage
env_name = "new_env"
conda_packages = ["package1", "package2"]
pip_packages = ["package3", "package4"]

# Each step runs only if the previous one returned True (short-circuit `and`);
# the pip step runs last, once all conda steps have succeeded.
if (
    create_core_environment(env_name)
    and activate_environment(env_name)
    and install_packages_from_conda(env_name, conda_packages)
):
    install_remaining_packages_via_pip(env_name, pip_packages)

In [None]:
### Build the Final Script to Execute the 12 Steps:
Combine all the steps into a single script or workflow. Ensure proper error handling and logging.



















































```python
# Placeholder for code to build the final script to execute the