## Basic settings

In [146]:
# 15 = VS 2017, 16 = VS 2019, 17 = VS 2022, etc
vs_version = 17
# release = current channel, pre = preview channel
vs_channel = 'release'
# Build tools only, no guarantee it will work for other products!
vs_product_id = 'Microsoft.VisualStudio.Product.BuildTools'
# If neutral language is not found, it will try to find the preferred language(s)
# Priority is based on the index and only one language will be chosen at the end
# If none of them is found but the "language" key exists, the package may be skipped
vs_lang = ['en-US']
# Prefer specific chip/arch for packages (works the same way as "vs_lang")
# Chip value is either X86 or X64, but host arch can be X86, X64, or arm64
# The first one should be your system spec, it will be used to check dependency requirement
# The second one and the rest are for fallback if the required package is not found
vs_chip = ['X64', 'X86']
vs_host = ['X64', 'X86']
# Target arch should be the same as host to avoid possible file conflict
# Currently, there is no reliable (json) key that can be used to check target arch
# I don't think the VS installer provides a way to choose target arch either
# Because of that, the host and target arch will always be the same
# vs_target = ['X64', 'X86']

# Automatically accept VS license
vs_license_accept = True
# Compare the local manifest hash with online hash to avoid redownloading
# Sadly, it's unreliable because the online hash is not always up-to-date
# Setting to false will assume the local manifest is always valid and up-to-date
vs_verify_manifest = False
# Automatically download and/or verify packages
# Packages will not be redownloaded if already up-to-date
vs_package_download = True
# Delete packages that are not listed in payloads list (variable)
# That means switching workloads may also delete some packages
vs_package_del_outdated = True
# Extract downloaded package files for supported package type
# Currently, the only supported package type is vsix
# If set to false, output may still appear but no files will be extracted
vs_package_extract = True
# Temporarily install Windows SDKs if exist in packages
# Files will be copied and Windows SDKs will be uninstalled again after that
# Make sure you don't have any conflicting SDKs installed to prevent accidental uninstall
vs_package_win_sdk = True

# For storing json data files (channel info, manifest, etc)
data_folder = 'data'
# For storing downloaded packages
# You can use relative path, but absolute path is recommended
# Backslash in path should be escaped and trailing slash must be removed
cache_path = 'packages'
# Root directory for file extraction
# Some files may need to be extracted/installed manually
extract_path = 'C:\\BuildTools'

In [119]:
import os

# Convert relative path to absolute path
# Relative path is based on current working dir
data_folder = os.path.abspath(data_folder)
cache_path = os.path.abspath(cache_path)
extract_path = os.path.abspath(extract_path)

# Make the data folder if it does not exist yet
# "exist_ok" replaces the separate existence check, so there is no gap
# between checking and creating, and a non-directory path fails right here
os.makedirs(data_folder, exist_ok = True)

## Save channel info

In [120]:
import requests
import json

# Local cache file of the channel info and the URL it is downloaded from
channel_info_path = f'{data_folder}\\channel_info.json'
channel_info_url = f'https://aka.ms/vs/{vs_version}/{vs_channel}/channel'
print(channel_info_url)

if not vs_verify_manifest and os.path.isfile(channel_info_path):
    # Load local channel info file (assumed to be valid and up-to-date)
    with open(channel_info_path, encoding = "utf8") as file:
        load = json.load(file)
else:
    # Download when verification is requested, or when there is no local
    # copy yet (e.g. the very first run), instead of failing on "open"
    channel_info = requests.get(url = channel_info_url)
    # Get channel info (200 = HTTP OK)
    if channel_info.status_code == 200:
        # Load the channel info as json and make it pretty
        # Dumping it directly may cause encoding error
        load = json.loads(channel_info.content)
        # Ensure unicode is not converted to ascii
        dump = json.dumps(load, indent = 4, ensure_ascii = False)
        # Save the channel info
        with open(channel_info_path, 'wb') as file:
            file.write(dump.encode())
    else:
        raise ConnectionError('Failed to download channel info!')

https://aka.ms/vs/17/release/channel


In [121]:
# Case insensitive dict key (value is still case sensitive)
# For dealing with case inconsistencies
from requests.structures import CaseInsensitiveDict as LowerDict

# I noticed some case inconsistencies while observing the manifest
# Most of them are related to package/dependency id, chip/arch, and language 
# This function should be used when comparing/checking string in above scenarios 
# However, you should keep the original case when saving the string/variable
def lower(string):
    """ Convert everything that contains string (`str`, `list`, etc) to lowercase.

    - `str` is returned as a lowercase `str`
    - `list`, `tuple`, `set`, `dict` and `LowerDict` are iterated (keys only for
      the dict types) and returned as a new `list` of lowercase items,
      where `None` items are kept as `None`
    - Anything else (including `None`) returns `None` """
    if string is None:
        return None
    if isinstance(string, str):
        return string.lower()
    # Builtin containers are tested first, "LowerDict" is only consulted for the rest
    if isinstance(string, (list, tuple, set, dict)) or isinstance(string, LowerDict):
        return [ None if i is None else i.lower() for i in string ]
    # Unsupported type
    return None

def lower_remove(list: list, string: str):
    """ Remove all matched string from list (case insensitive).

    The list is modified in place and nothing is returned.
    Items that are not `str` are only removed if they are equal to `string`. """
    def fold(value):
        # Only strings can be lowercased, keep the rest (e.g. None) as it is
        return value.lower() if isinstance(value, str) else value

    target = fold(string)
    # Rebuild the content with slice assignment instead of calling "remove"
    # while looping, which skips items and may try to remove a missing value
    list[:] = [ i for i in list if fold(i) != target ]

In [122]:
# Regex
import re

# Read the channel info again
channel_info = LowerDict(load)

for i in channel_info['channelItems']:
    # Search for VS product id (build tools)
    if lower(i['id']) != lower(vs_product_id):
        continue

    # Save latest build version
    if i.get('version'): vs_version = i['version']

    resources = i.get('localizedResources')
    if resources:
        # "localizedResources" doesn't have "neutral" language
        # Walk "vs_lang" in priority order and only use the first language found
        for k in vs_lang:
            j = next((r for r in resources if lower(r['language']) == lower(k)), None)
            if j is None: continue
            # Print license link and other info
            print(f'{j["title"]} ({vs_version})')
            print(j['description'])
            print(f'License: {j["license"]}\n')
            break
        break

# Take only major and minor version then convert to float
# For example, 17.7.34024.191 will become 17.7
# "str()" and the optional minor part keep this working when the product is
# not found (version is still the integer from settings) or the cell is re-run
# NOTE: a float can't tell 17.1 and 17.10 apart, keep that in mind
version_match = re.search(r'^(\d+(?:\.\d+)?)', str(vs_version))
if not version_match:
    raise ValueError(f'Failed to parse VS version "{vs_version}"!')
vs_version = float(version_match.group(1))

if vs_license_accept:
    print('By continuing, I already accepted the license listed above')
else:
    while True:
        vs_license_accept = input('Do you accept the license listed above (Y/N)?')
        if lower(vs_license_accept) == 'y': break
        else: print('You must accept the license before continuing!')

Visual Studio Build Tools 2022 (17.7.34031.279)
The Visual Studio Build Tools allows you to build native and managed MSBuild-based applications without requiring the Visual Studio IDE. There are options to install the Visual C++ compilers and libraries, MFC, ATL, and C++/CLI support.
License: https://go.microsoft.com/fwlink/?LinkId=2179911

By continuing, I already accepted the license listed above


## Save manifest

In [123]:
from warnings import warn
import hashlib

def check_sha256sum(file_path: str, correct_sha256: str) -> bool:
    """ Hash the file with SHA-256 and compare it (case insensitive) with `correct_sha256` """
    # Read in 64 kB pieces so large files never have to fit in memory
    chunk_size = 65536
    digest = hashlib.sha256()
    with open(file_path, 'rb') as stream:
        # "iter" with a sentinel stops as soon as "read" returns empty bytes
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)
    return lower(digest.hexdigest()) == lower(correct_sha256)

# Get manifest from channel info
for i in channel_info['channelItems']:
    # Get product id and manifest URL
    if lower(i['type']) == lower('Manifest'):
        print(i['id'])
        # Normally only one payload exists, therefore index 0
        print(i['payloads'][0]['url'])
        if len(i['payloads']) > 1:
            warn('Manifest has more than 1 payload!')
        # Save the manifest URL
        manifest = i['payloads'][0]['url']
        manifest_sha256 = i['payloads'][0]['sha256']
        break
else:
    # Fail with a clear message instead of a NameError further below
    raise LookupError('Manifest entry is not found in channel info!')

manifest_path = f'{data_folder}\\manifest.json'
manifest_valid = False

if os.path.isfile(manifest_path):
    if vs_verify_manifest:
        # NOTE(review): the file on disk is the re-indented json (see below), so its
        # hash presumably never equals the hash of the raw online payload - confirm
        manifest_valid = check_sha256sum(manifest_path, manifest_sha256)
    else:
        manifest_valid = True

if manifest_valid:
    # Load local manifest file
    with open(manifest_path, encoding = "utf8") as file:
        load = json.load(file)
else:
    # Download manifest if necessary
    # The response gets its own name so "manifest" keeps holding the URL
    manifest_response = requests.get(url = manifest)
    if manifest_response.status_code == 200:
        load = json.loads(manifest_response.content)
        dump = json.dumps(load, indent = 4, ensure_ascii = False)
        # Save the manifest as json
        with open(manifest_path, 'wb') as file:
            file.write(dump.encode())
    else:
        raise ConnectionError('Failed to download manifest!')

Microsoft.VisualStudio.Manifests.VisualStudio
https://download.visualstudio.microsoft.com/download/pr/e514a25b-a89d-4051-a63c-05ccd9be41e9/5f11f64927ac42b2e9f4c4c943b309572ca7b2702770731e3a3180a9c7033daf/VisualStudio.vsman


In [124]:
import pandas as pd

# Wrap the loaded manifest so its top-level keys are case insensitive
# Treat it as read-only, it is reused by many later cells
manifest = LowerDict(load)

# Count how many packages exist per package type (for research)
pkg_types = LowerDict()
for pkg in manifest['packages']:
    # Missing types start from 0, lookup is case insensitive (lower dict)
    pkg_types[pkg['type']] = pkg_types.get(pkg['type'], 0) + 1

# One row per package type with its number of packages
df = (
    pd.DataFrame.from_dict(pkg_types, orient = 'index', columns = ['packages_num'])
    .rename_axis('package_type')
    .reset_index()
)

df

Unnamed: 0,package_type,packages_num
0,Component,477
1,Msi,1549
2,Exe,112
3,Zip,21
4,Vsix,10814
5,Msu,14
6,Group,83
7,WindowsFeature,3
8,Product,10
9,Workload,36


## Get workload info

In [125]:
# Get how many workloads (specializations) are there
num = pkg_types['Workload']

# Initialize empty dataframe
df = pd.DataFrame(
    columns = ['id', 'title', 'version', 'description', 'category', 'dependenciesNum'],
    index = range(num)
)

# Initialize workload as a dict list (workload id as key)
# Each workload is derived from manifest "packages"
workload = LowerDict()

# Row index of the dataframe, increased before each row is written
num = -1
for i in manifest['packages']:
    if lower(i['type']) != lower('Workload'):
        continue

    # Save workload props (values) to "workload" variable
    workload[i['id']] = i

    num += 1
    df.loc[num, 'id'] = i.get('id')
    df.loc[num, 'version'] = i.get('version')
    # How many dependencies that the workload have?
    df.loc[num, 'dependenciesNum'] = len(i['dependencies']) if i.get('dependencies') else 0

    # Get descriptive info for each workload
    # "localizedResources" doesn't have "neutral" language
    # Walk "vs_lang" in priority order and only use the first language found,
    # so a later resource can no longer overwrite a more preferred language
    resources = i.get('localizedResources') or []
    for k in vs_lang:
        j = next((r for r in resources if lower(r['language']) == lower(k)), None)
        if j is None: continue
        df.loc[num, 'title'] = j.get('title')
        df.loc[num, 'description'] = j.get('description')
        # Category may not always exist
        df.loc[num, 'category'] = j.get('category')
        break

# Save summarized info of workloads
df.to_json(
    f'{data_folder}\\workload_summary.json',
    indent = 4,
    force_ascii = False,
    orient = 'records'
)

# Save detailed info of workloads
# Convert LowerDict to dict first before exporting
dump = json.dumps(dict(workload), indent = 4, ensure_ascii = False)
with open(f'{data_folder}\\workload_details.json', 'wb') as file:
    file.write(dump.encode())

df

Unnamed: 0,id,title,version,description,category,dependenciesNum
0,Microsoft.VisualStudio.Workload.Azure,Azure development,17.7.33905.399,"Azure SDKs, tools, and projects for developing...",Web & Cloud,18
1,Microsoft.VisualStudio.Workload.AzureBuildTools,Azure development build tools,17.7.33905.399,MSBuild tasks and targets for building Azure a...,Web & Cloud,15
2,Microsoft.VisualStudio.Workload.CoreEditor,Visual Studio core editor,17.7.33905.399,"The Visual Studio core shell experience, inclu...",,1
3,Microsoft.VisualStudio.Workload.Data,Data storage and processing,17.7.33905.399,"Connect, develop, and test data solutions with...",Other Toolsets,4
4,Microsoft.VisualStudio.Workload.DataBuildTools,Data storage and processing build tools,17.7.33905.399,Build SQL Server Database Projects,Web & Cloud,3
5,Microsoft.VisualStudio.Workload.DataScience,Data science and analytical applications,17.7.33905.399,Languages and tooling for creating data scienc...,Other Toolsets,4
6,Microsoft.VisualStudio.Workload.ManagedDesktop,.NET desktop development,17.7.34002.345,"Build WPF, Windows Forms, and console applicat...",Desktop & Mobile,24
7,Microsoft.VisualStudio.Workload.ManagedDesktop,.NET desktop development,17.7.34002.345,"Build WPF, Windows Forms, and console applicat...",Desktop & Mobile,26
8,Microsoft.VisualStudio.Workload.ManagedDesktop...,.NET desktop build tools,17.7.33905.399,"Tools for building WPF, Windows Forms, and con...",Desktop & Mobile,12
9,Microsoft.VisualStudio.Workload.ManagedGame,Game development with Unity,17.7.33905.399,"Create 2D and 3D games with Unity, a powerful ...",Gaming,5


In [126]:
print(workload)

{'Microsoft.VisualStudio.Workload.Azure': {'id': 'Microsoft.VisualStudio.Workload.Azure', 'version': '17.7.33905.399', 'type': 'Workload', 'icon': {'mimeType': 'image/svg+xml', 'base64': 'PHN2ZyBpZD0iZTFkYmY3ZGQtZjFiMS00YmI4LWFiZDgtNDBmMTljZjBkZGViIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSIzMiIgaGVpZ2h0PSIzMiIgdmlld0JveD0iMCAwIDMyIDMyIj4NCiAgPGRlZnM+DQogICAgPGxpbmVhckdyYWRpZW50IGlkPSJmMDY5Y2FhNi04NGYxLTRiZTQtODJiNy05YzAxN2YzYWUzMmYiIHgxPSItODA1LjY5NTQiIHkxPSIxMTkuNjM3NyIgeDI9Ii04MTQuNjA3NCIgeTI9IjkzLjMwOTUiIGdyYWRpZW50VHJhbnNmb3JtPSJtYXRyaXgoMSwgMCwgMCwgLTEsIDgyMCwgMTI0KSIgZ3JhZGllbnRVbml0cz0idXNlclNwYWNlT25Vc2UiPg0KICAgICAgPHN0b3Agb2Zmc2V0PSIwIiBzdG9wLWNvbG9yPSIjMTE0YThiIi8+DQogICAgICA8c3RvcCBvZmZzZXQ9IjEiIHN0b3AtY29sb3I9IiMwNjY5YmMiLz4NCiAgICA8L2xpbmVhckdyYWRpZW50Pg0KICAgIDxsaW5lYXJHcmFkaWVudCBpZD0iZjE2MzU4Y2QtNjcxNC00MjZhLWIyNDgtMzY3ZTBhNDI5MDFmIiB4MT0iLTgwNC4wNjMyIiB5MT0iMTIwLjQwNTkiIHgyPSItNzk0LjI4MDciIHkyPSI5NC4zNDI2IiBncmFkaWVudFRyYW5zZm9ybT0ibWF0cml4KDEsIDAsIDAs

## Filter workload and get main dependencies

In [127]:
# Raised inside an inner (2nd) loop and caught in the outer (1st) loop
# The outer loop then moves on to its next item ("continue")
class ContinueParent(Exception):
    """ Signal to skip the current iteration of the parent loop """

# Some packages may only have 1 variant of chip/arch available
# If less strict mode is set to true, even if that package variant doesn't
# match our preferred chip/arch, it will still be included as dependency
less_strict = False

# List of chosen workload id(s)
# Change it based on your needs
wl_needed = [
    # I think MSBuildTools is required for any build tools
    'Microsoft.VisualStudio.Workload.MSBuildTools',
    # Desktop development with C++ (build tools)
    'Microsoft.VisualStudio.Workload.VCTools'
]

# Initial dependencies (packages) of workload
# Does not include nested dependencies yet
dp_init = LowerDict()

def check_version(ver_range: str) -> bool:
    """ Check if dependency version requirement is valid for current VS version.

    The global `vs_version` (float, major.minor) is compared with `ver_range`:
    - `''` or `None`: no requirement, always true
    - `[17.0,18.0)`: true if 17.0 <= `vs_version` < 18.0 (upper bound is exclusive)
    - `[17.0,)`: true if `vs_version` >= 17.0
    - `17.0.0.0`, `17.0` or `17`: true if major.minor equals `vs_version`

    Any other format sends a warning and is treated as supported,
    the same as an empty requirement, so the dependency is not dropped. """
    # Assume that it supports all version
    if ver_range in ('', None): return True

    # Search for pattern like this: [17.0,18.0)
    bounded = re.search(r'^\[(\d+\.\d+),(\d+\.\d+)\)$', ver_range)
    if bounded:
        min_ver, max_ver = (float(x) for x in bounded.groups())
        # "[" includes the minimum version, ")" excludes the maximum version
        return min_ver <= vs_version < max_ver

    # Search for pattern like this: [17.0,)
    open_ended = re.search(r'^\[(\d+\.\d+),\)$', ver_range)
    if open_ended:
        return vs_version >= float(open_ended.group(1))

    # Search for pattern like this: 17.0.0.0
    # Or just until the minor version: 17.0
    # Or only the major version: 17 (taken as 17.0)
    exact = re.search(r'^(\d+(?:\.\d+)?)(?:\.\d+){0,2}$', ver_range)
    if exact:
        return vs_version == float(exact.group(1))

    # Unknown format, there are no bounds to compare with
    warn(f'Version format "{ver_range}" is unsupported!')
    return True

# Used together with "get_packages_info"
# Output will be written as a dict list (dependency id as key)
def get_sub_dependencies(parent_id: str, parent_props: LowerDict, output_dp_list: LowerDict) -> bool:
    """ List all dependencies one level below (not nested) and write to output.

    `parent_id` is stored as "parentId" of each newly added dependency.
    `parent_props` is the package (or workload) dict that may have a "dependencies" key.
    `output_dp_list` is modified in place: new dependency ids are added with their
    requirement and an "incomplete" marker, ids that are already listed only get
    their recommendation flag upgraded.

    Dependencies are skipped if their version, chip, machine arch, or product id
    ("when") requirement doesn't match the global settings.

    Returns true if the parent has dependencies, otherwise false. """
    dependencies = parent_props.get('dependencies')
    # Check if the parent has dependencies
    if not dependencies: return False

    # MS write package (sub) dependencies as dict, not as list
    # Therefore loop it as dict (with key and value)
    for key, value in dependencies.items():
        # If the dependency value is still a dict
        if isinstance(value, dict):
            # Some dependencies have id written as a subkey (e.g. AspNet)
            # Otherwise, take the parent dict name (key) directly as id
            d_id = value.get('id') or key
            # VS version that's required to install the dependency
            # If it doesn't support the current version then skip
            d_version = value.get('version')
            if not check_version(d_version): continue
            # Get type and send a warning if the type is wrong (e.g. vsix, msi, exe)
            d_type = value.get('type')
            if lower(d_type) not in lower(['Recommended', 'Optional', '', None]):
                warn(f'Recommendation flag ({d_type}) is different than expected!')
            # Sometimes the dependency chip/arch type is also explicitly stated
            # It means that it's required but only if you have a specific chip/arch
            # The available chip/arch keys are "chip", "machineArch", and "productArch"
            # Though from my observation, "productArch" is never listed as a requirement
            d_chip = value.get('chip')
            if lower(d_chip) not in lower([vs_chip[0], '', None]): continue
            d_machine_arch = value.get('machineArch')
            if lower(d_machine_arch) not in lower([vs_host[0], '', None]): continue
            # Only add the dependency if the product id matches (build tools)
            # A missing or empty "when" means there is no product restriction
            d_prod_id = value.get('when')
            if d_prod_id and lower(vs_product_id) not in lower(d_prod_id): continue

        # If the dependency value is a string (version number)
        else:
            d_id = key
            d_version = value
            d_type = None
            d_chip = None
            d_machine_arch = None

        if not output_dp_list.get(d_id):
            # If the dependency is not listed yet
            output_dp_list[d_id] = { # Lower dict
                # Id of the parent element that needs this dependency
                'parentId': parent_id,
                # Requirement to install the dependency (from the parent)
                'require': {
                    # Recommendation flag (recommended or optional)
                    # If the type is none it's usually required or platform-specific
                    'flag': d_type,
                    'version': d_version,
                    'chip': d_chip,
                    'machineArch': d_machine_arch
                },
                # Identifier that we only have basic info about this dependency
                # Once we call "get_packages_info", it will be removed
                'incomplete': True
            }
        # If the dependency is already listed, just check for important value(s)
        # May need to review if language/chip/arch should also be checked or not
        else:
            require = output_dp_list[d_id]['require']
            # Only upgrade the flag: optional -> recommended -> required (none)
            if d_type is None:
                require['flag'] = None
            # A flag that is already required (none) must not be downgraded
            elif lower(d_type) == lower('Recommended') and require['flag'] is not None:
                require['flag'] = d_type

    # The parent has dependencies
    return True

# Get dependencies from the workload(s) and save them as dict list
# Dependency id is used as key, disallowing duplicates if there are multiple variants
# May be a bad idea but I'm too lazy to rework the entire code
# However, it will also prevent infinite loop if child dependency refers to the parent again
# The loop variable is not named "id" so the builtin "id()" stays usable afterwards
for wl_id, props in workload.items():
    if lower(wl_id) in lower(wl_needed):
        # Get dependencies from parent id and props, write/add the results to "dp_init"
        get_sub_dependencies(wl_id, props, dp_init)

# Useful to search for language and chip/arch variants for specific dependency id
# Though it may take some time if we call this function multiple times
def get_packages_variants(src_manifest: LowerDict, dp_id: str, pkg_index: int = None, max_pkg: int = None) -> dict:
    """ Check available variants of a package (dependency) id.
        If `pkg_index` (start) and `max_pkg` are valid, packages list will be sliced.
        That will improve performance instead of looping the entire manifest packages.

        Returns a dict with the package "id" and the lists of "language", "chip",
        "machineArch", and "productArch" values found (one item per variant that has the key).
        A warning is sent for each key that only exists in some of the variants. """
    variants = {
        'id': dp_id,
        'language': [],
        'chip': [],
        'machineArch': [],
        'productArch': []
    }

    packages = src_manifest['packages']
    # Get how many packages are there (for index checking)
    num_pkg = len(packages)

    # Index 0 is a valid start, so compare with None instead of truthiness
    if pkg_index is not None and max_pkg:
        # Start searching variants at "pkg_index" until "max_pkg" is reached
        start = pkg_index
        # Make sure the index is not out of bound
        end = min(pkg_index + max_pkg + 1, num_pkg)
    else:
        start = 0
        end = num_pkg

    num = 0
    for i in packages[start:end]:
        if lower(i['id']) != lower(dp_id): continue
        # Track how many variants are there
        num += 1
        # Add variants only if the key exists
        for key in variants:
            if i.get(key):
                if key == 'id': variants[key] = i.get(key)
                else: variants[key].append(i[key])

    # Loop the dict itself (not a set) so the warnings always come in the same order
    for key in variants:
        if key == 'id': continue
        # If the key value is not an empty list
        # Warn if key only exists in some variants
        if variants[key] and len(variants[key]) != num:
            # You can remove the warning if it's annoying
            # Though this one can be used to check in case of missing file
            warn(f'Not all variants of {variants["id"]} have "{key}" key!')

    return variants

# If you modify "get_sub_dependencies", please make sure it doesn't break this function
def get_packages_info(src_manifest: LowerDict, input_dp_list: LowerDict):
    """ Fetch packages info from manifest to add that info to input.

    `src_manifest` is the manifest whose "packages" list is searched.
    `input_dp_list` is the dependency dict made by "get_sub_dependencies", it is
    modified in place: entries with the "incomplete" marker get the package keys,
    the preferred translation, and "dependenciesNum", then the marker is removed.
    An entry may be removed if no acceptable variant exists and "less_strict" is false.

    Preferred variants come from the global "vs_lang", "vs_chip", and "vs_host".
    Nothing is returned. """
    dp_variant = None
    for index, i in enumerate(src_manifest['packages']):
        if i['id'] not in input_dp_list: continue # Lower dict

        # The same dict object that is stored in "input_dp_list"
        entry = input_dp_list[i['id']]

        # Skip getting info for this dependency (package) if it has already been done
        # By checking if the "incomplete" key exists or not
        if not entry.get('incomplete'): continue

        # Check if current package has any of these keys
        if any([i.get('language'), i.get('chip'), i.get('productArch'), i.get('machineArch')]):
            # If package variants haven't been searched before then search it now
            if not dp_variant or lower(dp_variant['id']) != lower(i['id']):
                # Search in the manifest that was passed in, not in the global one
                if i.get('language'):
                    # Language combined with arch may create many variants
                    # 14 language * 3 machineArch * 3 productArch = 126 variants
                    # Use a bigger number of max packages to search in case of bad math
                    # Also as a fail-safe for the possibility of future values
                    dp_variant = get_packages_variants(src_manifest, i['id'], index, 250)
                else:
                    # 3 machineArch * 3 productArch = 9 variants
                    dp_variant = get_packages_variants(src_manifest, i['id'], index, 30)

        # Some packages may be listed multiple times but with different props (values)
        # If that happens, we can filter it to get just one variant instead of all
        # This applies to language, chip/arch, and maybe some others (need research)

        # "chip" and "machineArch" are (most likely) used to match host arch with package arch
        # These keys may also exist in (sub) "dependencies" key of a package (as a requirement)
        # Be wary though, I think the possible valid values for "chip" are only X86 and X64
        # For example, if "machineArch" is arm64, the "chip" key (if exist) is still X64

        # Some packages may also list themselves as a (sub) dependency (with specific requirement)
        # In that case, the package will not be replaced if current system doesn't match the requirement
        # Not sure if it's the right thing to do, may need to review it again in the future

        # "productArch" is something else and it's not reliable to equalize it as target arch
        # This key rarely exists, and sometimes the value is "neutral" even though it's for specific arch
        # There's also target arch in id (e.g. targetX64) but still may not contain "productArch"
        # Filtering target arch in package id is possible (with regex), but still kind of risky

        # Anyway, these values are unreliable (may not always exist), so don't rely much on it
        # Skipping things recklessly by just matching these values may cause missing file(s)
        # I did my best to prevent this, but it's definitely not bulletproof

        # Example cases regarding chip/arch (analyze the conclusion on your own):
        #   'Microsoft.VS.VC.vckey.arm64_x64.Shortcuts', (machine arch not equal chip?)
        #   'Microsoft.VisualStudio.VC.MSBuild.v170.X64.v143' (X64 arch but neutral?)
        #   'Microsoft.VisualCpp.Premium.Tools.HostX64.TargetX86' (chip = host?)
        #   'Microsoft.VisualStudio.Debugger.Parallel.Resources' (arch not in id)
        #   'Microsoft.NetCore.TargetingPack.6.0.6.0.21' (dependency trap)

        # Variant keys that need to be checked, priority is based on the index
        # Only one final package will be chosen no matter how many package variants are found
        # Check the json file manually to see if the final language/chip/arch is correct
        # Compare the variant you get with variants from manifest (by searching package id)
        variants = ['language', 'machineArch', 'chip', 'productArch']

        try:
            for key in variants:
                # Nothing to filter if the package doesn't have this key (or it's empty)
                val = i.get(key)
                if not val: continue

                # Preferred variants for language, chip, etc
                if key == 'language': vs_spec = vs_lang
                elif key == 'chip': vs_spec = vs_chip
                # Assume it's arch if not the others
                else: vs_spec = vs_host

                required = entry['require'].get(key)

                # If "key" is not explicitly stated as a requirement from the parent
                if not required:
                    # If the neutral variant or my preferred variant exists
                    if any(lower(x) in lower(['neutral', *vs_spec]) for x in dp_variant[key]):
                        # Prioritize variant based on "vs_spec" index order
                        # The latter will be removed if an earlier index variant exists
                        for x in vs_spec:
                            if lower(x) in lower(dp_variant[key]):
                                # Remove the latter variants and set only this one
                                dp_variant[key] = [x]
                                break
                        # Skip this one if it's not the wanted variant
                        if lower(i[key]) not in lower(['neutral', *dp_variant[key]]):
                            raise ContinueParent()
                    # If it's the only variant available, should we skip it?
                    elif not less_strict:
                        input_dp_list.pop(i['id'])
                        raise ContinueParent() # Skip
                # If "key" is explicitly stated as a requirement from the parent
                # And "key" is not the same as the needed requirement
                elif lower(i[key]) != lower(required):
                    if len(dp_variant[key]) == 1:
                        # If it's the only variant available, should we skip it?
                        if not less_strict:
                            input_dp_list.pop(i['id'])
                            raise ContinueParent() # Skip
                    else: raise ContinueParent() # Skip
        except ContinueParent:
            continue

        # Keys to ignore for all packages, add based on your needs
        # Keys may be skipped if we want to process it manually later
        # Or just skip it because we don't want it to clutter the final json file
        keys_to_ignore = [
            'id',
            'localizedResources',
            'dependencies'
        ]

        # Auto-add all available keys from package id
        for key in i:
            if key not in keys_to_ignore:
                val = i.get(key)
                if val: entry[key] = val

        # Don't add all translations, just the preferred one
        # "localizedResources" doesn't have "neutral" language
        # Walk "vs_lang" in priority order and only use the first language found,
        # so a later resource can no longer overwrite a more preferred language
        resources = i.get('localizedResources')
        if resources:
            for j in vs_lang:
                item = next((r for r in resources if lower(r['language']) == lower(j)), None)
                if item is None: continue
                entry['title'] = item.get('title')
                entry['description'] = item.get('description')
                entry['category'] = item.get('category')
                break

        # Does the dependency have other (nested) dependencies?
        if i.get('dependencies') not in ('', {}, None):
            entry['dependencies'] = i.get('dependencies')
            entry['dependenciesNum'] = len(i['dependencies'])
        else:
            entry['dependenciesNum'] = 0

        # Mark it as complete once we get all important info (by deleting the key)
        entry.pop('incomplete')

# Fill "dp_init" with the package info from manifest
get_packages_info(manifest, dp_init)

# One row per dependency, the dependency id becomes a regular "id" column
df = pd.DataFrame.from_dict(dp_init, orient = 'index')
df = df.rename_axis('id').reset_index()

# Only these columns are shown, the rest are hidden/dropped
cols_to_show = [
    'id',
    'parentId',
    'require',
    'title',
    'description',
    'category',
    'dependenciesNum',
]
# Boolean mask over the existing columns (missing columns are just ignored)
visible_cols = df.columns.isin(cols_to_show)
df = df.loc[:, visible_cols]

# Save current workload(s) dependencies (based on "wl_needed")
# These dependencies are only one level below, not fully nested
df.to_json(
    f'{data_folder}\\dependency_summary.json',
    orient = 'records',
    indent = 4,
    force_ascii = False
)

df

Unnamed: 0,id,parentId,require,title,description,category,dependenciesNum
0,Microsoft.Component.MSBuild,Microsoft.VisualStudio.Workload.MSBuildTools,"{'flag': None, 'version': '[17.0,18.0)', 'chip...",MSBuild,MSBuild tasks and supported C#/VB compilers.,"Compilers, build tools, and runtimes",5
1,Microsoft.Build.Dependencies,Microsoft.VisualStudio.Workload.MSBuildTools,"{'flag': None, 'version': '[17.0,18.0)', 'chip...",,,,1
2,Microsoft.VisualStudio.Component.CoreBuildTools,Microsoft.VisualStudio.Workload.MSBuildTools,"{'flag': None, 'version': '[17.0,18.0)', 'chip...",Visual Studio Build Tools Core,Core features needed for supporting Visual Stu...,,3
3,Microsoft.VisualStudio.Component.VC.CoreBuildT...,Microsoft.VisualStudio.Workload.VCTools,"{'flag': None, 'version': '[17.0,18.0)', 'chip...",C++ Build Tools core features,Core features needed for supporting Visual C++...,Development activities,8
4,Microsoft.VisualStudio.Component.VC.Tools.x86.x64,Microsoft.VisualStudio.Workload.VCTools,"{'flag': 'Recommended', 'version': '[17.0,18.0...",MSVC v143 - VS 2022 C++ x64/x86 build tools (L...,Visual C++ (v143) latest compilers and librari...,"Compilers, build tools, and runtimes",35
5,Microsoft.VisualStudio.Component.VC.Redist.14....,Microsoft.VisualStudio.Workload.VCTools,"{'flag': None, 'version': '[17.0,18.0)', 'chip...",C++ 2022 Redistributable Update,Latest update for the Visual C++ Redistributable,"Compilers, build tools, and runtimes",1
6,Microsoft.VisualStudio.VC.UnitTest.Desktop.Bui...,Microsoft.VisualStudio.Workload.VCTools,"{'flag': None, 'version': '[17.0,18.0)', 'chip...",,,,1
7,Microsoft.VisualStudio.Component.Windows11SDK....,Microsoft.VisualStudio.Workload.VCTools,"{'flag': 'Recommended', 'version': '[17.0,18.0...",Windows 11 SDK (10.0.22621.0),"Headers, libraries, and tools for developing a...","SDKs, libraries, and frameworks",1
8,Microsoft.VisualStudio.Component.VC.CMake.Project,Microsoft.VisualStudio.Workload.VCTools,"{'flag': 'Recommended', 'version': '[17.0,18.0...",C++ CMake tools for Windows,Visual C++ tools support for CMake.,"Compilers, build tools, and runtimes",6
9,Microsoft.VisualStudio.Component.TestTools.Bui...,Microsoft.VisualStudio.Workload.VCTools,"{'flag': 'Recommended', 'version': '[17.0,18.0...",Testing tools core features - Build Tools,Includes core test tools to run tests from CLI.,Debugging and testing,1


In [128]:
# Dump the raw first-level dependency dict for manual inspection
print(dp_init)

{'Microsoft.Component.MSBuild': {'parentId': 'Microsoft.VisualStudio.Workload.MSBuildTools', 'require': {'flag': None, 'version': '[17.0,18.0)', 'chip': None, 'machineArch': None}, 'version': '17.7.33905.399', 'type': 'Component', 'title': 'MSBuild', 'description': 'MSBuild tasks and supported C#/VB compilers.', 'category': 'Compilers, build tools, and runtimes', 'dependencies': {'Microsoft.Build': '[16.0,18.0)', 'Microsoft.VisualStudio.NativeImageSupport': '[17.0,18.0)', 'Microsoft.VisualStudio.Component.Roslyn.Compiler': '[17.0,18.0)', 'Microsoft.NuGet.Build.Tasks.Setup': '[16.0,18.0)', 'Microsoft.PythonTools.BuildCore.Vsix_1230994547': {'id': 'Microsoft.PythonTools.BuildCore.Vsix', 'version': '', 'when': ['Microsoft.VisualStudio.Product.BuildTools']}}, 'dependenciesNum': 5}, 'Microsoft.Build.Dependencies': {'parentId': 'Microsoft.VisualStudio.Workload.MSBuildTools', 'require': {'flag': None, 'version': '[17.0,18.0)', 'chip': None, 'machineArch': None}, 'version': '17.7.33906.96', 't

## Filter dependencies

In [129]:
from copy import deepcopy

print(f'Num of dependencies (before): {len(dp_init)}')

dp_remove = [
    # Dependencies id you may want to remove (one time removal only)
    # If during later process the dependency is needed by other component, it will be added again
    # This list will also be used again later so just keep it even if empty
]

# Start excluding dependencies you don't need
# For starter, I'm also removing all dependencies flagged as optional
# Iterate over a snapshot of the ids because entries are removed inside the loop
for i in list(dp_init):
    # Compare the dependency id (not its info dict) with the removal list
    if lower(i) in lower(dp_remove):
        dp_init.pop(i)
        # One time removal only, so the id is dropped from the removal list too
        lower_remove(dp_remove, i)
        print(f'Removed {i}')
    # Remove optional dependencies
    elif lower(dp_init[i]['require']['flag']) == lower('Optional'):
        dp_init.pop(i)
        print(f'Removed {i}')

print(f'Num of dependencies (after): {len(dp_init)}')

df = pd.DataFrame.from_dict(
    dp_init,
    orient = 'index'
).rename_axis('id').reset_index()

# Only show some columns, hide/drop the rest
df = df.loc[:, df.columns.isin(cols_to_show)]

# Replace the previous file with filtered values
# These dependencies are only one level below, not fully nested
df.to_json(
    f'{data_folder}\\dependency_summary.json',
    indent = 4,
    force_ascii = False,
    orient = 'records'
)

df

Num of dependencies (before): 24
Removed Microsoft.VisualStudio.Component.VC.ATL
Removed Microsoft.VisualStudio.Component.VC.ATLMFC
Removed Microsoft.VisualStudio.Component.VC.CLI.Support
Removed Microsoft.VisualStudio.Component.VC.Modules.x86.x64
Removed Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Llvm.Clang
Removed Microsoft.VisualStudio.Component.Windows11SDK.22000
Removed Microsoft.VisualStudio.Component.Windows10SDK.20348
Removed Microsoft.VisualStudio.Component.Windows10SDK.19041
Removed Microsoft.VisualStudio.Component.Windows10SDK.18362
Removed Microsoft.VisualStudio.ComponentGroup.VC.Tools.142.x86.x64
Removed Microsoft.VisualStudio.Component.VC.v141.x86.x64
Removed Microsoft.VisualStudio.Component.VC.140
Num of dependencies (after): 12


Unnamed: 0,id,parentId,require,title,description,category,dependenciesNum
0,Microsoft.Component.MSBuild,Microsoft.VisualStudio.Workload.MSBuildTools,"{'flag': None, 'version': '[17.0,18.0)', 'chip...",MSBuild,MSBuild tasks and supported C#/VB compilers.,"Compilers, build tools, and runtimes",5
1,Microsoft.Build.Dependencies,Microsoft.VisualStudio.Workload.MSBuildTools,"{'flag': None, 'version': '[17.0,18.0)', 'chip...",,,,1
2,Microsoft.VisualStudio.Component.CoreBuildTools,Microsoft.VisualStudio.Workload.MSBuildTools,"{'flag': None, 'version': '[17.0,18.0)', 'chip...",Visual Studio Build Tools Core,Core features needed for supporting Visual Stu...,,3
3,Microsoft.VisualStudio.Component.VC.CoreBuildT...,Microsoft.VisualStudio.Workload.VCTools,"{'flag': None, 'version': '[17.0,18.0)', 'chip...",C++ Build Tools core features,Core features needed for supporting Visual C++...,Development activities,8
4,Microsoft.VisualStudio.Component.VC.Tools.x86.x64,Microsoft.VisualStudio.Workload.VCTools,"{'flag': 'Recommended', 'version': '[17.0,18.0...",MSVC v143 - VS 2022 C++ x64/x86 build tools (L...,Visual C++ (v143) latest compilers and librari...,"Compilers, build tools, and runtimes",35
5,Microsoft.VisualStudio.Component.VC.Redist.14....,Microsoft.VisualStudio.Workload.VCTools,"{'flag': None, 'version': '[17.0,18.0)', 'chip...",C++ 2022 Redistributable Update,Latest update for the Visual C++ Redistributable,"Compilers, build tools, and runtimes",1
6,Microsoft.VisualStudio.VC.UnitTest.Desktop.Bui...,Microsoft.VisualStudio.Workload.VCTools,"{'flag': None, 'version': '[17.0,18.0)', 'chip...",,,,1
7,Microsoft.VisualStudio.Component.Windows11SDK....,Microsoft.VisualStudio.Workload.VCTools,"{'flag': 'Recommended', 'version': '[17.0,18.0...",Windows 11 SDK (10.0.22621.0),"Headers, libraries, and tools for developing a...","SDKs, libraries, and frameworks",1
8,Microsoft.VisualStudio.Component.VC.CMake.Project,Microsoft.VisualStudio.Workload.VCTools,"{'flag': 'Recommended', 'version': '[17.0,18.0...",C++ CMake tools for Windows,Visual C++ tools support for CMake.,"Compilers, build tools, and runtimes",6
9,Microsoft.VisualStudio.Component.TestTools.Bui...,Microsoft.VisualStudio.Workload.VCTools,"{'flag': 'Recommended', 'version': '[17.0,18.0...",Testing tools core features - Build Tools,Includes core test tools to run tests from CLI.,Debugging and testing,1


In [130]:
# Include miscellaneous dependencies you may need (preferably components)
# Sometimes, some dependencies are actually required but may not be listed
# E.g. Universal C Runtime (UCRT) is normally included if using the build tools installer
# Even if this list is empty, just keep it because it may be used later
dp_misc = [
    'Microsoft.Windows.UniversalCRT.Redistributable.Msi'
]

vs_config = { "version": "1.0", "components": [] }

# Lower case ids we are interested in, computed once instead of once per manifest package
wanted_ids = set(lower([*dp_init, *dp_misc]))
# Lower case ids already written to the config (avoids duplicated components)
added_ids = set()

for i in manifest['packages']:
    current_id = lower(i['id'])
    if current_id not in wanted_ids: continue
    # Only components are supported
    if lower(i['type']) != lower('Component'): continue
    if current_id in added_ids: continue

    added_ids.add(current_id)
    # Keep the original (manifest) casing in the exported file
    vs_config['components'].append(i['id'])

# Export configuration (can be imported by build tools installer)
dump = json.dumps(vs_config, indent = 4)
with open(f'{data_folder}\\.vsconfig', 'wb') as file:
    file.write(dump.encode())

In [131]:
# Dump the raw dependency dict again to inspect its current content
print(dp_init)

{'Microsoft.Component.MSBuild': {'parentId': 'Microsoft.VisualStudio.Workload.MSBuildTools', 'require': {'flag': None, 'version': '[17.0,18.0)', 'chip': None, 'machineArch': None}, 'version': '17.7.33905.399', 'type': 'Component', 'title': 'MSBuild', 'description': 'MSBuild tasks and supported C#/VB compilers.', 'category': 'Compilers, build tools, and runtimes', 'dependencies': {'Microsoft.Build': '[16.0,18.0)', 'Microsoft.VisualStudio.NativeImageSupport': '[17.0,18.0)', 'Microsoft.VisualStudio.Component.Roslyn.Compiler': '[17.0,18.0)', 'Microsoft.NuGet.Build.Tasks.Setup': '[16.0,18.0)', 'Microsoft.PythonTools.BuildCore.Vsix_1230994547': {'id': 'Microsoft.PythonTools.BuildCore.Vsix', 'version': '', 'when': ['Microsoft.VisualStudio.Product.BuildTools']}}, 'dependenciesNum': 5}, 'Microsoft.Build.Dependencies': {'parentId': 'Microsoft.VisualStudio.Workload.MSBuildTools', 'require': {'flag': None, 'version': '[17.0,18.0)', 'chip': None, 'machineArch': None}, 'version': '17.7.33906.96', 't

## Get all (nested) dependencies

In [132]:
# Can be used to clear output if something went wrong (non-stop loop)
from IPython.display import clear_output

# List of dependencies we want to install/extract in the end
# A plain assignment would only bind a second name to the same object
# Deepcopy creates an independent copy, so changes here don't leak into "dp_init"
dp_final = deepcopy(dp_init) # Lower dict

# Temporarily removed dependencies that have both payloads and (sub) dependencies
# May also workaround some troublesome packages that list themselves as (sub) dependency
# Can cause missing files if removed permanently
dp_payloads = LowerDict()

def still_has_dependencies(input_dp_list: LowerDict) -> bool:
    """ Tell whether any entry is still incomplete or still lists sub dependencies """
    return any(
        entry.get('incomplete') or entry['dependenciesNum'] > 0
        for entry in input_dp_list.values()
    )

# Flatten the dependency tree: keep looping until no entry of "dp_final" is
# incomplete or still reports sub dependencies (see "still_has_dependencies")
# This may take at worst a minute or two, but if it still doesn't stop, something is broken
while still_has_dependencies(dp_final):
    # Add our miscellaneous dependencies
    for i in dp_misc:
        # Match the keys written here with "get_sub_dependencies"
        # Fill most values with none because we only know the id
        # "incomplete" marks the entry so its details get fetched below
        dp_final[i] = {
            'parentId': None,
            'require': {
                'flag': None,
                'version': None,
                'chip': None,
                'machineArch': None
            },
            'incomplete': True
        }

    # Add all nested dependencies as same-level dependencies
    # Iterate over a copy because "dp_final" is modified inside the loop
    for id, props in deepcopy(dp_final).items():
        # Make sure we don't remove miscellaneous dependencies because we haven't got any info yet
        # Once we make an exception (else), remove the id from "dp_misc"
        # So all the dependencies info can be added/searched on the next iteration
        if lower(id) not in lower(dp_misc):
            # Get dependencies of parent id and props, write the results to "dp_final"
            has_dependencies = get_sub_dependencies(id, props, dp_final)
            # Move the parent to the removal list because we already have all the (sub) dependencies
            # This is to avoid searching dependencies for the same parent again and again
            # But we also must check if it has payloads or not before deleting
            if has_dependencies:
                dp_remove.append(id)
        else:
            lower_remove(dp_misc, id)

    # Fetch description and other info (version, payloads, etc)
    # This is for both miscellaneous dependencies and the newly added dependencies
    get_packages_info(manifest, dp_final)

    # Start removing parent elements of our dependencies
    # If the parent has payloads, it will be temporarily moved from "dp_final" to "dp_payloads"
    # It will be restored later once we make sure there is no more nested dependency
    for id, props in deepcopy(dp_final).items():
        if lower(id) in lower(dp_remove):
            dp_final.pop(id)
            # The id has been handled, drop it from the removal list
            lower_remove(dp_remove, id)

            if props.get('payloads'):
                dp_payloads[id] = props
                # warn(f'{id} has dependencies and payloads!')
                if lower(props.get('parentId')) == lower(id):
                    warn(f'{id} has itself as a dependency!')

    # Uncomment to clear the cell output after each pass
    # clear_output(wait = True)

# Start re-adding removed parent packages that contain payloads
for id, props in dp_payloads.items():
    dp_final[id] = props

# Save the flattened dependency list as JSON
# "dp_final" is converted to a plain dict before dumping
dump = json.dumps(dict(dp_final), indent = 4, ensure_ascii = False)
with open(f'{data_folder}\\dependency_details.json', 'wb') as file:
    file.write(dump.encode())

  warn(f'Not all variants of {variants["id"]} have "{key}" key!')
  warn(f'Not all variants of {variants["id"]} have "{key}" key!')
  warn(f'Not all variants of {variants["id"]} have "{key}" key!')
  warn(f'Not all variants of {variants["id"]} have "{key}" key!')
  warn(f'Not all variants of {variants["id"]} have "{key}" key!')
  warn(f'Not all variants of {variants["id"]} have "{key}" key!')
  warn(f'Not all variants of {variants["id"]} have "{key}" key!')
  warn(f'Not all variants of {variants["id"]} have "{key}" key!')
  warn(f'Not all variants of {variants["id"]} have "{key}" key!')
  warn(f'Not all variants of {variants["id"]} have "{key}" key!')


In [133]:
# Dump the flattened dependency dict for manual inspection
print(dp_final)

{'Microsoft.Windows.UniversalCRT.Redistributable.Msi': {'parentId': None, 'require': {'flag': None, 'version': None, 'chip': None, 'machineArch': None}, 'version': '10.0.26624', 'type': 'Msi', 'payloads': [{'fileName': 'Universal CRT Redistributable-x86_en-us.msi', 'sha256': 'b7d7f7df27098ca742197689fd9ce63e0079d1e1e9a6c3a20cdaf54443c7116e', 'size': 311296, 'url': 'https://download.visualstudio.microsoft.com/download/pr/e106a47b-8ac7-48fb-bda0-c42b9fd24b6e/fae0b86414b598260f2efccc30882256/universal%20crt%20redistributable-x86_en-us.msi', 'signer': {'$ref': '4'}}, {'fileName': '9126f6ff98d955951fe9323f4444c119.cab', 'sha256': '39fac0fec9d1380b92108ef1ff1d53e267b73559e3f7e200e6589281ec96c07c', 'size': 455640, 'url': 'https://download.visualstudio.microsoft.com/download/pr/5aa29bdc-6bb2-4c9c-b0e8-286fd4bd4cee/39fac0fec9d1380b92108ef1ff1d53e267b73559e3f7e200e6589281ec96c07c/9126f6ff98d955951fe9323f4444c119.cab', 'signer': {'$ref': '5'}}, {'fileName': '948a611cd2aca64b1e5113ffb7b95d5f.cab',

In [134]:
import math

def convert_size(size_bytes: int, precision: int = 2) -> str:
    """
    Format a byte count as a human readable string using binary (1024) units.

    size_bytes: number of bytes, negative values keep their sign
    precision: number of decimals passed to "round()"
    Returns a string such as "1.5 KiB" (zero is returned as "0 B")
    """
    if size_bytes == 0: return '0 B'
    size_name = ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')

    sign = '-' if size_bytes < 0 else ''
    magnitude = abs(size_bytes)

    # Pick the unit with exact integer comparisons instead of a floating point logarithm
    # The index is clamped, so values below 1 stay in "B" and huge values stay in "YiB"
    index = 0
    while index < len(size_name) - 1 and magnitude >= 1024 ** (index + 1):
        index += 1

    size = round(magnitude / 1024 ** index, precision)
    return f'{sign}{size} {size_name[index]}'

# Running totals over every payload of every package
total_files = 0
download_size = 0
# Case-insensitive counters (lower dict): payload extension and package type
file_ext = LowerDict()
pkg_type = LowerDict()

for pkg in dp_final.values():
    # Count packages per package type
    kind = pkg["type"]
    if kind in pkg_type: # Lower dict
        pkg_type[kind] += 1
    else:
        pkg_type[kind] = 1

    for payload in pkg['payloads']:
        # One payload is one file to download
        total_files += 1
        # Payloads without (or with empty) size add nothing to the total
        download_size += payload.get('size') or 0
        # Extension is whatever follows the last dot of the file name
        suffix = payload['fileName'].rsplit('.', 1)[-1]
        if suffix in file_ext: # Lower dict
            file_ext[suffix] += 1
        else:
            file_ext[suffix] = 1

# Number of packages per package type
df = pd.DataFrame.from_dict(pkg_type, orient = 'index', columns = ['packages_num'])
df = df.rename_axis('package_type').reset_index()
display(df)

print(f'Total files (payloads) to download: {total_files}')
print(f'Total download size: {convert_size(download_size)}')

# Number of payloads per file extension
df = pd.DataFrame.from_dict(file_ext, orient = 'index', columns = ['payloads_num'])
df = df.rename_axis('payload_ext').reset_index()
display(df)

Unnamed: 0,package_type,packages_num
0,Msi,19
1,Vsix,294
2,Exe,6
3,Zip,2


Total files (payloads) to download: 597
Total download size: 2.02 GiB


Unnamed: 0,payload_ext,payloads_num
0,msi,100
1,cab,192
2,vsix,294
3,exe,7
4,zip,2
5,ps1,2


## Download payloads
- Linux users should be aware that every path is case-sensitive
- It is recommended to change the target path to an NTFS/FAT partition

In [135]:
pyl_list = []

# Folder name is composed based on these keys
# Don't reorder the index unless you know what you're doing
folder_keys = ['version', 'chip', 'language', 'productArch', 'machineArch']

for id, i in dp_final.items():
    # Rebuilt for every package: "<id>,<key>=<value>,..."
    name_parts = [id]
    for key in folder_keys:
        val = i.get(key)
        # Only non-empty strings take part in the folder name
        if isinstance(val, str) and val:
            name_parts.append(f'{key}={val}')
    folder_name = ','.join(name_parts)

    # Every payload of this package is stored in the same folder
    package_dir = cache_path + '\\' + folder_name

    for j in i['payloads']:
        # One flat record per payload file
        pyl = {
            'packageId': id,
            'packageType': i['type'],
            'url': j['url'],
            'fileName': j['fileName'],
            'fileType': j['fileName'].split('.')[-1],
            'targetDir': package_dir,
            'filePath': package_dir + '\\' + j['fileName'],
            'sha256': j['sha256'],
            'size': j['size']
        }

        # For some special vsix packages
        if lower(i['type']) == 'vsix':
            # Missing and empty values are both falsy, the key is only added for real values
            ext_dir = i.get('extensionDir')
            if ext_dir:
                pyl['extensionDir'] = ext_dir

        pyl_list.append(pyl)

df = pd.DataFrame(pyl_list)

# Save payload list as csv and json
# May be useful if you want to use external download manager
df.to_csv(f'{data_folder}\\payloads.csv', sep = ';', index = False)
df.to_json(
    f'{data_folder}\\payloads.json',
    orient = 'records',
    force_ascii = False,
    indent = 4
)

df

Unnamed: 0,packageId,packageType,url,fileName,fileType,targetDir,filePath,sha256,size,extensionDir
0,Microsoft.Windows.UniversalCRT.Redistributable...,Msi,https://download.visualstudio.microsoft.com/do...,Universal CRT Redistributable-x86_en-us.msi,msi,c:\Users\Dhika\Documents\Projects\Proto\Python...,c:\Users\Dhika\Documents\Projects\Proto\Python...,b7d7f7df27098ca742197689fd9ce63e0079d1e1e9a6c3...,311296,
1,Microsoft.Windows.UniversalCRT.Redistributable...,Msi,https://download.visualstudio.microsoft.com/do...,9126f6ff98d955951fe9323f4444c119.cab,cab,c:\Users\Dhika\Documents\Projects\Proto\Python...,c:\Users\Dhika\Documents\Projects\Proto\Python...,39fac0fec9d1380b92108ef1ff1d53e267b73559e3f7e2...,455640,
2,Microsoft.Windows.UniversalCRT.Redistributable...,Msi,https://download.visualstudio.microsoft.com/do...,948a611cd2aca64b1e5113ffb7b95d5f.cab,cab,c:\Users\Dhika\Documents\Projects\Proto\Python...,c:\Users\Dhika\Documents\Projects\Proto\Python...,dee09bf61b630506087d8d1ca59c0147d47c037152035b...,625357,
3,Microsoft.Windows.UniversalCRT.Redistributable...,Msi,https://download.visualstudio.microsoft.com/do...,fef2cfedd6135e0ed85290b83f3682c3.cab,cab,c:\Users\Dhika\Documents\Projects\Proto\Python...,c:\Users\Dhika\Documents\Projects\Proto\Python...,4e9a69b016d75b2520b975eee411c7049e0dcc96acd683...,603424,
4,Microsoft.VisualStudio.NativeImageSupport,Vsix,https://download.visualstudio.microsoft.com/do...,payload.vsix,vsix,c:\Users\Dhika\Documents\Projects\Proto\Python...,c:\Users\Dhika\Documents\Projects\Proto\Python...,07e123c720d64c72a3649eb3039968c0637f9373248c3d...,24010,
...,...,...,...,...,...,...,...,...,...,...
592,Microsoft.VisualStudio.PerformanceProvider,Vsix,https://download.visualstudio.microsoft.com/do...,payload.vsix,vsix,c:\Users\Dhika\Documents\Projects\Proto\Python...,c:\Users\Dhika\Documents\Projects\Proto\Python...,6b498931bb9fc43e0188a62ab54702db428a1ff3fdadd2...,40714,
593,Microsoft.VisualStudio.WebSiteProject.DTE,Vsix,https://download.visualstudio.microsoft.com/do...,payload.vsix,vsix,c:\Users\Dhika\Documents\Projects\Proto\Python...,c:\Users\Dhika\Documents\Projects\Proto\Python...,fcc30b183931189881148d61f75a6bd2b096b2fe238c37...,48950,
594,Microsoft.VisualStudio.Community.ProductArch.R...,Vsix,https://download.visualstudio.microsoft.com/do...,payload.vsix,vsix,c:\Users\Dhika\Documents\Projects\Proto\Python...,c:\Users\Dhika\Documents\Projects\Proto\Python...,991245a81865ef38bc2df8e2273fd428d94e59cf8284c4...,41073,
595,Microsoft.VisualStudio.Debugger.Script.Remote....,Vsix,https://download.visualstudio.microsoft.com/do...,payload.vsix,vsix,c:\Users\Dhika\Documents\Projects\Proto\Python...,c:\Users\Dhika\Documents\Projects\Proto\Python...,73f1ad57fe434a73571937a51b6988ffc55b68055b23b3...,10609,


In [136]:
if not vs_package_download:
    # Choosing yes will also verify all local packages
    # If the package already exist and valid, it won't be redownloaded
    answer = input('Do want to download/verify all the packages (Y/N)?')
    # Store a real boolean instead of the raw answer and ignore surrounding whitespace
    # Anything other than "y"/"Y" keeps the download disabled
    vs_package_download = lower(answer.strip()) == 'y'

In [137]:
from urllib import request
from tqdm import tqdm

# I'm just copying the code from here
# https://github.com/tqdm/tqdm#hooks-and-callbacks
class TqdmUpTo(tqdm):
    """ Progress bar with a callback that can be used as "reporthook" of urlretrieve """

    def progress_bar(self, b = 1, bsize = 1, tsize = None):
        """
        Move the bar to "b * bsize" and return the result of "update()".
        The total is replaced by "tsize" when it's given.
        """
        if tsize is not None:
            self.total = tsize
        # "update()" expects an increment, so subtract what is already counted
        transferred = b * bsize
        return self.update(transferred - self.n)

# Doesn't support pause and resume!
# Download is also single thread only for now
def download_file(url, file_path, desc = None):
    """
    Download "url" to "file_path" while showing a progress bar.

    desc: label shown next to the progress bar
    """
    with TqdmUpTo(
        unit = 'B',
        unit_scale = True,
        unit_divisor = 1024,
        miniters = 1,
        desc = desc
    ) as bar:
        request.urlretrieve(url, file_path, reporthook = bar.progress_bar)
        # Make the total match what was actually counted
        bar.total = bar.n

if vs_package_download:
    # Package id of the current payload
    pkg_id = None
    # Auto-clear cell output
    # Will be disabled if an error is found
    auto_clear = True

    for i in pyl_list:
        # Quick access
        package_id = i['packageId']
        url = i['url']
        file_name = i['fileName']
        file_path = i['filePath']
        sha256 = i['sha256']

        # A single package can have multiple payloads
        if package_id != pkg_id:
            pkg_id = package_id
            print(f'\n** {pkg_id} **')

        # File checksum
        correct_file = False
        # Maximum redownload tries
        tries = 3

        # Check file existence
        if os.path.isfile(file_path):
            correct_file = check_sha256sum(file_path, sha256)
        else:
            parent_dir = os.path.dirname(file_path)
            # Create (nested) directories for parent dir
            os.makedirs(parent_dir, exist_ok = True)

        # Don't redownload file if the checksum is correct
        while not correct_file and tries > 0:
            print(f'Downloading {file_name}... (tries left: {tries})')
            try:
                download_file(url, file_path, desc = file_name)
            except OSError as err:
                # Network errors raised by urllib (URLError and friends) derive from OSError
                # Treat them as a failed try instead of aborting the remaining downloads
                print(f'Failed to download {file_name}: {err}')
            else:
                correct_file = check_sha256sum(file_path, sha256)
            if not correct_file: tries -= 1

        if not correct_file:
            print(f'No tries left, {file_name} checksum is incorrect!')
            # Disable auto-clear so the error stays visible
            auto_clear = False
        else:
            print(f'{file_name} checksum is valid!')

        if auto_clear: clear_output(wait = True)


** Microsoft.VisualStudio.Diagnostics.AspNetHelper **
payload.vsix checksum is valid!


In [138]:
import shutil

# Payload target directories
pyl_dir = { i['targetDir'] for i in pyl_list }
# Same directories in normalized form, used only for comparison
# Normalizing makes the check independent of separator style, redundant separators
# and (on Windows) letter case, so a valid folder is not deleted by a spelling difference
pyl_dir_norm = { os.path.normcase(os.path.normpath(path)) for path in pyl_dir }

# Check for outdated packages
# By comparing existing folders with payloads list
if vs_package_del_outdated:
    for i in os.scandir(cache_path):
        if not i.is_dir(): continue
        if os.path.normcase(os.path.normpath(i.path)) in pyl_dir_norm: continue
        # This may be dangerous, the whole folder is deleted recursively
        shutil.rmtree(i.path)
        print(f'Removed {i.name}')

## Extract files (installation)

In [149]:
from urllib import parse
import zipfile

def write_file(file_bytes: bytes, file_path: str, package_id: str):
    """
    Write "file_bytes" to "file_path", creating parent directories when needed.

    file_path: forward slashes are replaced with backslashes before writing
    package_id: not used by the function itself, kept so existing calls stay valid
    An existing file is overwritten.
    """
    # Replace forward slash with backslash (if any)
    # "makedirs" and "write" may fail on Windows if forward slash exist(s)
    file_path = file_path.replace('/', '\\')
    # Ensure parent directories exist
    # A bare file name has no parent directory and "makedirs" rejects an empty path
    file_dir = os.path.dirname(file_path)
    if file_dir:
        os.makedirs(file_dir, exist_ok = True)
    # Existing file will be overwritten
    with open(file_path, 'wb') as f:
        f.write(file_bytes)

# Root files/folders to ignore if vsix package has "extensionDir"
# Every entry must be lower case to avoid mismatches
# Kept as a tuple of prefixes, archive names are lowered before being compared
vsix_ext_ignore = (
    'package/',
    '_rels/',
    'manifest.json',
    lower('[Content_Types].xml'),
    lower('Contents/')
)

# Id of the last unsupported package that was reported
pkg_id = None
for i in pyl_list:
    package_id = i['packageId']
    package_type = i['packageType']
    file_name = i['fileName']
    file_path = i['filePath']

    if lower(package_type) != 'vsix':
        # A single package id can have multiple payloads
        # Only reprint info if the previous id doesn't match
        if package_id != pkg_id:
            pkg_id = package_id
            # This package may need to be inspected and extracted manually
            # You can use some tools like Sandboxie or Universal Extractor
            print(f'# "{package_id}" type ({package_type}) is unsupported!')
        continue

    ext_dir = i.get('extensionDir')

    # The archive is opened once and reused for both extraction modes
    with zipfile.ZipFile(file_path) as z:
        names = z.namelist()
        content_names = [name for name in names if lower(name).startswith('contents/')]

        if content_names:
            # Regular package: everything below "Contents/" goes to "extract_path"
            target_dir = extract_path
            # Remove "contents" (parent folder)
            strip_len = len("contents/")
            targets = content_names
        elif not ext_dir:
            # Re-enable this message to troubleshoot if you miss some important files
            # print(f'* "{package_id}" payload ({file_name}) doesn\'t have "Contents" folder')
            targets = []
        elif lower(ext_dir).startswith('[installdir]'):
            # The prefix is detected case-insensitively, so it's cut off by length
            # A case-sensitive "replace" would keep e.g. "[InstallDir]" in the path
            target_dir = extract_path + ext_dir[len('[installdir]'):]
            strip_len = 0
            targets = [name for name in names if not lower(name).startswith(vsix_ext_ignore)]
        else:
            print(f'$ "{package_id}" payload ({file_name}) doesn\'t have "installdir" path')
            targets = []

        if vs_package_extract:
            for name in targets:
                # Directory entries have no content to write
                if name.endswith('/'): continue
                # Decode URL like path (e.g. %20 to space)
                d_name = parse.unquote(name)[strip_len:]
                # Can't be extracted directly due to URL like path
                file = z.read(name)
                write_file(file, f'{target_dir}\\{d_name}', package_id)

# "Microsoft.Windows.UniversalCRT.Redistributable.Msi" type (Msi) is unsupported!
# "Microsoft.Build.FileTracker.Msi" type (Msi) is unsupported!
# "Microsoft.VisualCpp.CRT.ClickOnce.Msi" type (Msi) is unsupported!
# "Win11SDK_10.0.22621" type (Exe) is unsupported!
# "Microsoft.Build.UnGAC" type (Exe) is unsupported!
# "Microsoft.VisualStudio.Setup.Configuration" type (Msi) is unsupported!
# "Microsoft.VisualStudio.Setup.WMIProvider" type (Msi) is unsupported!
# "Microsoft.VisualStudio.Initializer" type (Exe) is unsupported!
# "Microsoft.VisualStudio.GitHubProtocolHandler.Msi" type (Msi) is unsupported!
# "Microsoft.VisualStudio.Debugger.Script.Msi" type (Msi) is unsupported!
# "CoreEditorFonts" type (Msi) is unsupported!
# "Microsoft.WebView2" type (Exe) is unsupported!
# "Microsoft.VisualStudio.MinShell.Interop.Shared.Msi" type (Msi) is unsupported!
# "Microsoft.VisualStudio.Community.Msi.Resources" type (Msi) is unsupported!
# "Microsoft.Net.6.Runtime" type (Zip) is unsupported!
# "M

# Install Windows SDK
- The Windows SDK is universal for all architectures, so there may be files you want to delete manually (e.g. arm64 files)

In [145]:
# Parameter splitting (designed for Unix shells)
# May break Windows paths, check the result manually
import shlex
# For running Windows SDK installer
import subprocess
# For moving (renaming) files and folders
import shutil

# Regex may incorrectly interpret some characters as special search pattern
# It's bad for replacing Windows path which contains a lot of backslashes
# Code taken from: https://stackoverflow.com/a/4773614
def ireplace(text, old, new):
    """
    Case-insensitive string replace (no regex, so special characters are safe).

    Every non-overlapping occurrence of "old" in "text" is replaced with "new",
    scanning from left to right. Inserted text is never searched again.
    An empty "old" returns "text" unchanged instead of looping forever.
    """
    if not old: return text

    # Lower both sides once; positions are reused on the original text
    # NOTE: assumes "lower()" keeps the string length (true for ASCII paths/aliases)
    text_l = text.lower()
    old_l = old.lower()

    parts = []
    idx = 0
    while True:
        index_l = text_l.find(old_l, idx)
        if index_l == -1: break
        # Keep the untouched text before the match, then the replacement
        parts.append(text[idx:index_l])
        parts.append(new)
        idx = index_l + len(old)
    # Remaining text after the last match
    parts.append(text[idx:])
    return ''.join(parts)

def run_sdk_installer(installer_path: str, installer_params: str):
    """Run the Windows SDK installer/uninstaller and wait for it to exit.

    Known placeholder aliases inside ``installer_params`` are substituted
    (case-insensitively, via ``ireplace``) with paths below
    ``<extract_path>\\temp`` before the parameter string is split into
    arguments with ``shlex.split``.

    Args:
        installer_path: Path of the executable to launch.
        installer_params: Raw parameter string, possibly containing aliases
            such as ``[LogFile]``.

    Raises:
        subprocess.CalledProcessError: If the process exits with a non-zero
            return code.
    """
    # Scratch directory the installer is pointed at
    temp_dir = f'{extract_path}\\temp'

    # Alias -> value table, applied in this order
    aliases = (
        ('[LogFile]', f'{extract_path}\\temp\\win_sdk_setup.log'),
        ('[CEIPConsentOnOff]', 'off'),
        ('[ProgramFilesOrSharedDrive]', temp_dir),
    )
    resolved = installer_params
    for alias, value in aliases:
        resolved = ireplace(resolved, alias, value)

    print(f'Installer: {installer_path}')
    # Split once and reuse the result for both the log line and the call
    argv = shlex.split(resolved)
    print(f'Params: {argv}')
    print('Please wait, (un)installer is already running...\n')

    # The scratch directory must exist before the process starts
    os.makedirs(temp_dir, exist_ok=True)
    # Blocks until the process has finished
    subprocess.check_call([installer_path, *argv])

if vs_package_win_sdk:
    # Flow for each Windows SDK payload: run the installer (files end up in
    # "<extract_path>\temp"), move those files into "extract_path", then run
    # the same payload again with its uninstall parameters
    for i in pyl_list:
        # Search for pattern like this: Win11SDK_10.0.22621
        sdk_id = re.match(r'^(win\d+sdk_\d+\.\d+\.\d+)$', lower(i['packageId']))
        if sdk_id: sdk_id = sdk_id.group(1)
        else: continue

        # Set values only if it's Windows SDK
        package_id = i['packageId']
        package_type = i['packageType']
        file_name = i['fileName']
        file_path = i['filePath']
        file_type = i['fileType']

        # "pyl_info" doesn't contain all necessary info
        # Get full info from "dp_final" (like manifest)
        package_info = dp_final[sdk_id]

        # Installer file and parameters
        ins_file_name = package_info['installParams']['fileName']
        ins_params = package_info['installParams']['parameters']
        # Skip if payload file name doesn't match installer file name
        if not lower(ins_file_name) == lower(file_name): continue
        # Install Windows SDK
        run_sdk_installer(file_path, ins_params)

        # Move Windows SDK files from temporary directory
        # Currently, destination directory is set to "extract_path"
        sdk_files = os.listdir(f'{extract_path}\\temp')
        for file in sdk_files:
            dest = f'{extract_path}\\{file}'
            # Remove destination file/folder if already exists
            if os.path.exists(dest):
                if os.path.isfile(dest): os.remove(dest)
                else:
                    del_dir = input(f'"{dest}" already exists, delete it (Y/N)?')
                    if lower(del_dir) == 'y': shutil.rmtree(dest)
                    else:
                        # shutil.move() raises shutil.Error when the target
                        # already exists inside the destination directory,
                        # so keep the existing folder and skip this item
                        print(f'Skipped moving "{file}", existing "{dest}" was kept')
                        continue
            # Move file/folder to destination directory
            shutil.move(f'{extract_path}\\temp\\{file}', extract_path)

        # Uninstaller file and parameters
        unins_file_name = package_info['uninstallParams']['fileName']
        unins_params = package_info['uninstallParams']['parameters']
        # Skip if payload file name doesn't match uninstaller file name
        if not lower(unins_file_name) == lower(file_name): continue
        # Uninstall Windows SDK
        run_sdk_installer(file_path, unins_params)

Installer: c:\Users\Dhika\Documents\Projects\Proto\Python\VSRipper\packages\Win11SDK_10.0.22621,version=10.0.22621.4,productArch=neutral\WinSdkInstaller.exe
Params: ['SetupExe=winsdksetup.exe', 'LogFile=C:\\BuildTools\\temp\\win_sdk_setup.log', 'SetupLogFolder=windowssdk', 'CeipSetting=off', 'ProgramFilesOrSharedDriveSdkPath=C:\\BuildTools\\temp\\Windows Kits\\10', 'SetupParameters=/features OptionId.AvrfExternal OptionId.UWPManaged OptionId.WindowsSoftwareLogoToolkit OptionId.SigningTools OptionId.UWPLocalized OptionId.UWPCPP OptionId.DesktopCPPx64 OptionId.DesktopCPPx86 OptionID.DesktopCPPARM OptionID.DesktopCPPARM64 OptionId.MSIInstallTools /quiet /norestart']

Installer: c:\Users\Dhika\Documents\Projects\Proto\Python\VSRipper\packages\Win11SDK_10.0.22621,version=10.0.22621.4,productArch=neutral\WinSdkInstaller.exe
Params: ['SetupExe=winsdksetup.exe', 'LogFile=C:\\BuildTools\\temp\\win_sdk_setup.log', 'SetupLogFolder=windowssdk', 'CeipSetting=off', 'ProgramFilesOrSharedDriveSdkPath=

# Set environment variables
- To be implemented, use [this](https://gist.github.com/mmozeiko/7f3162ec2988e81e56d5c4e22cde9977) for now

# Set registry keys
- Flutter checks the Windows SDK install folder from the registry; the rest comes from `vswhere`