Convert pretrained TensorFlow models (.h5) to PyTorch

In [None]:
import h5py
import numpy as np
import requests
import torch

In [None]:
def download_file(url, to, timeout=30):
    """Download ``url`` and save the response body to the local path ``to``.

    The body is streamed to disk in 8 KiB chunks, so the whole file is never
    held in memory at once. If the server answers with anything other than
    HTTP 200, nothing is written and the status code is printed instead.

    Args:
        url: Address of the file to fetch.
        to: Destination path, opened in write-binary mode.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``;
            without one a stalled server would block this call forever.
    """
    # With stream=True the connection stays open until the body is consumed
    # or the response is closed; the context manager guarantees the close on
    # every path, including the failure branch and exceptions while writing.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            print(f"Failed to download file: {response.status_code}")
            return

        with open(to, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

    print(f"File downloaded successfully: {to}")

In [None]:
# Local path the downloaded weights file is written to.
xception_file = "../assets/xception_weights.h5"

# Release asset to fetch, assembled from repository, release tag and file name.
xception_url = (
    "https://github.com/fchollet/deep-learning-models/"
    + "releases/download/v0.4/"
    + "xception_weights_tf_dim_ordering_tf_kernels.h5"
)

download_file(url=xception_url, to=xception_file)

In [None]:
def flatten_h5(h5: h5py.File | h5py.Group) -> dict[str, h5py.Dataset]:
    """Collect every dataset found under ``h5`` into one flat dictionary.

    Groups are walked recursively. Each dataset is stored under the key it
    has in its immediate parent group, not under its full path, so datasets
    in different groups that share a name collide: the one visited last wins.
    A ``RuntimeWarning`` is emitted when that happens so the loss is visible.

    Args:
        h5: An open HDF5 file or one of its groups.

    Returns:
        Mapping from dataset key to the (still lazy) ``h5py.Dataset``.

    Raises:
        ValueError: If a member is neither a group nor a dataset.
    """
    import warnings

    contents: dict[str, h5py.Dataset] = {}
    for k, v in h5.items():
        if isinstance(v, (h5py.File, h5py.Group)):
            found = flatten_h5(v)
        elif isinstance(v, h5py.Dataset):
            found = {k: v}
        else:
            raise ValueError(f"Unknown value for key {k!r}: {v!r}")

        # Keys are leaf names only, so a repeat here means an earlier dataset
        # is about to be replaced.
        duplicates = contents.keys() & found.keys()
        if duplicates:
            warnings.warn(
                f"Duplicate dataset names overwritten: {sorted(duplicates)}",
                RuntimeWarning,
            )
        contents.update(found)
    return contents

In [None]:
# Open the downloaded weights read-only and index every dataset by name.
# NOTE(review): the file handle is not closed in this cell; the datasets in
# xception_items are read lazily from it below.
xception_h5: h5py.File = h5py.File(xception_file, "r")
xception_items = flatten_h5(xception_h5)

# Walk all datasets, report their shape/dtype and build a tensor from each.
for k in xception_items:
    v = xception_items[k]
    print(k, v.shape, v.dtype)
    # Going through a NumPy array first is much faster than handing the
    # h5py dataset to torch directly.
    v = np.array(v)
    torch.tensor(v)