# In [None]:
from transformers import AutoModel, AutoTokenizer
from transformers import HfApi, HfFolder
import torch
import numpy as np
from sklearn.cluster import KMeans

# Load a Diffusion Transformer Model from Hugging Face Hub
# NOTE(review): the placeholder id below looks like a Stable Diffusion pipeline
# checkpoint rather than a DiT; confirm AutoModel/AutoTokenizer can actually
# resolve whatever repo id ends up here before running the rest of the script.
model_name = "CompVis/stable-diffusion-v1-4"  # Example model; replace with a DiT model
# Both objects are loaded from the same repo id; `model` is what gets quantized
# below and `tokenizer` is pushed alongside it.
model = AutoModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# VQ4DiT quantization function
def vq4dit_quantize(model, n_clusters=256):
    """Quantize a model's weight tensors in place with per-tensor k-means codebooks.

    Every trainable parameter with more than one dimension is flattened to
    scalars, clustered with k-means, and then overwritten so that each element
    holds the centroid it was assigned to. The parameter keeps its original
    shape, dtype and device.

    Args:
        model: Object exposing ``named_parameters()`` (e.g. a
            ``torch.nn.Module``).
        n_clusters: Maximum number of codebook entries per tensor.
            Defaults to 256, the value previously hard-coded here.

    Returns:
        The same ``model`` object, with its parameters modified in place.

    Raises:
        ValueError: If ``n_clusters`` is smaller than 1.
    """
    if n_clusters < 1:
        raise ValueError(f"n_clusters must be >= 1, got {n_clusters}")

    # In-place writes to parameters that require grad are only legal without
    # autograd tracking.
    with torch.no_grad():
        for _name, param in model.named_parameters():
            # Quantize only trainable weight matrices / kernels; skip biases
            # and other 1-D parameters.
            if not param.requires_grad or param.dim() <= 1:
                continue

            # NumPy has no bfloat16 and sklearn would upcast float16 to
            # float64, so cluster on a float32 copy (float64 stays float64).
            work_dtype = (
                torch.float64 if param.dtype == torch.float64 else torch.float32
            )
            weight = param.detach().to(device="cpu", dtype=work_dtype).numpy()
            samples = weight.reshape(-1, 1)

            # A tensor with no more distinct values than codebook entries is
            # already exactly representable: clustering would hand back the
            # same values (and sklearn would warn about duplicate centroids).
            # This also covers empty tensors.
            if np.unique(samples).size <= n_clusters:
                continue

            # K-Means clustering to create the codebook.
            kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(samples)
            codebook = kmeans.cluster_centers_.reshape(-1)
            assignments = kmeans.labels_.reshape(weight.shape)

            # Reconstruct the quantized weight; copy_ casts back to the
            # parameter's own dtype and device.
            param.copy_(torch.from_numpy(codebook[assignments]))

    return model

# Apply VQ4DiT quantization to the model.
# vq4dit_quantize rewrites the parameters of `model` and returns that same
# object, so `quantized_model` and `model` refer to one instance.
quantized_model = vq4dit_quantize(model)

# Save the quantized model
quantized_model.save_pretrained("./quantized_model")

# Push the quantized model to Hugging Face Hub
# NOTE(review): HfApi/HfFolder are normally provided by `huggingface_hub`;
# confirm the import at the top of the file resolves in your environment.
api = HfApi()
username = api.whoami()["name"]  # Assumes that you are already logged in with `huggingface-cli login`
repo_id = f"{username}/quantized-{model_name.split('/')[-1]}"
# exist_ok=True keeps a re-run of this script from failing once the repo
# has been created by an earlier run.
api.create_repo(repo_id, private=False, exist_ok=True)
# Read the cached login token once and reuse it for both uploads.
# NOTE(review): newer transformers releases prefer `token=` over
# `use_auth_token=`; confirm against the installed version.
hub_token = HfFolder.get_token()
quantized_model.push_to_hub(repo_id, use_auth_token=hub_token)
tokenizer.push_to_hub(repo_id, use_auth_token=hub_token)

print("Quantized model successfully pushed to Hugging Face Hub.")
