In [None]:
import os
import json
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
import umap # Requires: pip install umap-learn

from safetensors import safe_open
# from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, LlavaForConditionalGeneration

In [None]:
# Directory (relative to the working directory) that holds one sub-directory per model.
ROOT_MODEL_DIRECTORY = "data/local_llms"

def load_index(model_name):
    """Load the sharded-checkpoint index JSON of a model.

    Looks inside ``ROOT_MODEL_DIRECTORY/<model_name>`` for
    ``model.safetensors.index.json`` first and ``pytorch_model.bin.index.json``
    second, and returns the parsed content of the first one found.

    Args:
        model_name: Name of the model sub-directory under ``ROOT_MODEL_DIRECTORY``.

    Returns:
        The parsed JSON content of the index file.

    Raises:
        FileNotFoundError: If the model directory does not exist (raised by
            ``os.listdir``) or if it contains neither index file.
    """
    model_directory = os.path.join(ROOT_MODEL_DIRECTORY, model_name)
    directory_entries = os.listdir(model_directory)

    # Safetensors takes precedence over the PyTorch .bin index when both exist.
    for index_file_name in ("model.safetensors.index.json", "pytorch_model.bin.index.json"):
        if index_file_name in directory_entries:
            with open(os.path.join(model_directory, index_file_name)) as f:
                return json.load(f)

    # Previously this fell through to `return index` with `index` unbound.
    raise FileNotFoundError(
        f"No model.safetensors.index.json or pytorch_model.bin.index.json found in {model_directory}."
    )

def _copy_shard_tensors(shard_keys, get_tensor, wanted_names, weight_map):
    """Copy the tensors of one checkpoint shard into ``weight_map`` (in place).

    With ``wanted_names`` set to ``None`` every tensor is copied under its own key.
    Otherwise a tensor is copied when its key, or its key with every
    ``"language_model."`` occurrence removed, is in ``wanted_names``; in the
    latter case it is stored under the stripped key.
    """
    for key in shard_keys:
        if wanted_names is None or key in wanted_names:
            weight_map[key] = get_tensor(key)
            continue
        stripped_key = key.replace("language_model.", "")
        if stripped_key in wanted_names:
            weight_map[stripped_key] = get_tensor(key)


def load_tensors(model_name, load_weight_names=None):
    """Load tensors of a sharded checkpoint stored under ``ROOT_MODEL_DIRECTORY``.

    The checkpoint format is chosen from the index file found in the model
    directory: ``model.safetensors.index.json`` (preferred) or
    ``pytorch_model.bin.index.json``.

    Args:
        model_name: Name of the model sub-directory under ``ROOT_MODEL_DIRECTORY``.
        load_weight_names: Iterable of tensor names to load, or ``None`` to load
            every tensor. A requested name that is absent from the index is also
            looked up with a ``"language_model."`` prefix; such tensors are
            returned under the name that was requested. Names found under
            neither spelling are reported with ``print`` and skipped.

    Returns:
        dict mapping tensor name to tensor. An empty iterable yields ``{}``.

    Raises:
        FileNotFoundError: If the model directory has neither index file.
        AssertionError: If the index file has no ``"weight_map"`` entry.
    """
    model_directory = os.path.join(ROOT_MODEL_DIRECTORY, model_name)
    directory_entries = os.listdir(model_directory)

    # check is bin file or is safetensors file
    if "model.safetensors.index.json" in directory_entries:
        index_file_name = "model.safetensors.index.json"
        index_label = "Safetensors"
        use_safetensors = True
    elif "pytorch_model.bin.index.json" in directory_entries:
        index_file_name = "pytorch_model.bin.index.json"
        index_label = "PyTorch bin"
        use_safetensors = False
    else:
        # Previously the function silently returned None here.
        raise FileNotFoundError(
            f"No model.safetensors.index.json or pytorch_model.bin.index.json found in {model_directory}."
        )

    with open(os.path.join(model_directory, index_file_name)) as f:
        index = json.load(f)

    # process map file
    tensor_file_map = index.get("weight_map")
    if tensor_file_map is None:
        raise AssertionError(f"{index_label} index file has no weight map.")

    if load_weight_names is not None:
        # Materialise once: the argument may be a one-shot iterable, and the set
        # gives constant-time membership tests while scanning the shards.
        requested_names = list(load_weight_names)
        wanted_names = set(requested_names)

        # find required files
        shard_files = set()
        for name in requested_names:
            if name in tensor_file_map:
                shard_files.add(tensor_file_map[name])
            elif "language_model." + name in tensor_file_map:
                # only add "language model" when loading from multimodal model
                shard_files.add(tensor_file_map["language_model." + name])
            else:
                print(f"{name} not found in {model_name}.")
    else:
        # load all tensors
        wanted_names = None
        shard_files = set(tensor_file_map.values())

    weight_map = {}
    # Sorted so shards are always visited in the same order.
    for file in sorted(shard_files):
        shard_path = os.path.join(model_directory, file)
        if use_safetensors:
            with safe_open(shard_path, framework="pt") as f:
                _copy_shard_tensors(f.keys(), f.get_tensor, wanted_names, weight_map)
        else:
            # NOTE: torch.load unpickles the file; only use it on trusted checkpoints.
            # torch.load returns a plain dict of tensors, so index it directly.
            state_dict = torch.load(shard_path, map_location="cpu")
            _copy_shard_tensors(state_dict.keys(), state_dict.__getitem__, wanted_names, weight_map)
    return weight_map


In [None]:
# Checkpoint that the models listed below are diffed against.
base_model = "Qwen2-merged-weighted-all"

# Checkpoints to compare with `base_model` (directory names under the model root).
models = [
    "Qwen2-7B-Instruct",
    "Qwen2-VL-7B-Instruct",
    "LLaVA-Video-7B-Qwen2",
    "llava-onevision-qwen2-7b-si",
]

# Alternative model set, kept for reference:
# base_model = "Qwen2-merged-weighted-all"
# models = ["ft-text", "ft-image", "ft-video", "ft-mm"]

In [None]:
# Load the weights.
# The index of `base_model` defines which tensor names are requested from every checkpoint.
model_index = load_index(base_model)
requested_weight_names = model_index["weight_map"].keys()
base_weights = load_tensors(base_model, requested_weight_names)

# One {tensor name: tensor} dict per compared model, keyed by model name.
expert_models = {}
for model in models:
    expert_models[model] = load_tensors(model, requested_weight_names)

In [None]:
def aggregate_fn(shift_tensor):
    """Collapse a weight-shift tensor into one scalar: the L2 norm over all elements.

    Args:
        shift_tensor: A ``torch.Tensor`` or anything ``np.asarray`` accepts, of any shape.

    Returns:
        The norm as a Python ``float`` for torch input, or as a NumPy scalar otherwise.
    """
    if isinstance(shift_tensor, torch.Tensor):
        tensor = shift_tensor.detach()
        # NumPy has no bfloat16 (converting such a tensor raises TypeError) and a
        # float16 sum of squares overflows to inf, so reduce inside torch and
        # upcast anything that is not already single/double precision.
        if tensor.dtype not in (torch.float32, torch.float64, torch.complex64, torch.complex128):
            tensor = tensor.to(torch.float32)
        return torch.linalg.vector_norm(tensor).item()

    values = np.asarray(shift_tensor)
    if values.dtype == np.float16:
        # Avoid float16 overflow in the sum of squares.
        values = values.astype(np.float64)
    return np.linalg.norm(values)
    # Other options:
    # return np.mean(np.abs(shift_tensor))
    # return np.var(shift_tensor)

In [None]:
# One row per model, one column per tensor name of the base checkpoint.
layer_names = list(base_weights.keys())
model_order = []
aggregated_shifts_list = []

for name, expert_w in expert_models.items():
    # Scalar shift per layer; a layer missing from either checkpoint contributes 0.
    model_agg_vector = [
        aggregate_fn(expert_w[layer_name] - base_weights[layer_name])
        if (layer_name in expert_w and layer_name in base_weights)
        else 0
        for layer_name in layer_names
    ]
    model_order.append(name)
    aggregated_shifts_list.append(model_agg_vector)

# The base model has zero shift relative to itself, so it is appended as an all-zero row.
model_order.append(base_model)
aggregated_shifts_list.append([0] * len(layer_names))

stacked_agg_shifts = np.array(aggregated_shifts_list)

print(f"Stacked aggregated shifts into array of shape: {stacked_agg_shifts.shape}")

In [None]:
# --- Optional: scale the data ---
# Scaling is often recommended before PCA and can help t-SNE/UMAP.
# To enable it, uncomment the next three lines:
# scaler = StandardScaler()
# scaled_shifts = scaler.fit_transform(stacked_agg_shifts)
# print("Applied StandardScaler to the shift vectors.")

# Choose which data to use for reduction:
# data_for_reduction = scaled_shifts # Use scaled data (requires the block above)
# Currently the unscaled per-layer aggregated shifts are used.
data_for_reduction = stacked_agg_shifts

In [None]:
n_components = 3 # Reduce to 3 dimensions: the plotting cell draws a 3D scatter and rejects fewer than 3 components

In [None]:
# # PCA
# pca = PCA(n_components=n_components)
# pca_result = pca.fit_transform(data_for_reduction)
# print(f"\nPCA completed. Explained variance ratio: {pca.explained_variance_ratio_}")

In [None]:
# t-SNE
# Note: t-SNE hyperparameters (perplexity, learning_rate, iteration count) can significantly affect results.
# Adjust perplexity based on the number of points (models). A common range is 5-50.
perplexity_value = min(30, len(model_order) - 1) # Ensure perplexity < n_samples
# The iteration count is left at scikit-learn's default of 1000, the value this cell used to
# pass explicitly. The keyword was renamed from `n_iter` to `max_iter` in scikit-learn 1.5 and
# `n_iter` was removed afterwards, so passing either name fails on some releases.
tsne = TSNE(n_components=n_components, perplexity=perplexity_value, random_state=42)
tsne_result = tsne.fit_transform(data_for_reduction)
print(f"t-SNE completed (perplexity={perplexity_value}).")

In [None]:
# # UMAP
# # Note: UMAP hyperparameters (n_neighbors, min_dist) also affect results.
# n_neighbors_value = min(15, max(2, len(model_order) - 1)) # Ensure n_neighbors < n_samples
# umap_reducer = umap.UMAP(n_components=n_components, n_neighbors=n_neighbors_value, min_dist=0.1, random_state=42)
# umap_result = umap_reducer.fit_transform(data_for_reduction)
# print(f"UMAP completed (n_neighbors={n_neighbors_value}).")

In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # Import for 3D plotting
import numpy as np # Assuming numpy is used for tsne_result

# Wide canvas holding a single 3D axes.
fig = plt.figure(figsize=(18, 8))
ax = fig.add_subplot(111, projection='3d')

# Optional title:
# plt.title('Fine-tuned Model Weight Shifts Relative to Base Model (3D)', fontsize=16)

# Embedding to draw; point this at another reducer's output to plot that instead.
result = tsne_result

# A 3D scatter needs three coordinates per point.
if not result.shape[1] >= 3:
    raise ValueError("The 'result' data must have at least 3 components for a 3D plot.")

# One marker per row of the embedding.
xs, ys, zs = result[:, 0], result[:, 1], result[:, 2]
ax.scatter(xs, ys, zs, s=100, alpha=0.8, label='Expert Models')

# Optional axis labels:
# ax.set_xlabel("Component 1")
# ax.set_ylabel("Component 2")
# ax.set_zlabel("Component 3")

ax.grid(True, linestyle='--', alpha=0.6)

# Label offsets are fractions of each axis' data range; tune them to the data at hand.
x_offset_3d = 0.1 * (xs.max() - xs.min())
y_offset_3d = 0.01 * (ys.max() - ys.min())
z_offset_3d = 0.01 * (zs.max() - zs.min())

# Write each model name next to its point (shifted back along x, forward along y and z).
for i, name in enumerate(model_order):
    label_position = (xs[i] - x_offset_3d, ys[i] + y_offset_3d, zs[i] + z_offset_3d)
    ax.text(*label_position, name, fontsize=10)

# Optional extras:
# ax.legend() # In 3D, legend placement might need adjustment
# plt.tight_layout() # May misbehave with 3D axes; adjust subplot parameters manually if needed
# ax.view_init(elev=0., azim=-30) # Try different elevation and azimuth angles

# Write the figure to disk before displaying it.
plt.savefig("merge_tsne_3d_plt_interface.pdf", dpi=300, bbox_inches='tight')

plt.show()