### Datasets

In [7]:
from datasets import load_dataset
# Short language key (used by every later cell) -> SIB-200 config name.
# Insertion order is preserved, so `data` ends up with the same key order
# as loading each language by hand.
SIB200_CONFIGS = {
    'en': 'eng_Latn',
    'fr': 'fra_Latn',
    'id': 'ind_Latn',
    'jv': 'jav_Latn',
    'su': 'sun_Latn',
    'kr': 'kor_Hang',
    'jp': 'jpn_Jpan',
}

data = {}
for short_code, config_name in SIB200_CONFIGS.items():
    data[short_code] = load_dataset(
        'Davlan/sib200', config_name, cache_dir='./huggingface/sib200'
    )

In [8]:
import pandas as pd
def convert_to_dataframe(data):
    """Flatten one language's splits into a single DataFrame.

    Args:
        data: Mapping with 'train', 'validation' and 'test' entries, each
            exposing a ``to_pandas()`` method.

    Returns:
        A DataFrame with the rows of train, then validation, then test,
        re-indexed from 0.
    """
    split_names = ('train', 'validation', 'test')
    frames = [data[split].to_pandas() for split in split_names]
    return pd.concat(frames, ignore_index=True)

# Replace each language's split collection with one merged DataFrame.
# The replacement happens in place, so guard against a second run of this
# cell: an entry that is already a DataFrame has no 'train' split and
# converting it again would raise KeyError.
for key in data.keys():
    if isinstance(data[key], pd.DataFrame):
        continue
    data[key] = convert_to_dataframe(data[key])

In [9]:
# Distinct topic labels present in the English data.
english_topics = data['en']['category'].unique()
print(english_topics)

['geography' 'science/technology' 'entertainment' 'politics' 'health'
 'travel' 'sports']


In [None]:
import torch
import os
import pandas as pd
from transformers import AutoModelForCausalLM, AutoProcessor

folder_name = "Qwen/Qwen3-8B"

# Both loaders share the same per-model cache directory and flags.
# NOTE(review): trust_remote_code=True lets the Hub repository run its own
# Python code at load time — confirm it is actually required for this model.
hf_load_kwargs = {
    "cache_dir": f"./huggingface/{folder_name}",
    "trust_remote_code": True,
}

# NOTE(review): the extraction cell that follows assigns `folder_name`,
# `model` and `processor` again, so running both cells in order replaces
# what is loaded here.
model = AutoModelForCausalLM.from_pretrained(folder_name, **hf_load_kwargs)

processor = AutoProcessor.from_pretrained(folder_name, **hf_load_kwargs)

# def hook_fn(m, i, o, layer_id):
#     os.makedirs(save_dir, exist_ok=True)
#     save_path = os.path.join(save_dir, f"{layer_id}.pt")
#     torch.save(o[0][-1, :].detach().cpu(), save_path)

# for i, layer in enumerate(model.model.layers):
#     layer.register_forward_hook(
#         lambda m, i, o, layer_id=i: hook_fn(m, i, o, layer_id=layer_id)
#     )

### Extraction

In [None]:
import torch
import os
import pandas as pd
from transformers import AutoModelForCausalLM, AutoProcessor

folder_name = "meta-llama/Llama-3.2-1B"

# Both loaders share the same per-model cache directory and flags.
# NOTE(review): trust_remote_code=True lets the Hub repository run its own
# Python code at load time — confirm it is actually required for this model.
hf_load_kwargs = {
    "cache_dir": f"./huggingface/{folder_name}",
    "trust_remote_code": True,
}

model = AutoModelForCausalLM.from_pretrained(folder_name, **hf_load_kwargs)

processor = AutoProcessor.from_pretrained(folder_name, **hf_load_kwargs)

def hook_fn(m, i, o, layer_id):
    """Forward hook that saves one layer's last-token hidden state to disk.

    Args:
        m: The module the hook is attached to (unused).
        i: The module's positional inputs (unused).
        o: The module's output: either the hidden-state tensor itself or a
            tuple/list whose first element is that tensor.
        layer_id: Index of the layer; becomes the output file name.

    Side effects:
        Writes ``<save_dir>/<layer_id>.pt``, creating the directory if
        needed. ``save_dir`` is a module-level variable that the extraction
        loop sets before each forward pass.
    """
    # Depending on the library version a decoder layer hands back either
    # (hidden_states, ...) or the bare tensor. Normalise first, otherwise
    # `o[0]` strips the tuple in one case and the batch axis in the other.
    hidden_states = o[0] if isinstance(o, (tuple, list)) else o

    # assumes hidden_states is (batch, seq_len, hidden) with one sequence
    # per forward pass — TODO confirm if batching is ever introduced.
    # clone(): torch.save serialises the whole storage behind a view, so
    # without a copy every file would carry the full sequence of activations.
    last_token = hidden_states[0, -1, :].detach().cpu().clone()

    os.makedirs(save_dir, exist_ok=True)
    save_path = os.path.join(save_dir, f"{layer_id}.pt")
    torch.save(last_token, save_path)

# Attach the saving hook to every decoder layer. The layer index is bound
# through a default argument so each hook keeps its own value instead of
# all of them seeing the final loop index.
for layer_index, layer in enumerate(model.model.layers):
    layer.register_forward_hook(
        lambda module, inputs, output, layer_id=layer_index: hook_fn(
            module, inputs, output, layer_id=layer_id
        )
    )

# Prompt wrapped around each text for the "classification-en" variant.
CLASSIFICATION_PROMPT = """Classify the topic of the following text. Choose exactly one of the following topics:

- geography
- science/technology
- entertainment
- politics
- health
- travel
- sports

Text: {text}

Topic:"""

# Run every text through the model twice: once as-is ("raw") and once inside
# the classification prompt. The forward hooks write the activations; they
# read the module-level `save_dir`, so it must be set before each forward pass.
for language in ['en', 'fr', 'id', 'jv', 'su', 'kr', 'jp']:
    for _, row in data[language].iterrows():
        text_id = str(row['index_id'])

        variants = (
            ("raw", row['text']),
            ("classification-en", CLASSIFICATION_PROMPT.format(text=row['text'])),
        )
        for variant, prompt_text in variants:
            save_dir = f"./activations/sib200/{folder_name}/{variant}/{language}/{text_id}/"
            inputs = processor(text=prompt_text, return_tensors="pt")

            with torch.no_grad():
                outputs = model(**inputs)
                print(language, text_id)

### Processed

#### Language

In [None]:
# raw
import torch
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.manifold import TSNE
from matplotlib.lines import Line2D

models = [{'name': 'meta-llama/Llama-3.2-1B', 'layers': 16}]
languages = ['en', 'fr', 'id', 'jp', 'jv', 'su', 'kr']
PANELS_PER_ROW = 8

# Per-layer t-SNE of the saved "raw" activations, coloured by input language.
# One figure per model, one panel per layer.
for model_id in range(len(models)):
    model_name = models[model_id]['name']
    n_layers = models[model_id]['layers']

    cmap = plt.get_cmap('tab10')
    color_map = {language: cmap(i) for i, language in enumerate(languages)}

    # Round the row count up: truncating drops the last partial row, and
    # axes[layer] then runs past the grid for layer counts not divisible by 8.
    n_rows = -(-n_layers // PANELS_PER_ROW)
    fig, axes = plt.subplots(n_rows, PANELS_PER_ROW, figsize=(32, 4 * n_rows))
    axes = axes.flatten()

    for layer in range(n_layers):
        label_language = []
        latent = []

        for current_language in languages:
            base_path = f"./activations/sib200/{model_name}/raw/{current_language}"
            # os.listdir order is arbitrary; sort so the sample order (and
            # therefore the seeded t-SNE result) is the same on every run.
            for text_id in sorted(os.listdir(base_path)):
                text_path = os.path.join(base_path, text_id)
                if not os.path.isdir(text_path):
                    continue
                path = os.path.join(text_path, f"{layer}.pt")
                # NOTE(review): torch.load may unpickle arbitrary objects;
                # only point this at activation files produced locally.
                activation_values = torch.load(path)
                latent.append(activation_values.to(torch.float32).numpy())
                label_language.append(current_language)

        latent = np.array(latent)
        label_language = np.array(label_language)
        tsne = TSNE(n_components=2, random_state=42)
        latent_2d = tsne.fit_transform(latent)

        ax = axes[layer]
        for lang in languages:
            mask = label_language == lang
            ax.scatter(latent_2d[mask, 0], latent_2d[mask, 1],
                       label=lang, color=color_map[lang], alpha=0.6, s=10)

        ax.set_title(f"Layer {layer}", fontsize=10)

    # Blank out panels in the last row that have no layer to show.
    for spare_ax in axes[n_layers:]:
        spare_ax.axis('off')

    legend_elements = [Line2D([0], [0], marker='o', color='w',
                              label=lang, markerfacecolor=color_map[lang],
                              markersize=8, alpha=0.6) for lang in languages]
    plt.tight_layout(rect=[0, 0, 1, 0.90])
    fig.legend(handles=legend_elements,
               loc='upper center', bbox_to_anchor=(0.5, 1.02),
               ncol=len(languages), title='Languages')

    plt.show()

In [None]:
# classification-en

import torch
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.manifold import TSNE
from matplotlib.lines import Line2D

models = [{'name': 'meta-llama/Llama-3.2-1B', 'layers': 16}]
languages = ['en', 'fr', 'id', 'jp', 'jv', 'su', 'kr']
PANELS_PER_ROW = 8

# Per-layer t-SNE of the saved "classification-en" activations, coloured by
# input language. One figure per model, one panel per layer.
for model_id in range(len(models)):
    model_name = models[model_id]['name']
    n_layers = models[model_id]['layers']

    cmap = plt.get_cmap('tab10')
    color_map = {language: cmap(i) for i, language in enumerate(languages)}

    # Round the row count up: truncating drops the last partial row, and
    # axes[layer] then runs past the grid for layer counts not divisible by 8.
    n_rows = -(-n_layers // PANELS_PER_ROW)
    fig, axes = plt.subplots(n_rows, PANELS_PER_ROW, figsize=(32, 4 * n_rows))
    axes = axes.flatten()

    for layer in range(n_layers):
        label_language = []
        latent = []

        for current_language in languages:
            base_path = f"./activations/sib200/{model_name}/classification-en/{current_language}"
            # os.listdir order is arbitrary; sort so the sample order (and
            # therefore the seeded t-SNE result) is the same on every run.
            for text_id in sorted(os.listdir(base_path)):
                text_path = os.path.join(base_path, text_id)
                if not os.path.isdir(text_path):
                    continue
                path = os.path.join(text_path, f"{layer}.pt")
                # NOTE(review): torch.load may unpickle arbitrary objects;
                # only point this at activation files produced locally.
                activation_values = torch.load(path)
                latent.append(activation_values.to(torch.float32).numpy())
                label_language.append(current_language)

        latent = np.array(latent)
        label_language = np.array(label_language)
        tsne = TSNE(n_components=2, random_state=42)
        latent_2d = tsne.fit_transform(latent)

        ax = axes[layer]
        for lang in languages:
            mask = label_language == lang
            ax.scatter(latent_2d[mask, 0], latent_2d[mask, 1],
                       label=lang, color=color_map[lang], alpha=0.6, s=10)

        ax.set_title(f"Layer {layer}", fontsize=10)

    # Blank out panels in the last row that have no layer to show.
    for spare_ax in axes[n_layers:]:
        spare_ax.axis('off')

    legend_elements = [Line2D([0], [0], marker='o', color='w',
                              label=lang, markerfacecolor=color_map[lang],
                              markersize=8, alpha=0.6) for lang in languages]
    plt.tight_layout(rect=[0, 0, 1, 0.90])
    fig.legend(handles=legend_elements,
               loc='upper center', bbox_to_anchor=(0.5, 1.02),
               ncol=len(languages), title='Languages')

    plt.show()

#### Topic

In [None]:
#raw

import torch
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.manifold import TSNE
from matplotlib.lines import Line2D

models = [{'name': 'meta-llama/Llama-3.2-1B', 'layers': 16}]
languages = ['en', 'fr', 'id', 'jp', 'jv', 'su', 'kr']
PANELS_PER_ROW = 8

# Per-layer t-SNE of the saved "raw" activations, coloured by topic.
# One figure per (model, language), one panel per layer.
for model_id in range(len(models)):
    model_name = models[model_id]['name']
    n_layers = models[model_id]['layers']
    # Round the row count up: truncating drops the last partial row, and
    # axes[layer] then runs past the grid for layer counts not divisible by 8.
    n_rows = -(-n_layers // PANELS_PER_ROW)

    for current_language in languages:
        base_path = f"./activations/sib200/{model_name}/raw/{current_language}"
        current_data = data[current_language]
        topics = current_data['category'].unique()

        # index_id (as text, matching the directory names) -> topic, built
        # once instead of filtering the whole frame for every file of every
        # layer. setdefault keeps the first row for a repeated id.
        # An id missing from the data now raises KeyError.
        topic_by_id = {}
        for id_text, topic in zip(current_data['index_id'].astype(str),
                                  current_data['category']):
            topic_by_id.setdefault(id_text, topic)

        cmap = plt.get_cmap('tab10')
        color_map = {topic: cmap(i) for i, topic in enumerate(topics)}

        fig, axes = plt.subplots(n_rows, PANELS_PER_ROW, figsize=(32, 4 * n_rows))
        axes = axes.flatten()

        for layer in range(n_layers):
            label_topic = []
            latent = []

            # os.listdir order is arbitrary; sort so the sample order (and
            # therefore the seeded t-SNE result) is the same on every run.
            for text_id in sorted(os.listdir(base_path)):
                text_path = os.path.join(base_path, text_id)
                if not os.path.isdir(text_path):
                    continue
                path = os.path.join(text_path, f"{layer}.pt")
                # NOTE(review): torch.load may unpickle arbitrary objects;
                # only point this at activation files produced locally.
                activation_values = torch.load(path)
                latent.append(activation_values.to(torch.float32).numpy())
                label_topic.append(topic_by_id[text_id])

            latent = np.array(latent)
            label_topic = np.array(label_topic)
            tsne = TSNE(n_components=2, random_state=42)
            latent_2d = tsne.fit_transform(latent)

            ax = axes[layer]
            for label in topics:
                mask = label_topic == label
                ax.scatter(latent_2d[mask, 0], latent_2d[mask, 1],
                           label=label, color=color_map[label], alpha=0.6, s=10)

            ax.set_title(f"Layer {layer}", fontsize=10)

        # Blank out panels in the last row that have no layer to show.
        for spare_ax in axes[n_layers:]:
            spare_ax.axis('off')

        legend_elements = [Line2D([0], [0], marker='o', color='w',
                                  label=label, markerfacecolor=color_map[label],
                                  markersize=8, alpha=0.6) for label in topics]
        plt.tight_layout(rect=[0, 0, 1, 0.90])
        fig.legend(handles=legend_elements,
                   loc='upper center', bbox_to_anchor=(0.5, 1.02),
                   ncol=len(topics), title='Label')

        plt.show()

In [None]:
# classification-en

import torch
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.manifold import TSNE
from matplotlib.lines import Line2D

models = [{'name': 'meta-llama/Llama-3.2-1B', 'layers': 16}]
languages = ['en', 'fr', 'id', 'jp', 'jv', 'su', 'kr']
PANELS_PER_ROW = 8

# Per-layer t-SNE of the saved "classification-en" activations, coloured by
# topic. One figure per (model, language), one panel per layer.
for model_id in range(len(models)):
    model_name = models[model_id]['name']
    n_layers = models[model_id]['layers']
    # Round the row count up: truncating drops the last partial row, and
    # axes[layer] then runs past the grid for layer counts not divisible by 8.
    n_rows = -(-n_layers // PANELS_PER_ROW)

    for current_language in languages:
        base_path = f"./activations/sib200/{model_name}/classification-en/{current_language}"
        current_data = data[current_language]
        topics = current_data['category'].unique()

        # index_id (as text, matching the directory names) -> topic, built
        # once instead of filtering the whole frame for every file of every
        # layer. setdefault keeps the first row for a repeated id.
        # An id missing from the data now raises KeyError.
        topic_by_id = {}
        for id_text, topic in zip(current_data['index_id'].astype(str),
                                  current_data['category']):
            topic_by_id.setdefault(id_text, topic)

        cmap = plt.get_cmap('tab10')
        color_map = {topic: cmap(i) for i, topic in enumerate(topics)}

        fig, axes = plt.subplots(n_rows, PANELS_PER_ROW, figsize=(32, 4 * n_rows))
        axes = axes.flatten()

        for layer in range(n_layers):
            label_topic = []
            latent = []

            # os.listdir order is arbitrary; sort so the sample order (and
            # therefore the seeded t-SNE result) is the same on every run.
            for text_id in sorted(os.listdir(base_path)):
                text_path = os.path.join(base_path, text_id)
                if not os.path.isdir(text_path):
                    continue
                path = os.path.join(text_path, f"{layer}.pt")
                # NOTE(review): torch.load may unpickle arbitrary objects;
                # only point this at activation files produced locally.
                activation_values = torch.load(path)
                latent.append(activation_values.to(torch.float32).numpy())
                label_topic.append(topic_by_id[text_id])

            latent = np.array(latent)
            label_topic = np.array(label_topic)
            tsne = TSNE(n_components=2, random_state=42)
            latent_2d = tsne.fit_transform(latent)

            ax = axes[layer]
            for label in topics:
                mask = label_topic == label
                ax.scatter(latent_2d[mask, 0], latent_2d[mask, 1],
                           label=label, color=color_map[label], alpha=0.6, s=10)

            ax.set_title(f"Layer {layer}", fontsize=10)

        # Blank out panels in the last row that have no layer to show.
        for spare_ax in axes[n_layers:]:
            spare_ax.axis('off')

        legend_elements = [Line2D([0], [0], marker='o', color='w',
                                  label=label, markerfacecolor=color_map[label],
                                  markersize=8, alpha=0.6) for label in topics]
        plt.tight_layout(rect=[0, 0, 1, 0.90])
        fig.legend(handles=legend_elements,
                   loc='upper center', bbox_to_anchor=(0.5, 1.02),
                   ncol=len(topics), title='Label')

        plt.show()

#### Topic-Merged

In [None]:
#raw

import torch
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.manifold import TSNE
from matplotlib.lines import Line2D

models = [{'name': 'meta-llama/Llama-3.2-1B', 'layers': 16}]
languages = ['en', 'fr', 'id', 'jp', 'jv', 'su', 'kr']
PANELS_PER_ROW = 8

# Topics in first-seen order across all languages, and one colour per topic.
# Computed once up front rather than read from whichever language the inner
# loop happened to visit last.
# NOTE(review): identical to the previous palette only if every language
# lists its topics in the same order — confirm against the data.
topics = list(dict.fromkeys(
    topic
    for language in languages
    for topic in data[language]['category'].unique()
))
cmap = plt.get_cmap('tab10')
color_map = {topic: cmap(i) for i, topic in enumerate(topics)}

# Per language: index_id (as text, matching the directory names) -> topic.
# Built once instead of filtering the whole frame for every file of every
# layer. setdefault keeps the first row for a repeated id.
# An id missing from the data now raises KeyError.
topic_by_id = {}
for language in languages:
    lookup = {}
    for id_text, topic in zip(data[language]['index_id'].astype(str),
                              data[language]['category']):
        lookup.setdefault(id_text, topic)
    topic_by_id[language] = lookup

# Per-layer t-SNE of the saved "raw" activations with all languages pooled,
# coloured by topic. One figure per model, one panel per layer.
for model_id in range(len(models)):
    model_name = models[model_id]['name']
    n_layers = models[model_id]['layers']

    # Round the row count up: truncating drops the last partial row, and
    # axes[layer] then runs past the grid for layer counts not divisible by 8.
    n_rows = -(-n_layers // PANELS_PER_ROW)
    fig, axes = plt.subplots(n_rows, PANELS_PER_ROW, figsize=(32, 4 * n_rows))
    axes = axes.flatten()

    for layer in range(n_layers):
        label_topic = []
        latent = []

        for current_language in languages:
            base_path = f"./activations/sib200/{model_name}/raw/{current_language}"

            # os.listdir order is arbitrary; sort so the sample order (and
            # therefore the seeded t-SNE result) is the same on every run.
            for text_id in sorted(os.listdir(base_path)):
                text_path = os.path.join(base_path, text_id)
                if not os.path.isdir(text_path):
                    continue
                path = os.path.join(text_path, f"{layer}.pt")
                # NOTE(review): torch.load may unpickle arbitrary objects;
                # only point this at activation files produced locally.
                activation_values = torch.load(path)
                latent.append(activation_values.to(torch.float32).numpy())
                label_topic.append(topic_by_id[current_language][text_id])

        latent = np.array(latent)
        label_topic = np.array(label_topic)
        tsne = TSNE(n_components=2, random_state=42)
        latent_2d = tsne.fit_transform(latent)

        ax = axes[layer]
        for label in topics:
            mask = label_topic == label
            ax.scatter(latent_2d[mask, 0], latent_2d[mask, 1],
                       label=label, color=color_map[label], alpha=0.6, s=10)

        ax.set_title(f"Layer {layer}", fontsize=10)

    # Blank out panels in the last row that have no layer to show.
    for spare_ax in axes[n_layers:]:
        spare_ax.axis('off')

    legend_elements = [Line2D([0], [0], marker='o', color='w',
                              label=label, markerfacecolor=color_map[label],
                              markersize=8, alpha=0.6) for label in topics]
    plt.tight_layout(rect=[0, 0, 1, 0.90])
    fig.legend(handles=legend_elements,
               loc='upper center', bbox_to_anchor=(0.5, 1.02),
               ncol=len(topics), title='Label')

    plt.show()

In [None]:
#classification-en

import torch
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.manifold import TSNE
from matplotlib.lines import Line2D

models = [{'name': 'meta-llama/Llama-3.2-1B', 'layers': 16}]
languages = ['en', 'fr', 'id', 'jp', 'jv', 'su', 'kr']
PANELS_PER_ROW = 8

# Topics in first-seen order across all languages, and one colour per topic.
# Computed once up front rather than read from whichever language the inner
# loop happened to visit last.
# NOTE(review): identical to the previous palette only if every language
# lists its topics in the same order — confirm against the data.
topics = list(dict.fromkeys(
    topic
    for language in languages
    for topic in data[language]['category'].unique()
))
cmap = plt.get_cmap('tab10')
color_map = {topic: cmap(i) for i, topic in enumerate(topics)}

# Per language: index_id (as text, matching the directory names) -> topic.
# Built once instead of filtering the whole frame for every file of every
# layer. setdefault keeps the first row for a repeated id.
# An id missing from the data now raises KeyError.
topic_by_id = {}
for language in languages:
    lookup = {}
    for id_text, topic in zip(data[language]['index_id'].astype(str),
                              data[language]['category']):
        lookup.setdefault(id_text, topic)
    topic_by_id[language] = lookup

# Per-layer t-SNE of the saved "classification-en" activations with all
# languages pooled, coloured by topic. One figure per model, one panel per layer.
for model_id in range(len(models)):
    model_name = models[model_id]['name']
    n_layers = models[model_id]['layers']

    # Round the row count up: truncating drops the last partial row, and
    # axes[layer] then runs past the grid for layer counts not divisible by 8.
    n_rows = -(-n_layers // PANELS_PER_ROW)
    fig, axes = plt.subplots(n_rows, PANELS_PER_ROW, figsize=(32, 4 * n_rows))
    axes = axes.flatten()

    for layer in range(n_layers):
        label_topic = []
        latent = []

        for current_language in languages:
            base_path = f"./activations/sib200/{model_name}/classification-en/{current_language}"

            # os.listdir order is arbitrary; sort so the sample order (and
            # therefore the seeded t-SNE result) is the same on every run.
            for text_id in sorted(os.listdir(base_path)):
                text_path = os.path.join(base_path, text_id)
                if not os.path.isdir(text_path):
                    continue
                path = os.path.join(text_path, f"{layer}.pt")
                # NOTE(review): torch.load may unpickle arbitrary objects;
                # only point this at activation files produced locally.
                activation_values = torch.load(path)
                latent.append(activation_values.to(torch.float32).numpy())
                label_topic.append(topic_by_id[current_language][text_id])

        latent = np.array(latent)
        label_topic = np.array(label_topic)
        tsne = TSNE(n_components=2, random_state=42)
        latent_2d = tsne.fit_transform(latent)

        ax = axes[layer]
        for label in topics:
            mask = label_topic == label
            ax.scatter(latent_2d[mask, 0], latent_2d[mask, 1],
                       label=label, color=color_map[label], alpha=0.6, s=10)

        ax.set_title(f"Layer {layer}", fontsize=10)

    # Blank out panels in the last row that have no layer to show.
    for spare_ax in axes[n_layers:]:
        spare_ax.axis('off')

    legend_elements = [Line2D([0], [0], marker='o', color='w',
                              label=label, markerfacecolor=color_map[label],
                              markersize=8, alpha=0.6) for label in topics]
    plt.tight_layout(rect=[0, 0, 1, 0.90])
    fig.legend(handles=legend_elements,
               loc='upper center', bbox_to_anchor=(0.5, 1.02),
               ncol=len(topics), title='Label')

    plt.show()