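This notebook runs the neuron-pruning functions from `neuron_pruning_main`: it loads the model, prunes neurons per item and per model version (and with the neurons common to the training items), and generates responses with each pruned model.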

In [1]:
from neuron_pruning_main import *
import transformers
import torch
import os
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch.nn as nn
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [None]:
# Restrict this process to the GPUs with indices 1, 2 and 3.
# NOTE(review): torch is already imported by the time this cell runs; this
# presumably still takes effect as long as nothing has initialised CUDA yet —
# confirm, or move this assignment ahead of the imports.
os.environ["CUDA_VISIBLE_DEVICES"] = "1,2,3"

In [None]:
# Checkpoint identifier shared by the tokenizer and the model
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# Tokenizer that belongs to the checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Causal LM in float16; weights are cached under ./llm_weights and device
# placement is left to device_map="auto"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    low_cpu_mem_usage=True,
    cache_dir='llm_weights',
    torch_dtype=torch.float16,
)

In [None]:
# Full list of items the per-item pruning loop iterates over.
items = [
    "bicycle", "car", "bike", "motorcycle", "laptop", "sofa", "painting", "guitar",
    "refrigerator", "house", "book", "boat", "violin", "banjo",
    "necklace", "chair", "cellphone", "gold_chain",
]

# Subset whose neuron files are combined for the common-neuron model.
training_items = [
    'violin', 'banjo', 'bicycle', 'sofa', 'painting', 'motorcycle',
    'laptop', 'necklace', 'gold_chain', 'book', 'house', 'refrigerator',
]

# Guard against typos: every training item must also be a known item.
assert set(training_items) <= set(items), "training_items contains names missing from items"

# Held-out items, kept in the order they appear in `items`. A plain set
# difference yields an order that changes between kernel restarts (string
# hashing is randomised per process), which makes runs hard to compare.
test_items = [item for item in items if item not in training_items]

In [None]:
# Prompt table handed to process_model_layers and generate_responses below
df = pd.read_csv('purchase_low_all_items.csv')

# Token ids passed to generate_responses as `terminators`: the tokenizer's
# EOS id followed by the id of the "<|eot_id|>" special token
terminators = [tokenizer.eos_token_id]
terminators.append(tokenizer.convert_tokens_to_ids("<|eot_id|>"))

# Placeholder; the pruning loops rebind `outputs` on every run
outputs = list()

# The two model versions every pruning experiment is repeated for
model_version = ['black', 'white']

In [None]:
# Run the layer-processing step over the prompt table with the unpruned model.
# process_model_layers is not defined in this notebook (presumably it comes
# from the star import of neuron_pruning_main).
# NOTE(review): its return value is discarded, so it presumably writes the
# scores that the pruning cells below read — confirm.
process_model_layers(df, model, tokenizer)

In [None]:
# For specific models (pruning within each variation, not using common neurons):
# one pruned model and one result CSV per (version, item) pair.
results_dir = 'full_prompts_purchase_results_pruning'

# Create the output folder up front so a missing directory cannot fail the
# to_csv call after a full generation run has already been paid for.
os.makedirs(results_dir, exist_ok=True)

for version in model_version:
    for item in items:
        # NOTE(review): the same `model` object is passed in on every
        # iteration; if prune_setDiff modifies it in place, pruning would
        # accumulate across items — confirm it returns an independent copy.
        pruned_model, pruned_tokenizer = prune_setDiff(
            model, tokenizer, item, version,
            top_white_percent=0.15, top_black_percent=0.15,
        )
        outputs = generate_responses(
            df, pruned_model, pruned_tokenizer, terminators,
            num_iterations=100, temperature=0.6,
        )
        df_results = pd.DataFrame(outputs)
        df_results.to_csv(f'{results_dir}/setDiff_15_top_{version}_{item}.csv', index=False)

In [None]:
# For common model: prune with the neurons shared across the training items.
for version in model_version:
    # NOTE(review): `variations` is not defined anywhere in this notebook; it
    # presumably comes from the star import of neuron_pruning_main (or was
    # meant to be `training_items`) — confirm before running on a fresh kernel.
    create_df_top_neurons_setDiff(model, variations, version, folder="scores_all", top_p_percent=0.15)

# Files created in the previous function: one per (training item, version)
# pair, in the same order itertools.product(training_items, model_version)
# would give. Written as a nested comprehension so the cell does not rely on
# `product`, which this notebook never imports explicitly.
neuron_files = [
    f"{i}_pruned_{r}_15.pkl"
    for i in training_items
    for r in model_version
]

# Both return values are discarded; the call is kept for its side effects.
# NOTE(review): presumably this writes the two CSV files listed below — confirm.
_, _ = compute_similar_neurons(neuron_files)

common_neuron_files = ['top15_w_neurons_12_items_training.csv',
         'top15_b_neurons_12_items_training.csv']

results_dir = 'full_prompts_purchase_results_pruning'

# Create the output folder up front so a missing directory cannot fail the
# to_csv call after a full generation run has already been paid for.
os.makedirs(results_dir, exist_ok=True)

# The loop previously iterated over the undefined name `common_neurons_files`
# (NameError); it now uses the list defined above.
for common_neurons_file in common_neuron_files:
    pruned_common_model, pruned_common_tokenizer = prune_from_training_common_neurons(model, tokenizer, common_neurons_file)
    outputs_common = generate_responses(df, pruned_common_model, pruned_common_tokenizer, terminators, num_iterations=100, temperature=0.6)
    df_results_common = pd.DataFrame(outputs_common)

    # Name the output after the common-neuron file it came from. The old path
    # reused the stale `version`/`item` loop variables, so both runs wrote to
    # the same file and overwrote one of the per-item results.
    common_tag = os.path.splitext(os.path.basename(common_neurons_file))[0]
    df_results_common.to_csv(f'{results_dir}/setDiff_15_common_{common_tag}.csv', index=False)