In [None]:
!pip install -U FlagEmbedding
!pip install -q LM_Cocktail

In [None]:
import os 
# Export WANDB_MODE=disabled for this process and every child process it spawns
# (presumably to stop the training run from logging to Weights & Biases).
os.environ.update({"WANDB_MODE": "disabled"})

In [None]:
!torchrun --nproc_per_node 1 \
-m FlagEmbedding.reranker.run \
--output_dir  "./" \
--model_name_or_path "BAAI/bge-reranker-base" \
--train_data "./reRanker_train_dataset_cleaned.json" \
--learning_rate 6e-5 \
--fp16 \
--save_steps 3000 \
--num_train_epochs 5 \
--per_device_train_batch_size {8} \
--gradient_accumulation_steps 4 \
--dataloader_drop_last True \
--train_group_size 16 \
--max_len 512 \
--weight_decay 0.01 \
--logging_steps 10 

In [None]:
from LM_Cocktail import mix_models, mix_models_with_data
from FlagEmbedding import FlagReranker
 
# Single location for the merged checkpoint, so the path handed to mix_models
# and the path FlagReranker loads from cannot drift apart. Previously the model
# was written to the relative './mixed_model' but loaded from the absolute
# '/kaggle/working/mixed_model', which only matched when the working directory
# happened to be /kaggle/working.
mixed_model_dir = '/kaggle/working/mixed_model'

# Combine the base reranker with the checkpoint in /kaggle/working/ using equal
# weights (0.5 / 0.5) and store the result in mixed_model_dir.
# NOTE(review): '/kaggle/working/' is presumably where the training cell's
# --output_dir "./" ends up on Kaggle -- confirm when running elsewhere.
model = mix_models(
    model_names_or_paths=["BAAI/bge-reranker-base", '/kaggle/working/'],
    model_type='reranker',
    weights=[0.5, 0.5],
    output_path=mixed_model_dir,
)

# Load the merged model for scoring; fp16 can speed up computing.
reranker = FlagReranker(mixed_model_dir, use_fp16=True)

In [None]:
import zipfile
import os

def zip_folder(folder_path, zip_path):
    """Write every file under ``folder_path`` into a deflate-compressed zip.

    Entries are stored with paths relative to ``folder_path``, so the archive
    unpacks to the folder's contents rather than to its absolute location.
    Directories that contain no files produce no entry.

    Args:
        folder_path: Directory whose files are archived (walked recursively).
        zip_path: Destination archive path; an existing file is overwritten.
            It may be located inside ``folder_path``.
    """
    # The archive is created before the walk starts, so when it lives inside
    # folder_path os.walk would list it and it would be added to itself.
    archive_realpath = os.path.realpath(zip_path)
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)
                if os.path.realpath(file_path) == archive_realpath:
                    continue
                zipf.write(file_path, os.path.relpath(file_path, folder_path))

# Pack the merged model directory into mixed_model.zip (relative path, so it is
# created in the current working directory).
folder_to_zip = '/kaggle/working/mixed_model'
zip_file_path = 'mixed_model.zip'
zip_folder(folder_path=folder_to_zip, zip_path=zip_file_path)


#### Testing

In [None]:
import re
ad = """Rest Lead to RCO Security - Join the Future of Security
RCO is on an exciting growth journey and is looking for our new senior system tester, who will also have test lead responsibility. Open the door to yours and our future and together we can drive the shift towards a more open, sustainable and safer society. 
What does the role entail? 
As a Test lead at RCO, you get the opportunity to develop and deepen your knowledge in cloud environments and automation tests with a mixture of hands-on test work and decision making. With us, you get everything under one roof. Awesome and competent colleagues; the newest technology; a chance to follow products from ideas to production; an exciting industry with the future in focus; and best of all - the chance to impact society to make it safer and more convenient. 🌟 
What is included in the daily work?  
·   10-20% test lead and decision making. 
·   Plan, design and run automation tests.  
·   Work with our test systems in the lab (i.e., building, modifying, install/upgrading in both SW and HW, as well as testing our cloud-based software).   
·   Creating and maintaining project’s guiding documents, test plan and test cases as well as related release activities. 
·   Highlighting the ongoing efforts within the project, specifically emphasizing the routine release testing conducted every three weeks.
Who are we looking for; 
We seek candidates with a relevant educational background and extensive experience in hardware- and software testing. You have experience in designing and reviewing test cases and requirements, test planning, executing tests and have experimented with exploratory testing and troubleshooting 🕵️‍♀️
With us, you will work in a team of developers and testers where personal drive, responsibility and team spirit are crucial. We are looking for a senior system tester who has a strong interest in technology, is eager to familiarize themselves with RCO's products, and is ready to tackle new challenges head-on. You should feel comfortable prioritizing different tasks, which may vary from leading your colleagues or engaging in hands-on testing yourself.
Key qualifications include: 
·      5+ years of experience in software testing. 
·      Basic knowledge of software testing methodologies (ISTQB Foundation Level). 
·      Experience in Python programming. 
·      Hardware/Embedded product testing experience. 
·      Linux operator skills. 
·      Familiarity with REST API. 
 Nice to Have: 
·      Basic knowledge of computer networking protocols such as TCP/UDP. 
·      Experience working with tools such as Jira, Xray and Docker. 
·      Experience in C# programming. 
Why RCO?  
At RCO, you'll be part of an ambitious journey in an expansive industry. Shape the future of security and access solutions while enjoying a supportive environment for your personal and professional development. Benefit from a range of perks including an occupational pension, collective agreement, wellness allowances, and a hybrid workplace. 
Company Culture  
Embrace tomorrow, build trust, act smart, and grow together – our values define our culture. At RCO, an open atmosphere fosters trust, enabling us to create an environment where development thrives.  
Where is the role located? 
Frösunda (Solna), Stockholm. We offer a flexible hybrid model where we work from the office 3 days a week, and the remaining time is flexible. 🏘🚙 
Join Us Today: Be a part of shaping the future of security. Welcome with your application! 
Open to a larger context 
  RCO develops, manufactures and sells services and products within access, locking and security systems. Our offering is characterized by good quality, high accessibility and innovation. By being close to our customers we create a safe, secure and smooth everyday life - whether you need a smart lock or code lock on a single door, or a large integrated system for controlling passes and burglar alarms in several properties spread across the country. RCO Group operates under the brands RCO, Dinbox, Nimly and Swedlock and is represented in Sweden, Norway, Denmark and Finland. RCO Gruppen is owned by Novax, which is part of the family business Axel Johnson AB. The company has an environmental certificate according to Svensk Miljöbas."""

def split_text(text):
    """Split ``text`` into sentence-like chunks.

    A chunk ends at the first ``". "`` (period followed by a space), at a
    newline, or at the end of the text; the terminator stays attached to the
    chunk. Chunks consisting only of whitespace are discarded.

    Args:
        text: The string to split.

    Returns:
        list[str]: The non-blank chunks in their original order.
    """
    # `$` lets the final chunk match even when the text does not end with
    # ". " or a newline; without it that trailing sentence was silently lost.
    # `\n` is listed before `$` so a closing newline stays part of its chunk.
    chunks = re.findall(r'(.*?(?:\. |\n|$))', text)
    # Drops blank lines and the empty match produced at the end of the text.
    return [chunk for chunk in chunks if chunk.strip()]

In [None]:
# Parallel lists (same order, same length): short names for the table header
# and the full label text that is paired with each chunk for the reranker.
sort_labels = ["Hard","Soft","Qualns","Work","Ben","Comp","Edu ","Cer " ,"Skills", "Breadtext"]
labels = ["HardSkill","SoftSkill","Qualifications","Work Experience","Benefits","Company Culture","Education","Certification" ,"Skills","Breadtext"]
data_list = []

score_width = 8
# Headers are cut to the score column width; a header wider than its column
# (previously up to 10 characters in an 8-character column) breaks alignment.
header_width = score_width

formatted_headers = [label[:header_width].rjust(score_width) for label in sort_labels]
formatted_headers.append("Text")

data_list.append(' | '.join(formatted_headers))

for text in split_text(ad):
    # One [label, chunk] pair per candidate label, scored in a single call.
    data = [[label, text] for label in labels]
    scores = reranker.compute_score(data)

    text_for_table = text.replace("\n", "")
    formatted_scores = [f"{score:.3f}".rjust(score_width) for score in scores]

    # Select the top pair by score only: no full sort is needed, and equal
    # scores no longer fall through to comparing the [label, chunk] lists
    # (the first label in `labels` order wins a tie).
    best_score, best_pair = max(zip(scores, data), key=lambda scored: scored[0])

    # Row summary: best score, its label, and the first 70 characters of the chunk.
    best_summary = f"{best_score:.3f}" + " " + str(best_pair[0]) + "  = " + str(text_for_table[:70])
    data_list.append(' | '.join(formatted_scores) + ' | ' + best_summary)

for row in data_list:
    print(row)
