In [None]:
# Install required libraries
!pip install -q mergekit transformers accelerate sdv

# Import libraries
import mergekit
import pandas as pd
import torch
from sdv.tabular import CTGAN
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load pre-trained Mistral models using mergekit
# NOTE(review): mergekit's documented Python entry points are
# `mergekit.config.MergeConfiguration` and `mergekit.merge.run_merge`; confirm
# that `mergekit.load_model` and a callable `mergekit.merge` exist in the
# installed version before relying on this cell.
chat_model_id = 'mistral-7b-chat'
code_model_id = 'mistral-7b-code'
mistral_chat = mergekit.load_model(chat_model_id)
mistral_code = mergekit.load_model(code_model_id)

# Define merge configuration
# 'slerp' interpolates between the two models; t is the interpolation factor,
# so 0.5 sits midway between them.
merge_config = {
    'merge_method': 'slerp',
    'models': [mistral_chat, mistral_code],
    'parameters': {
        't': 0.5
    }
}

# Perform model merging
merged_model = mergekit.merge(merge_config)

# Load tokenizer
# The tokenizer has to share a vocabulary with the merged Mistral weights, so it
# is loaded from the same checkpoint id as the chat model rather than from an
# unrelated model family (previously "facebook/xglm-7.5B").
tokenizer = AutoTokenizer.from_pretrained(chat_model_id)

# Employee class template
class Employee:
    """Simulated employee that drafts work artifacts with the merged model.

    Each public method formats a text prompt and completes it using the
    module-level ``merged_model`` and ``tokenizer``.
    """

    def __init__(self, name, email, role):
        """Store the employee's display name, email address and role."""
        self.name = name
        self.email = email
        self.role = role

    def _generate(self, prompt, max_length):
        """Complete ``prompt`` with the shared model and return decoded text.

        ``max_length`` is forwarded unchanged to ``merged_model.generate``
        (for Hugging Face models this limit counts the prompt tokens too).
        """
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids
        # Keep the prompt on the same device as the model weights so generation
        # also works when the model has been moved off the CPU.
        input_ids = input_ids.to(next(merged_model.parameters()).device)
        output = merged_model.generate(input_ids, max_length=max_length)
        # Drop special tokens (e.g. BOS/EOS markers) so callers get plain text.
        return tokenizer.decode(output[0], skip_special_tokens=True)

    def send_email(self, recipient, subject, body):
        """Return generated email text from this employee to ``recipient``."""
        prompt = f"""Write an email from {self.name} to {recipient}
Subject: {subject}
{body}"""
        return self._generate(prompt, max_length=200)

    def write_code(self, language, instructions):
        """Return generated ``language`` code for the given ``instructions``."""
        prompt = f"""Write {language} code to {instructions}"""
        return self._generate(prompt, max_length=400)

    def fill_timesheet(self, date, hours, project):
        """Return a generated timesheet entry for the date, hours and project."""
        prompt = f"""Fill out this timesheet entry:
Date: {date}
Hours Worked: {hours} 
Project: {project}
"""
        return self._generate(prompt, max_length=100)

    def plan_task(self, task_description):
        """Return a generated plan with action items for ``task_description``."""
        prompt = f"""Create a plan to complete this task: {task_description}
Include specific action items."""
        return self._generate(prompt, max_length=300)

# Generate synthetic employee data using SDV
employee_data = [
    {'name': 'John Doe', 'email': 'john@company.com', 'role': 'Software Engineer'},
    {'name': 'Jane Smith', 'email': 'jane@company.com', 'role': 'Data Scientist'},
    {'name': 'Bob Johnson', 'email': 'bob@company.com', 'role': 'Product Manager'},
]

# SDV tabular models are fitted on a pandas DataFrame, so convert the seed
# records (a list of dicts) into a table with name / email / role columns.
employee_df = pd.DataFrame(employee_data)

ctgan = CTGAN()
ctgan.fit(employee_df)

# Draw 50 synthetic rows and preview the first few.
synthetic_employees = ctgan.sample(50)

print(synthetic_employees.head())

# Fine-tune merged model on synthetic employee data
def finetune(model, tokenizer, data, epochs=3):
    """Fine-tune ``model`` in place on one sentence per row of ``data``.

    Args:
        model: Torch module called as ``model(input_ids, labels=input_ids)``;
            the first element of its output is used as the loss.
        tokenizer: Callable returning an object whose ``input_ids`` attribute
            is a tensor when called with ``return_tensors="pt"``.
        data: Table supporting ``itertuples(index=False)`` whose rows expose
            ``name``, ``role`` and ``email``.
        epochs: Number of full passes over ``data``.

    Returns:
        The same ``model`` object, with the train/eval mode it had on entry
        restored so that later generation does not run in training mode.
    """
    was_training = model.training
    # Inputs must live on the same device as the weights.
    device = next(model.parameters()).device
    optim = torch.optim.Adam(model.parameters(), lr=5e-5)

    model.train()
    try:
        for _ in range(epochs):
            for row in data.itertuples(index=False):
                prompt = f"""{row.name} is a {row.role} at our company.
Their email is {row.email}."""

                input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
                # The prompt doubles as its own label sequence.
                loss = model(input_ids, labels=input_ids)[0]

                # One optimizer step per row (no batching).
                loss.backward()
                optim.step()
                optim.zero_grad()
    finally:
        # Restore the caller's mode even if a training step raised.
        model.train(was_training)

    return model

# Run the fine-tuning pass over the synthetic rows; finetune returns the model it was given
tuned_model = finetune(
    model=merged_model,
    tokenizer=tokenizer,
    data=synthetic_employees,
)

# Demo: one sample request per Employee capability
employee1 = Employee(
    name="Sarah Connor",
    email="sarah@company.com",
    role="ML Engineer",
)

# 1) Draft an email
email = employee1.send_email(
    recipient="john@company.com",
    subject="Model Update",
    body="Hi John, I've finished fine-tuning the new model on the synthetic data...",
)
print(email)

# 2) Produce a code snippet
code = employee1.write_code(
    language="Python",
    instructions="load a CSV file into a Pandas dataframe",
)
print(code)

# 3) Complete a timesheet entry
timesheet = employee1.fill_timesheet(
    date="2024-03-16",
    hours=6,
    project="Synthetic Data Generation",
)
print(timesheet)

# 4) Outline a task plan
task_plan = employee1.plan_task(
    task_description="Evaluate model performance on test set",
)
print(task_plan)
