# Attempt to fine-tune GPT3.5 to translate between languages
The first language will be our made-up language (see the notes on get_bible for why we think GPT is cheating).
The thesis is that fine-tuning will cause GPT-3.5 to learn the fake language as a new language by placing the embeddings of the
new words in essentially the same dimensional space as the English words. Translating it back should be trivial, as it is a
word-for-word translation.



In [59]:
# Standard library
import json
import os  # required by the os.environ / os.getenv calls below (was missing -> NameError)
import random
import time

# Third-party. To install: pip install pandas openai==0.27.9 nltk
import pandas as pd
import openai # !pip install openai==0.27.9
from nltk.translate.bleu_score import sentence_bleu

# Project-local helpers
from lib.config import get_config
from lib.cipher import substitution_cipher

# --- Experiment configuration -------------------------------------------------
FILENAME = "GPT3-5"
EPOCHS = 1  # Since we are repeating the data with different versions we don't want to overfit
SOURCE_BOOKS = ['MAT','LUK','JHN']  # books used to build the train/validate files
TARGET_BOOKS = ['MRK']              # held-out book used to build the test file
VERSIONS = ['eng-web', 'eng-asv', 'eng-kjv2006']  # source-text columns paired with the 'birrig' column
SPLIT_RATIO = 0.8  # random-draw threshold: draws <= this go to train, the rest to validate
# Only the first 18 characters are used as the fine-tune suffix (it is sliced
# with [0:18] when the job is created); the full name is used in file names.
EXPERIMENT_NAME = "mt_lk_jn_to_mk_test1"
FILENAME = FILENAME + "_" + EXPERIMENT_NAME

# set environment variable in ipython notebook
os.environ["OPENAI_API_KEY"] = get_config('openai')['api_key']
openai.api_key = os.getenv("OPENAI_API_KEY")

In [45]:
def system_message(source):
    """
    Build the system prompt that asks the model to translate into Birrig.

    Parameters
    ----------
    source: str
        Label of the source text; it is interpolated verbatim into the prompt.

    Returns
    -------
    str
        The content for the system-role chat message.
    """
    prompt = f"You are an expert translator. When the user gives you input from {source} translate it to Birrig."
    return prompt

In [46]:
# Load the verse table; the 'book' column is used below to pick train/test rows.
bible = pd.read_csv('data/bible.bbe.csv')

# Assuming the gospels overlap heavily, train on Matthew, Luke and John and
# then predict Mark (which should be easy as Matthew and Luke may have copied from him).
# .copy() makes train/test independent frames, so adding columns to them later
# does not trigger pandas' SettingWithCopyWarning.
train = bible[bible['book'].isin(SOURCE_BOOKS)].copy()
test = bible[bible['book'].isin(TARGET_BOOKS)].copy()

In [63]:
def write_file(df, file_handlers, versions, split=None):
    """
    Write shuffled chat-format fine-tuning examples as JSON lines.

    One example is built for every (row, version) pair in which both the
    version column and the 'birrig' column are non-null: a system prompt, the
    version text as the user message and the 'birrig' text as the assistant
    reply. Each example gets a random draw that is used both to shuffle the
    output and to choose the destination file.

    Parameters
    ----------
    df: pandas.DataFrame
        Rows to export; must contain the `versions` columns and 'birrig'.
    file_handlers: list
        One or two writable file objects (index 0 and, optionally, index 1).
    versions: list
        Names of the columns holding the source text.
    split: float, optional
        Examples whose draw is <= `split` go to file 0, the others to file 1.
        Everything goes to file 0 when this is None or only one file is given.

    Returns
    -------
    None
    """
    keyed_examples = []
    for _, verse in df.iterrows():
        for version in versions:
            # Skip pairs where either side of the translation is missing
            if pd.isna(verse[version]) or pd.isna(verse['birrig']):
                continue
            example = {
                "messages": [
                    {"role": "system", "content": system_message(version)},
                    {"role": "user", "content": verse[version]},
                    {"role": "assistant", "content": verse['birrig']},
                ]
            }
            # Tag with a random draw; it doubles as shuffle key and split key
            keyed_examples.append((random.random(), example))

    # Ordering by the random draw shuffles the examples
    shuffled = sorted(keyed_examples, key=lambda pair: pair[0])

    # The separator is written *before* each record (empty for the first one),
    # so neither file ends with a trailing newline.
    prefix = ["", ""]
    for draw, example in shuffled:
        slot = 0 if (split is None or draw <= split or len(file_handlers) == 1) else 1
        file_handlers[slot].write(prefix[slot] + json.dumps(example))
        prefix[slot] = "\n"

# Export the datasets: the source books are split across train/validate,
# the target book goes to the test file in full.
train_path = f'data/{FILENAME}_train.jsonl'
validate_path = f'data/{FILENAME}_validate.jsonl'
test_path = f'data/{FILENAME}_test.jsonl'
with open(train_path, 'w') as train_out, \
        open(validate_path, 'w') as validate_out, \
        open(test_path, 'w') as test_out:
    write_file(train, [train_out, validate_out], VERSIONS, SPLIT_RATIO)
    write_file(test, [test_out], VERSIONS)


In [64]:
openai.api_key = os.environ["OPENAI_API_KEY"]

# Upload the train and validate sets; `files` maps part name -> uploaded file id.
files = {}
for part in ['train', 'validate']:
    path = f'data/{FILENAME}_{part}.jsonl'
    try:
        # `with` closes the handle once the upload call returns (it used to be
        # left open); binary mode sends the bytes exactly as they are on disk.
        with open(path, "rb") as upload:
            res = openai.File.create(
                file=upload,
                purpose='fine-tune'
            )
        files[part] = res['id']
    except Exception as e:
        # Best effort: report the failure and still attempt the other part.
        # A part that failed is simply absent from `files`.
        print(e, part, path)

files

{'train': 'file-SNmsBgd45vwo2U2xwYy3kczA',
 'validate': 'file-YPbpsM5BMiIsGe6s71tsMqRc'}

In [71]:
# Start the fine-tuning job. Freshly uploaded files may still be processing,
# in which case the API rejects the request; keep retrying every 30 seconds
# until it is accepted. Any other invalid-request error is re-raised.
STILL_PROCESSING_MARKERS = (
    "File 'file-",
    "' is still being processed and is not ready to be used for fine-tuning.",
)
while True:
    try:
        res = openai.FineTuningJob.create(
            training_file=files['train'],
            validation_file=files['validate'],
            model="gpt-3.5-turbo",
            hyperparameters={"n_epochs": EPOCHS},
            suffix=EXPERIMENT_NAME[:18],
        )
    except openai.error.InvalidRequestError as e:
        error_text = str(e)
        if not all(marker in error_text for marker in STILL_PROCESSING_MARKERS):
            raise e
        print("File is still being processed. Retrying in 30 seconds...")
        time.sleep(30)
    else:
        # Remember the job id so the job can be polled afterwards
        job_id = res["id"]
        break
res

File is still being processed. Retrying in 30 seconds...


<FineTuningJob fine_tuning.job id=ftjob-s2ioXkXWCfvMyN55IITkNw0Q at 0x10ea11220> JSON: {
  "object": "fine_tuning.job",
  "id": "ftjob-s2ioXkXWCfvMyN55IITkNw0Q",
  "model": "gpt-3.5-turbo-0613",
  "created_at": 1694562454,
  "finished_at": null,
  "fine_tuned_model": null,
  "organization_id": "org-dSLF9Ay5XJvsvCOjYOjUYfQQ",
  "result_files": [],
  "status": "created",
  "validation_file": "file-YPbpsM5BMiIsGe6s71tsMqRc",
  "training_file": "file-SNmsBgd45vwo2U2xwYy3kczA",
  "hyperparameters": {
    "n_epochs": 1
  },
  "trained_tokens": null,
  "error": null
}

In [73]:

# Poll the fine-tuning job every 100 seconds until it is over.
# Besides `finished_at` being set, also stop on a failed/cancelled status so a
# job that ends unsuccessfully cannot keep this loop spinning.
UNSUCCESSFUL_STATUSES = ("failed", "cancelled")
while True:
    res = openai.FineTuningJob.retrieve(job_id)
    if res["finished_at"] is not None or res.get("status") in UNSUCCESSFUL_STATUSES:
        print(res)
        break
    print(".", end="")
    time.sleep(100)

# Name of the resulting model; None when the job did not produce one.
ft_model = res["fine_tuned_model"]
if ft_model is None:
    print(f"WARNING: job {job_id} ended with status {res.get('status')!r} and produced no fine-tuned model")
ft_model

.....................................

In [12]:
def create_messages(text, version="eng-web"):
    """
    Build the chat payload used to ask the model for a translation.

    Parameters
    ----------
    text: str|dict
        Either the raw text to translate, or an already formatted
        {"messages": [...]} dict (e.g. a line from a fine-tuning JSONL file).
    version: str
        Source label passed to system_message(). Only used when `text` is not
        a dict. Default is 'eng-web'

    Returns
    -------
    dict
        A dict with a "messages" list ending in the prompt to answer. For dict
        input a trailing assistant reply is dropped, so the model generates
        the answer itself rather than being handed it. The input dict and its
        message list are never modified.

    Raises
    ------
    KeyError
        If `text` is a dict without a "messages" key.
    """
    # if text is a dict we can assume they already formatted it
    if isinstance(text, dict):
        # Work on a copy so the caller's example is left untouched
        history = list(text['messages'])
        # Drop the expected answer, if present. (Previously a trailing *user*
        # message was popped, which removed the prompt itself.)
        if history and history[-1].get('role') == 'assistant':
            history = history[:-1]
        return {**text, "messages": history}

    return {
        "messages": [
            {"role": "system", "content": system_message(version)},
            {"role": "user", "content": text},
        ]
    }

def translate(text, version="eng-web", debug=False, temperature=0.1):
    """
    Translate text to Birrig with the fine-tuned model (global `ft_model`).

    Parameters
    ----------
    text: str|dict|array
        The text to translate (or an already formatted messages dict, see
        create_messages). If an array is passed, each element will be
        translated with its own request, using the same settings.
    version: str
        The version of the Bible to translate from. Default is 'eng-web'
    debug: bool
        When True, token log-probabilities are requested and the per-token
        probabilities, the raw reply and the decoded reply are printed.
    temperature: float
        Sampling temperature; values near 0 give very little
        randomness/creativity, 2 gives a lot.

    Returns
    -------
    array
        One entry per returned choice (one per input element for an array):
        the model's reply after substitution_cipher(..., encode=False).

    """
    import math  # local import: only needed for the debug probabilities

    if isinstance(text, list):
        # One request per element; the caller's settings are forwarded
        # (the earlier chunked recursion silently reset them to the defaults
        # and sent short lists as the content of a single message).
        result = []
        for item in text:
            result += translate(item, version=version, debug=debug, temperature=temperature)
        return result

    request = {
        "model": ft_model,  # `model`, not `engine`, selects an OpenAI-hosted model
        # create_messages returns {"messages": [...]}; the API wants the list
        "messages": create_messages(text, version)["messages"],
        "temperature": temperature,
        "max_tokens": 2000,
        "n": 1,
    }
    if debug:
        # For chat completions `logprobs` is a boolean switch
        request["logprobs"] = True
    response = openai.ChatCompletion.create(**request)

    result = []
    for choice in response.get('choices', [{}]):
        # `message`/`content` may be missing or null; treat that as empty text
        raw = ((choice.get('message') or {}).get('content') or "").strip()
        decoded = substitution_cipher(raw, encode=False).strip()
        result.append(decoded)
        if debug:
            # Chat log-probabilities arrive as logprobs.content: a list of
            # {"token", "logprob", ...} entries (absent/null when not returned)
            entries = (choice.get('logprobs') or {}).get('content') or []
            # logprob is a natural logarithm, so exp() turns it into a probability
            probs = [
                (substitution_cipher(entry['token'], encode=False), math.exp(entry['logprob']))
                for entry in entries
            ]

            print("PROBS: ", probs)
            print("TRANSLATION: ", raw)
            print("DECODED: ", decoded)

    return result

In [None]:
# Smoke test: one short phrase with debug printing on and a moderate temperature
translate(text="he ate locusts and honey", temperature=0.5, debug=True)

In [None]:
# Translate the eng-web text of the first row whose "0" column equals 'MRK 1:6',
# with a near-zero temperature
mark_1_6 = bible.loc[bible['0'] == 'MRK 1:6', 'eng-web'].iloc[0]
translate(mark_1_6, debug=False, temperature=0.001)

In [None]:
# Send the whole eng-web column of the test set to translate and store the
# results in a new 'translation' column
source_verses = test['eng-web'].to_list()
test['translation'] = translate(source_verses)
test

In [None]:
# Scoring splits every text column, so rows with a null in any column are dropped first
test = test.dropna()


def _bleu_for_row(row):
    """Score a row's translation against its four English reference texts."""
    references = [
        row['eng-web'].split(),
        row['eng-asv'].split(),
        row['eng-kjv2006'].split(),
        row['engBBE'].split(),
    ]
    return sentence_bleu(references, row['translation'].split())


test['bleu_score'] = test.apply(_bleu_for_row, axis=1)

In [None]:
# Summary statistics of the per-verse BLEU scores
bleu_scores = test['bleu_score']
bleu_scores.describe()

## Analysis


| Stat | Number |
| ----- | ----- |
| Average BLEU score |  |
| 75th percentile |  |

In [None]:
# For each row of the test set, print column "0" next to its translation
for verse_ref, translated in zip(test['0'], test['translation']):
    print(verse_ref, translated)

# Handpicked Tests for experimentation


In [None]:
# Hand-picked sample: a long, verse-like sentence
translate(text="For God so loved the world that he gave his only Son, so that everyone who believes in him may not die but have eternal life.")

In [None]:
# Hand-picked sample: lower-case, unpunctuated sentence
translate(text="he said let there be light and there was light")



In [None]:
# Probe the model with words it has presumably never seen
translate(text="Bongo bongo I love you, gone to Venus with a hole in my shoe")

In [None]:
# Hand-picked sample: exclamations and a hyphenated word
translate(text="Ship Pit! Pirate ahoy-lay!")

In [None]:
# Hand-picked sample: very short sentence
translate(text="Sheep went baa")

In [None]:
# Hand-picked sample: ungrammatical word sequence
translate(text="Boat a brother on a mountain")

In [None]:
# Hand-picked sample: imperative sentence
translate(text="Be kind and play with rocks!")

In [None]:
# Hand-picked sample: acronym, repeated punctuation and a misspelled word (input kept verbatim)
translate(text="SOS! Ship overboard!! Lost ninty percent of people!")