In [None]:
%pip install openai requests --upgrade

Code to turn Webflow CSV into fine-tuning format: https://drive.google.com/file/d/1yjR7AZiVWDXnsAfMfuWVvgDncAUbhHwM/view?usp=sharing

Code Interpreter conversation:
https://chat.openai.com/share/08a30d29-499c-4ffe-97b0-a0b1004f4b14


In [5]:
# Controls where the downloaded training file is saved: True writes it to the
# current working directory, False uses the Colab path under /content.
IS_RUNNING_CODE_LOCALLY = True # Set this to True if you are running the Jupyter notebook locally.

In [None]:
import requests
import json
import re
import os
import requests
import getpass

## Download the training data for the Saxifrage blog

In [6]:
# Google Drive share link for the training data (.jsonl file)
LINK = (
    "https://drive.google.com/file/d/1O6hubG33SyrG24m6TPZOLAmS0J2RxLCW/view?usp=sharing"
)

# Pull the file ID out of the ".../d/<id>/..." segment of the share link
file_id_match = re.search(r"d/([a-zA-Z0-9_-]+)/", LINK)
if file_id_match:
    FILE_ID = file_id_match.group(1)
else:
    raise ValueError("Could not extract file ID from the provided URL")

# Direct-download URL for the same file
URL = f"https://drive.google.com/uc?export=download&id={FILE_ID}"

# A timeout keeps the cell from hanging indefinitely if the server never answers
response = requests.get(URL, timeout=60)
response.raise_for_status()

# Default to the Colab path; use the working directory when running locally
file_path = "/content/training_data.jsonl"
if IS_RUNNING_CODE_LOCALLY:
    file_path = "training_data.jsonl"

# Write the raw bytes exactly as downloaded. Going through response.text and a
# text-mode file would re-encode the data with the platform's default encoding
# and translate newlines, which can corrupt non-ASCII text on some systems.
with open(file_path, "wb") as f:
    f.write(response.content)

print(f"File saved to {file_path}")

File saved to training_data.jsonl


In [None]:
# Parse the JSONL training file into a list: one JSON object per line.
# Blank lines (for example a trailing empty line at the end of the file) are
# skipped, because json.loads("") raises a JSONDecodeError.
with open(file_path, 'r') as file:
    data_list = [json.loads(line) for line in file if line.strip()]

# Number of JSON objects (training examples) that were loaded
len(data_list)

304

In [7]:
# Ask for the OpenAI API key without echoing it to the screen.
# Keys are managed at https://platform.openai.com/account/api-keys
OPENAI_API_KEY = getpass.getpass(prompt="Please enter your OpenAI Key:")

In [8]:
# Upload the training file to OpenAI so a fine-tuning job can reference it
headers = {
    'Authorization': f'Bearer {OPENAI_API_KEY}',
}

data = {
    'purpose': 'fine-tune',
}

# Open the file in a `with` block so the handle is closed as soon as the
# upload finishes instead of staying open for the life of the kernel.
with open(file_path, 'rb') as training_file:
    files = {
        # (file name sent to the API, open binary file object)
        'file': (os.path.basename(file_path), training_file),
    }

    response = requests.post(
        'https://api.openai.com/v1/files',
        headers=headers,
        data=data,
        files=files
    )

print(response.json())

{'object': 'file', 'id': 'file-WwcGPk6cjQXXJNBH0T5lUGx5', 'purpose': 'fine-tune', 'filename': 'training_data.jsonl', 'bytes': 464002, 'created_at': 1705337944, 'status': 'processed', 'status_details': None}


In [9]:
# Check the file is loaded: list the account's files and show the newest one.
# The newest file is chosen by its `created_at` timestamp rather than by its
# position in the list, because where the latest upload appears depends on the
# sort order the API applies to the listing.
response = requests.get(
    'https://api.openai.com/v1/files',
    headers=headers,
)
max(response.json()['data'], key=lambda uploaded: uploaded['created_at'])

{'object': 'file',
 'id': 'file-WwcGPk6cjQXXJNBH0T5lUGx5',
 'purpose': 'fine-tune',
 'filename': 'training_data.jsonl',
 'bytes': 464002,
 'created_at': 1705337944,
 'status': 'processed',
 'status_details': None}

In [10]:
import json

# `response` holds the file listing at this point. Train on the most recently
# created file whose purpose is 'fine-tune', selected by `created_at` instead
# of list position so the choice does not depend on how the API orders the
# listing or on unrelated files stored in the same account.
fine_tune_files = [
    uploaded for uploaded in response.json()['data']
    if uploaded.get('purpose') == 'fine-tune'
]
if not fine_tune_files:
    raise ValueError("No uploaded file with purpose 'fine-tune' was found")

TRAINING_FILE_ID = max(fine_tune_files, key=lambda uploaded: uploaded['created_at'])['id']

url = 'https://api.openai.com/v1/fine_tuning/jobs'

headers = {
    'Content-Type': 'application/json',
    'Authorization': f'Bearer {OPENAI_API_KEY}',
}

# Job definition: which file to train on and which base model to fine-tune
data = {
    "training_file": TRAINING_FILE_ID,
    "model": "gpt-3.5-turbo-0613",
}

response = requests.post(url, headers=headers, data=json.dumps(data))

print(response.json())

{'object': 'fine_tuning.job', 'id': 'ftjob-xdwuUPLwbzv5LBsaw3l0Bh78', 'model': 'gpt-3.5-turbo-0613', 'created_at': 1705337957, 'finished_at': None, 'fine_tuned_model': None, 'organization_id': 'org-RYujx31wXm20IuGbPRmu1T01', 'result_files': [], 'status': 'validating_files', 'validation_file': None, 'training_file': 'file-WwcGPk6cjQXXJNBH0T5lUGx5', 'hyperparameters': {'n_epochs': 'auto', 'batch_size': 'auto', 'learning_rate_multiplier': 'auto'}, 'trained_tokens': None, 'error': None}


In [15]:
# Fetch the list of fine-tuning jobs and show the first entry
response = requests.get(url, headers=headers)

first_listed_job = response.json()['data'][0]
print(first_listed_job)  # Check status

{'object': 'fine_tuning.job', 'id': 'ftjob-xdwuUPLwbzv5LBsaw3l0Bh78', 'model': 'gpt-3.5-turbo-0613', 'created_at': 1705337957, 'finished_at': None, 'fine_tuned_model': None, 'organization_id': 'org-RYujx31wXm20IuGbPRmu1T01', 'result_files': [], 'status': 'validating_files', 'validation_file': None, 'training_file': 'file-WwcGPk6cjQXXJNBH0T5lUGx5', 'hyperparameters': {'n_epochs': 3, 'batch_size': 1, 'learning_rate_multiplier': 2}, 'trained_tokens': None, 'error': None}


------------------------------------

## You must wait for the model to finish training before you can use it.

In [18]:
from time import sleep

# Seconds to wait between two status requests
POLL_INTERVAL_SECONDS = 5
# Statuses that mean the job stopped without producing a model
UNSUCCESSFUL_STATUSES = {"failed", "cancelled"}

number_of_seconds = 0

# Parse the job record once per poll instead of re-parsing the response body
# for every field that is read from it.
job = response.json()["data"][0]

# Keep polling until the job reports a finish time. Also stop when the job
# reports a failed/cancelled status, so the loop ends even if such a job is
# returned without a finish time.
while job["finished_at"] is None and job.get("status") not in UNSUCCESSFUL_STATUSES:
    sleep(POLL_INTERVAL_SECONDS)
    number_of_seconds += POLL_INTERVAL_SECONDS
    print(f"The job has been running for {number_of_seconds} seconds", flush=True)
    response = requests.get(url, headers=headers)
    job = response.json()["data"][0]

# Surface an unsuccessful job here with its error details, rather than letting
# later cells fail on a missing model name.
if job.get("status") in UNSUCCESSFUL_STATUSES:
    raise RuntimeError(
        f"Fine-tuning job {job['id']} ended with status {job['status']!r}: {job.get('error')}"
    )

# Start and end timestamps reported for the job (Unix seconds)
created_at = job["created_at"]
finished_at = job["finished_at"]

# Calculate the difference between the start and end time:
time_difference = finished_at - created_at
time_difference_in_minutes = time_difference / 60
time_difference_in_minutes

The job has been running for 5 seconds
The job has been running for 10 seconds
The job has been running for 15 seconds
The job has been running for 20 seconds
The job has been running for 25 seconds
The job has been running for 30 seconds
The job has been running for 35 seconds
The job has been running for 40 seconds
The job has been running for 45 seconds
The job has been running for 50 seconds
The job has been running for 55 seconds
The job has been running for 60 seconds
The job has been running for 65 seconds
The job has been running for 70 seconds
The job has been running for 75 seconds
The job has been running for 80 seconds
The job has been running for 85 seconds
The job has been running for 90 seconds
The job has been running for 95 seconds


KeyboardInterrupt: 

In [None]:
# Training price used for the estimate: $0.008 (US dollars, not cents) per 1,000 tokens
TRAINING_PRICE_USD_PER_1K_TOKENS = 0.008

# Estimated training cost in US dollars for the first listed job
response.json()['data'][0]['trained_tokens'] * TRAINING_PRICE_USD_PER_1K_TOKENS / 1000

2.2203600000000003

In [None]:
# Read the fine-tuned model's name from the first listed job and show it
fine_tuned_model_name = response.json()['data'][0]['fine_tuned_model']
print(fine_tuned_model_name)  # get the model name

ft:gpt-3.5-turbo-0613:saxifrage-llc::7qeTFlpJ


In [None]:
# Name of the fine-tuned model, taken from the first listed fine-tuning job
MODEL_NAME = response.json()['data'][0]['fine_tuned_model']

# From here on, requests go to the chat completions endpoint
url = 'https://api.openai.com/v1/chat/completions'

headers = {
    'Content-Type': 'application/json',
    'Authorization': f'Bearer {OPENAI_API_KEY}',
}

# Conversation sent to the model: a system prompt followed by the user request
system_message = {
    "role": "system",
    "content": "You are a helpful assistant.",
}
user_message = {
    "role": "user",
    "content": "Write the section 'How Agencies Make Money' for the blog post 'Agency Economics'",
}

data = {
    "model": MODEL_NAME,
    "messages": [system_message, user_message],
}

# Serialize the payload to JSON and send it
request_body = json.dumps(data)
response = requests.post(url, headers=headers, data=request_body)

print(response.json())  # Print the response or handle it as needed


{'id': 'chatcmpl-7qf2KYDqcbpUP4sJJMEXWK15sjcJE', 'object': 'chat.completion', 'created': 1692785156, 'model': 'ft:gpt-3.5-turbo-0613:saxifrage-llc::7qeTFlpJ', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': 'To understand how agencies price their services and think about profitability, is to understand the economics of an agency model. Most people who don’t work or haven’t worked at an agency don’t understand how agencies make money. This guide will bring light to what’s going on under the hood, and how to negotiate the best deal for your company.Employees — this side of the business doesn’t actually make much money for the agency. They might bill clients $200,000 a year, but with salaried employees you must pay even when demand suddenly drops off (like the COVID pandemic!). However employees are the backbone of the agency and keeping the same consultants on your project for a long period of time is how you get consistently good work done.Fixed Costs — such as offi

In [None]:
# Show only the generated text of the first returned choice
first_choice = response.json()['choices'][0]
print(first_choice['message']['content'])

To understand how agencies price their services and think about profitability, is to understand the economics of an agency model. Most people who don’t work or haven’t worked at an agency don’t understand how agencies make money. This guide will bring light to what’s going on under the hood, and how to negotiate the best deal for your company.Employees — this side of the business doesn’t actually make much money for the agency. They might bill clients $200,000 a year, but with salaried employees you must pay even when demand suddenly drops off (like the COVID pandemic!). However employees are the backbone of the agency and keeping the same consultants on your project for a long period of time is how you get consistently good work done.Fixed Costs — such as offices, marketing, legal fees, insurance, training, events, payroll taxes and benefits, add up to about 20% of revenue (general overheads). These costs are pretty fixed and aren’t actually tied to revenue so adding a new client does

In [None]:
# Second prompt for the same fine-tuned model; reuses `url`, `headers` and
# `MODEL_NAME` from the previous request.
system_message = {
    "role": "system",
    "content": "You are a helpful assistant.",
}
user_message = {
    "role": "user",
    "content": "Write the section 'Criticisms of Memetics' for the blog post 'Why isn't Memetics a science?'",
}

data = {
    "model": MODEL_NAME,
    "messages": [system_message, user_message],
}

request_body = json.dumps(data)
response = requests.post(url, headers=headers, data=request_body)

# Print just the generated text of the first choice
completion_text = response.json()['choices'][0]['message']['content']
print(completion_text)


To give a balanced view, there have been some criticisms of using memetics. For example, one critique that GKC Chen repeats in several of his works, is that memetics is unfalsifiable as a theory. If the memetic theory can explain any outcome with rational meme selection, then the theory cannot be properly tested as it lacks predictive value.Other objections are conceptual in nature. It's difficult to define what exactly a meme is and what makes two ideas similar. The way memes proliferate can also be highly unpredictable and difficult to model - does the mechanism of going viral follow all the properties that define a meme? I’d argue these criticisms are valid, but not necessarily fatal to memetics as a field.Simulations can be built around off of any unifying theory of culture, like memes, and improve on it in a piecemeal fashion — burning away the parts that seem least useful, and adding new cultural transfer mechanisms. - Mark JamesAuthoritarian critics, in contrast, argue that meme