In [30]:
from make_data import load_dict_from_json
from tools import *
import json

# Prepare data

In [4]:
database = load_dict_from_json('data/M0A_train_data.json')
test_sample = select_random_keys(database, 100, 45)

In [20]:
themes = {
'Conscience': 'T1',
'Desire': 'T2',
'Freedom': 'T3',
'Goodness': 'T4',
'Identity': 'T5',
'Justice': 'T6',
'Language': 'T7',
'Meaning': 'T8',
'Science': 'T9',
'Technology': 'T10',
'Truth': 'T11',
'Time': 'T12',
'Existence': 'T13',
'Music': 'T14',
'Imagination': 'T15',
'The Unconscious': 'T16',
'Education': 'T17',
'Body & Mind': 'T18',
'Beauty': 'T19',
'Art': 'T20',
'Love': 'T21',
'Reality': 'T22',
'Politics': 'T23',
'Work': 'T24',
'Living Together': 'T25',
'Philosophy': 'T26',
'Matter': 'T27',
'Death': 'T28',
'Religion': 'T29',
'History': 'T30',
'Thought': 'T31',
'Madness': 'T32',
'Joy & Happiness': 'T33'}

In [25]:
trainning_dict = {}
for text in test_sample:
    trainning_dict[text] = str([themes[i] for i in list(database[text])]).replace("'", '')

In [28]:
system_instructions = """
You are a professor of the Humanities (Litterature, Philosophy, Poetry mostly) and as such, you are giving the task to classify texts related to the humanities by themes. You have been provided with texts from which you must exclusively derive its theme(s).

You are given the text of a page from a digital book and a fixed list of 33 themes. Your task is to determine which of these themes are relevant to the content of the page.
A page can be allocated to more than one theme. However, a page CANNOT be allocated to more than six themes. Only allocate a theme if you are very confident that it is a relevant theme.
Remember that it is better to have less relevant themes allocated than many that are irrelevant!
Return the allocation in the format: [T<theme_number>, T<theme_number>, ...]
Below are the 33 themes along with their guiding questions and descriptions:

T1. Consciousness
Is consciousness a uniquely human trait and to what extent our individual conscience is shaped by social influences?

T2. Desire
The relationship between desire, satisfaction, and human nature, examining whether desires can be fulfilled, consciously known, their impact on our wellbeing, and their influence on our behavior and pursuit of truth and goodness.

T3. Freedom
The relationship between consciousness, personal autonomy, and determinism, examining whether awareness and choice are truly liberating factors in human freedom.

T4. Goodness
Aspects of moral goodness, examining whether education, perception, intention, and human nature influence ethical behavior and moral development.

T5. Identity
Facets of personal identity, examining how self-awareness, change, relationships, work, and choices contribute to our understanding of who we are.

T6. Justice
Dimensions of justice, examining its relationship with freedom, law, conventions, experience, state power, moral choices, and democratic systems.

T7. Language
The relationship between language and thought, examining whether language serves as a barrier, tool, or mediator in human communication, understanding, and expression.

T8. Meaning
What has meaning ?

T9. Science
The fundamental questions raisend by science about knowledge, truth, and human understanding - from the possibility of scientific knowledge about life to the relationship between reason, experience, belief, and certainty.

T10. Technology
What is the relationship between technological progress, human freedom, and our connection to nature?

T11. Truth
Dimensions of truth, including its dependence on human perception, methods of verification, relationship with science and politics, and the nature of certainty and doubt.

T12. Time
Aspects of time, including its relationship with freedom, happiness, destruction, human limitations, knowledge, efficiency, novelty, and leisure.

T13. Existence
Existence encompasses both action and contemplation, raising questions about how we engage with life's moments and opportunities.

T14. Music

T15. Imagination
Does imagination enrich knowledge and what it means to lack of imagination?

T16. The Unconscious
The compatibility of the unconscious with freedom, its relationship with self-awareness, and its influence on human expression.

T17. Education
The relationship between culture, human nature, and personal development, examining whether cultural education liberates us, shapes our character, influences our happiness, and affects our moral development.

T18. Body & Mind
What difference can be made between the mind and the body?

T19. Beauty
Dimensions of beauty, from its transformative power on consciousness to its relationship with utility, happiness, and religious experience.

T20. Art
Aspects of art, including its relationship with understanding, truth, reality, education, language, beauty, meaning, joy and necessity.

T21. Love
Dimensions of love, including its rationality, universality, self-knowledge, and distinctions from other forms of human connection.

T22. Reality
The nature of reality and our ability to perceive and understand it, examining aspects like perception, the reliability of appearances, intuition, judgment, and the distinction between dreams and reality.

T23. Politics
What are the foundations and limits of political authority and human social organization?

T24. Work
Questions about work, examining its necessity, social impact, virtue, time value, and technical nature.

T25. Living Together
Questions exploring various aspects of social living, including moral obligations, duty, conflict, responsibility, and the relationship between individual and collective happiness.

T26. Philosophy
The relationship between philosophy and fundamental concepts like happiness, governance, and religion.

T27. Matter
Does the mind have access to matter and what is matter?

T28. Death
What are the fundamental questions about mortality and our ability to comprehend and accept death?

T29. Religion
Aspects of religion's necessity for humanity, its relationship with reason, and its cultural origins.

T30. History
Questions about history's nature, examining its scientific status, its relevance to the future, its role in political decision-making, and the agency behind historical events.

T31. Thought
Thought: its limitations, the nature of ideas, and our ability to comprehend origins.

T32. Madness

T33. Joy & Happiness
Aspects of happiness, including its relationship with truth, consciousness, culture, and well-being, examining whether happiness is achievable, personal, or compatible with the realities of existence.

DO NOT provide explanations, only provide the allocation code line.
DO NOT provide the title of each theme such as [T7. Language, T5. Identity], only provide the theme code such as [T7, T5].
Only allocate a theme if you are very confident that it is relevant.
    """

In [34]:
# # Create JSONL file in the desired format
# output_file = "data/training_data_100.jsonl"

# with open(output_file, "w") as f:
#     for text, themes in trainning_dict.items():
#         messages = [
#             {"role": "system", "content": system_instructions},
#             {"role": "user", "content": text},
#             {"role": "assistant", "content": themes}
#         ]
#         json_line = {"messages": messages}
#         f.write(json.dumps(json_line) + "\n")

# Fine-tune

In [35]:
import openai

# Set your API key
openai.api_key = os.getenv('FUSE_OPEN_AI_KEY')

training_file_path = "data/training_data_100.jsonl"

# Step 2: Upload your training file
print("Uploading training file...")
response = openai.File.create(file=open(training_file_path, "rb"), purpose="fine-tune")
training_file_id = response["id"]
print(f"Training file uploaded: {training_file_id}")

# Step 3: Create a fine-tuning job
print("Creating fine-tuning job...")
fine_tune_response = openai.FineTune.create(
    training_file=training_file_id,
    model="gpt-4"
)

print("Fine-tuning job created!")
print(f"Job ID: {fine_tune_response['id']}")

# Optional: Monitor the fine-tuning job
print("Monitoring fine-tuning job...")
while True:
    status = openai.FineTune.retrieve(id=fine_tune_response['id'])
    print(f"Status: {status['status']}")

    if status["status"] in ["succeeded", "failed"]:
        break

    # Wait for some time before polling again (e.g., 60 seconds)
    import time
    time.sleep(60)

if status["status"] == "succeeded":
    print(f"Fine-tuned model ID: {status['fine_tuned_model']}")
else:
    print("Fine-tuning failed.")

Uploading training file...
Training file uploaded: file-Wk8o49RvZMFXPTkZskxo5J
Creating fine-tuning job...


APIError: HTTP code 404 from API (<html>
<head><title>404 Not Found</title></head>
<body>
<center><h1>404 Not Found</h1></center>
<hr><center>nginx</center>
</body>
</html>
)

# Test

In [None]:
# Step 4: Use the fine-tuned model (once complete)
fine_tuned_model = status["fine_tuned_model"]

# Example usage of the fine-tuned model
response = openai.Completion.create(
    model=fine_tuned_model,
    prompt="Question: What is the capital of France?\n\n###\n\n",
    max_tokens=10
)
print(response.choices[0].text.strip())