In [None]:
%%bash
# Run every install through the same interpreter's pip so the upgraded pip and
# both packages are guaranteed to land in one environment.
python3 -m pip install --upgrade pip
python3 -m pip -q install -U datasets
# Pinned: the notebook imports the legacy module-level FineTuningJob and
# ChatCompletion classes from the openai package.
python3 -m pip -q install openai==0.28

In [None]:
from openai import FineTuningJob, ChatCompletion
from datasets import load_dataset
from time import sleep
import random
import json
import pandas as pd

In [None]:
import pandas as pd

# Location of the question/answer spreadsheet inside the Colab runtime.
excel_file_path = '/content/questions_and_answers_checked.xls'

# Read the spreadsheet into a DataFrame; `data` is what format_data() is
# called with later in the notebook.
data = pd.read_excel(excel_file_path)

# Preview the first rows to confirm the 'question' and 'answer' columns loaded.
print(data.head())


                                       question  \
0  Anthrax What is anthrax and what  causes it?   
1                     What animals get anthrax?   
2                How can my animal get anthrax?   
3            How does anthrax affect my animal?   
4                            Can I get anthrax?   

                                              answer  Unnamed: 2 Unnamed: 3  
0  Anthrax is a bacterial disease  caused by Baci...         NaN        NaN  
1  Cattle, sheep, and goats are most  at risk for...         NaN        NaN  
2  Most animals get anthrax orally  through soil ...         NaN        NaN  
3  The most common sign of anthrax  infection in ...         NaN        NaN  
4  Yes. Anthrax can enter through  a break or abr...         NaN        NaN  


In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
# NOTE(review): the spreadsheet loaded earlier is read from /content, not from
# the Drive mount point - confirm whether this mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# --- Build chat-format fine-tuning records from the DataFrame and shuffle them ---

import pandas as pd
import random

def format_data(df, seed=None):
    """Convert a question/answer DataFrame into shuffled chat fine-tuning records.

    Every usable row becomes one ``{"messages": [...]}`` dict holding a fixed
    system prompt, the row's ``question`` as the user turn and its ``answer``
    as the assistant turn.

    Args:
        df: pandas DataFrame with ``question`` and ``answer`` columns. Any
            other columns are ignored.
        seed: Optional seed for the shuffle. ``None`` (the default) shuffles
            with the module-level ``random`` state; any other value makes the
            resulting order reproducible.

    Returns:
        list[dict]: the formatted records in shuffled order.

    Raises:
        ValueError: if ``df`` is not a DataFrame or lacks a required column.
    """
    if not isinstance(df, pd.DataFrame):
        raise ValueError("Input data must be a pandas DataFrame")

    # The message names the columns that are actually checked.
    if 'question' not in df.columns or 'answer' not in df.columns:
        raise ValueError("DataFrame must contain 'question' and 'answer' columns")

    # A missing cell would reach json.dump as float NaN and be written as the
    # bare token NaN, which is not valid JSON. Skip incomplete rows instead.
    usable_rows = df.dropna(subset=['question', 'answer'])

    system_prompt = "You are a Nutrition Scientist and Veterinary for animals. Answer users' questions with a informative tone"

    # str() guards against cells that were parsed as numbers; message content
    # is expected to be text.
    formatted_data = [
        {
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": str(question)},
                {"role": "assistant", "content": str(answer)},
            ]
        }
        for question, answer in zip(usable_rows['question'], usable_rows['answer'])
    ]

    # Shuffle so that a later positional train/validation split is random.
    if seed is None:
        random.shuffle(formatted_data)
    else:
        random.Random(seed).shuffle(formatted_data)

    return formatted_data


In [None]:
formatted_data = format_data(data)

In [None]:
formatted_data[7]

{'messages': [{'role': 'system',
   'content': "You are a Nutrition Scientist and Veterinary for animals. Answer users' questions with a informative tone"},
  {'role': 'user', 'content': 'Who should I contact  if I  suspect MRSA?'},
  {'role': 'assistant',
   'content': 'In Animals Contact your veterinarian. In Humans   Contact your physician.'}]}

In [None]:
# Number of records used for training: 70% of the data, rounded down by int().
TRAIN_SIZE = int(len(formatted_data) * 0.7)

training_data = formatted_data[:TRAIN_SIZE]
validation_data = formatted_data[TRAIN_SIZE:]

# The first TRAIN_SIZE records go to training and the remainder to validation.
# format_data() has already shuffled the records, so this positional slice is a random split.

In [None]:
def save_data(dictionary_data, file_name):
    """Write an iterable of JSON-serializable entries to `file_name` as JSON Lines.

    Each entry is serialized on its own line, followed by a newline. An
    existing file at `file_name` is overwritten.
    """
    with open(file_name, "w") as jsonl_file:
        jsonl_file.writelines(
            json.dumps(record) + "\n" for record in dictionary_data
        )

In [None]:
save_data(training_data, "training_data.jsonl")
save_data(validation_data, "validation_data.jsonl")

In [None]:
# Report how many records ended up in each split.
for split_name, split_rows in (("Training", training_data), ("Validation", validation_data)):
    print(f"{split_name} Data Size: {len(split_rows)}")

Training Data Size: 95
Validation Data Size: 41


In [None]:
import os
import openai

In [None]:
def upload_fine_tuning_data(data_path):
    """Upload a local file to OpenAI with purpose "fine-tune".

    Args:
        data_path: path of the file to upload (the notebook passes the JSONL
            files written by save_data).

    Returns:
        The object returned by ``openai.File.create``; the notebook reads its
        ``"id"`` entry to reference the upload when creating the job.
    """
    # The context manager closes the handle even if the upload raises; the
    # original left the file open. Binary mode sends the bytes exactly as they
    # are on disk.
    with open(data_path, "rb") as data_file:
        uploaded_file = openai.File.create(
            file=data_file,
            purpose="fine-tune"
        )

    return uploaded_file

In [None]:
# Use the same relative path the file was saved under, so the upload does not
# depend on the working directory being /content.
uploaded_training_data = upload_fine_tuning_data("training_data.jsonl")

In [None]:
uploaded_training_data

<File file id=file-2mvbMUPRoBhz8NfZZc1Bdply at 0x7af3cca7bb50> JSON: {
  "object": "file",
  "id": "file-2mvbMUPRoBhz8NfZZc1Bdply",
  "purpose": "fine-tune",
  "filename": "file",
  "bytes": 61990,
  "created_at": 1714475074,
  "status": "processed",
  "status_details": null
}

In [None]:
uploaded_training_id = uploaded_training_data["id"]

In [None]:
# Use the same relative path the file was saved under, so the upload does not
# depend on the working directory being /content.
uploaded_validation_data = upload_fine_tuning_data("validation_data.jsonl")

In [None]:
uploaded_validation_id = uploaded_validation_data["id"]

In [None]:
def create_fine_tuning(base_model, train_id, val_id):
    """Create a fine-tuning job and return the API response.

    Args:
        base_model: name of the model to fine-tune.
        train_id: ID of the uploaded training file.
        val_id: ID of the uploaded validation file.
    """
    job_arguments = {
        "training_file": train_id,
        "validation_file": val_id,
        "model": base_model,
    }
    return FineTuningJob.create(**job_arguments)

In [None]:
# Base chat model to fine-tune; it is also queried later as the comparison baseline.
base_model = "gpt-3.5-turbo"

fine_tuning_response = create_fine_tuning(
    base_model=base_model,
    train_id=uploaded_training_id,
    val_id=uploaded_validation_id,
)

In [None]:
fine_tuning_job_ID = fine_tuning_response["id"]

In [None]:
fine_tuning_response = FineTuningJob.retrieve(fine_tuning_job_ID)
fine_tuning_response

<FineTuningJob fine_tuning.job id=ftjob-oEKCP89Ktkqd56VVYi62VH5X at 0x7af3cc675b20> JSON: {
  "object": "fine_tuning.job",
  "id": "ftjob-oEKCP89Ktkqd56VVYi62VH5X",
  "model": "gpt-3.5-turbo-0125",
  "created_at": 1714475088,
  "finished_at": null,
  "fine_tuned_model": null,
  "organization_id": "org-pUzj5HAW0fIp63iRMPajaCvQ",
  "result_files": [],
  "status": "validating_files",
  "validation_file": "file-t7drbIeQsz2Jl6NGujXGVbJ5",
  "training_file": "file-2mvbMUPRoBhz8NfZZc1Bdply",
  "hyperparameters": {
    "n_epochs": "auto",
    "batch_size": "auto",
    "learning_rate_multiplier": "auto"
  },
  "trained_tokens": null,
  "error": {},
  "user_provided_suffix": null,
  "seed": 1397851345,
  "estimated_finish": null,
  "integrations": []
}

In [None]:
# Poll the job until it produces a model. A job that ends without one
# (status "failed" or "cancelled") never sets fine_tuned_model, so those
# statuses are treated as terminal instead of polling forever.
while True:

    fine_tuning_response = FineTuningJob.retrieve(fine_tuning_job_ID)
    fine_tuned_model_ID = fine_tuning_response["fine_tuned_model"]
    fine_tuning_status = fine_tuning_response["status"]

    if fine_tuned_model_ID is not None:
        print("Fine-tuning completed!")
        print(f"Fine-tuned model ID: {fine_tuned_model_ID}")
        break

    if fine_tuning_status in ("failed", "cancelled"):
        raise RuntimeError(
            f"Fine-tuning job {fine_tuning_job_ID} ended with status "
            f"'{fine_tuning_status}': {fine_tuning_response.get('error')}"
        )

    print("Fine-tuning in progress...")
    sleep(200)

Fine-tuning in progress...
Fine-tuning in progress...
Fine-tuning in progress...
Fine-tuning in progress...
Fine-tuning in progress...
Fine-tuning in progress...
Fine-tuning in progress...
Fine-tuning in progress...
Fine-tuning in progress...
Fine-tuning in progress...
Fine-tuning in progress...
Fine-tuning in progress...
Fine-tuning in progress...
Fine-tuning in progress...
Fine-tuning in progress...
Fine-tuning in progress...
Fine-tuning in progress...
Fine-tuning in progress...


In [None]:
fine_tuning_response

<FineTuningJob fine_tuning.job id=ftjob-Dihf5PsgjLfWLwTB3oDy56pq at 0x7e18f919a570> JSON: {
  "object": "fine_tuning.job",
  "id": "ftjob-Dihf5PsgjLfWLwTB3oDy56pq",
  "model": "gpt-3.5-turbo-0125",
  "created_at": 1714325740,
  "finished_at": 1714326307,
  "fine_tuned_model": "ft:gpt-3.5-turbo-0125:personal::9J2sHi6o",
  "organization_id": "org-pUzj5HAW0fIp63iRMPajaCvQ",
  "result_files": [
    "file-abBNDSlanytIdxOaHRRDouQ9"
  ],
  "status": "succeeded",
  "validation_file": "file-jqQO4xJ4htcuyCLpr4owNQfW",
  "training_file": "file-1yWY1AurH34r9V8zvLswNDjj",
  "hyperparameters": {
    "n_epochs": 3,
    "batch_size": 1,
    "learning_rate_multiplier": 2
  },
  "trained_tokens": 39495,
  "error": {},
  "user_provided_suffix": null,
  "seed": 1112873834,
  "estimated_finish": null,
  "integrations": []
}

In [None]:
def answer_question(question, model_ID, system_prompt=None):
    """Send one question to a chat model and return the text of its reply.

    Args:
        question: text sent as the user message.
        model_ID: name of the model passed to ``ChatCompletion.create``.
        system_prompt: optional system message. When omitted, the notebook's
            original inference prompt is used, so existing calls behave as
            before. That default differs from the system prompt written into
            the training records; pass the training prompt here to query a
            fine-tuned model under the conditions it was trained on.

    Returns:
        The ``content`` of the first choice's message.
    """
    if system_prompt is None:
        # Kept verbatim so that existing calls produce the same request.
        system_prompt = "You are a Biology Scientist and Animal Feed expert you have a sanitary reviews and avices based on the composition of feed with the relation of. Please reply users' answer using polite and respectful language.spectful language."

    message = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": question},
    ]

    # Run inference with the requested model.
    model_completion = ChatCompletion.create(model=model_ID,
                                             messages=message)

    # Only the first choice is used.
    response = model_completion.choices[0].message

    return response["content"]

In [None]:
question = "Can I give you a feed compostion and you give me the review for sanitary purposes Feed Composition Date N° lot N° de l'echantillon Heure Prelevement Date de prelevement Groupe Type de produit Humidite (%) Aw Proteine (%) Amidon (%) Fibre (%) Calcium (%) Fine % Durete Durabilite % Matiere grasse (A) " + "1/1/2024 3051 2400001 Poste 1 PF Ruminants ALCO 7 11.76 0.651 19.4 35.38 3.83 1.8 5 89 3.51"

In [None]:
response_fine_tuned_model = answer_question(question, fine_tuned_model_ID)

In [None]:
response_base_model = answer_question(question, base_model)

In [None]:
print(f"Fine-tuned model response: \n{response_fine_tuned_model}")

Fine-tuned model response: 
The sanitary review would be that this is a low risk feed for the  potential  of  pathogen  growth or toxin production.


In [None]:
print(f"Base model response: \n{response_base_model}")

Base model response: 
Based on the feed composition you have provided, I can evaluate the following aspects for sanitary purposes:

- Moisture content: The feed has a moisture content of 7%, which is within the acceptable range for animal feed to prevent mold growth and bacterial contamination.

- Protein content: The protein level is 11.76%, which is important for animal growth and development. However, it is recommended to ensure the quality of protein sources used in the feed.

- Starch content: The feed contains 0.651% starch, which is relatively low. Starch is a source of energy for animals, and its level should be monitored to meet the energy requirements of the animals.

- Fiber content: The fiber level is at 19.4%, which is essential for digestive health in animals. Adequate fiber content helps in the proper functioning of the digestive system.

- Calcium content: The feed contains 3.83% calcium, which is crucial for bone health and other physiological functions in animals. It 