<a href="https://colab.research.google.com/github/Nitin6523/prediiNER/blob/main/prediiFineTuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Fine-Tuning **`gpt-3.5-turbo`** from OpenAI

---





## Libraries and Dependencies

In [None]:
!pip install -U openai


In [None]:
import pandas as pd

In [None]:
import openai
from openai import OpenAI

In [None]:
from google.colab import userdata
import os
# Copy the value stored under the Colab secret name 'OPENAI_API_KEY' into the
# process environment, so the key never has to be hard-coded in the notebook.
# NOTE(review): presumably the `OpenAI()` client created later reads this
# environment variable — confirm.
os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')

## Dataset

In [None]:
# Load the dataset (columns 'Input Text' and 'Output') from Google Drive.
# NOTE(review): this path only resolves when Drive is mounted at /content/drive;
# no mount step is visible in this notebook — confirm it runs before this cell.
# NOTE(review): the "Unnamed: 0" column in the preview looks like a saved index
# column; consider read_csv(..., index_col=0) if it is not needed.
df = pd.read_csv(r"/content/drive/MyDrive/prediiDataSet/dataSet.csv")
df.head()

Unnamed: 0,Input Text,Output
0,conditions can result in the bottoming out the...,"{""Entity"": ""bottoming out the suspension"", ""La..."
1,SOME PASSENGER VEHICLES HAVE LOOSE STEERING CO...,"{""Entity"": ""passenger vehicles"", ""Label"": ""Veh..."
2,CERTAIN TRAVEL TRAILERS AND FIFTH WHEELS HAVE ...,"{""Entity"": ""travel trailers and fifth wheels"",..."
3,CERTAIN SPORTS UTILITY VEHICLES HAVE FAULTY RE...,"{""Entity"": ""sports utility vehicles"", ""Label"":..."
4,SOME SEDANS HAVE FAULTY IGNITION SWITCHES. THI...,"{""Entity"": ""sedans"", ""Label"": ""Vehicle""},{""Ent..."


## Function to convert the data into the GPT chat fine-tuning format

In [None]:
def convert_to_gpt35_format(dataset):
    """Convert a DataFrame of examples into chat fine-tuning records.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain the columns ``'Input Text'`` (the prompt) and
        ``'Output'`` (the target string).

    Returns
    -------
    list[dict]
        One ``{"messages": [...]}`` dict per row, holding a ``user`` message
        with the input text and an ``assistant`` message whose content is the
        JSON object ``{"Output": <row['Output']>}`` serialized as a string.
        An empty DataFrame yields an empty list.
    """
    # Local import: this function is called before the notebook's
    # `import json` cell has been executed.
    import json

    fine_tuning_data = []
    for _, row in dataset.iterrows():
        # Serialize with json.dumps so quotes inside row['Output'] are escaped.
        # Plain string concatenation produced invalid JSON, because the target
        # text itself contains double quotes. ensure_ascii=False keeps
        # non-ASCII characters as they appear in the dataset.
        json_response = json.dumps({"Output": row['Output']}, ensure_ascii=False)
        fine_tuning_data.append({
            "messages": [
                {"role": "user", "content": row['Input Text']},
                {"role": "assistant", "content": json_response}
            ]
        })
    return fine_tuning_data


In [None]:
# Convert the whole dataset, then display the first example's message list
# as a quick sanity check of the generated format.
converted_data = convert_to_gpt35_format(df)
example = converted_data[0]['messages']
example

[{'role': 'user',
  'content': 'conditions can result in the bottoming out the suspension and amplification of the stress placed on the floor truss network. the additional stress can result in the fracture of welds securing the floor truss network system to the chassis frame rail and/or fracture of the floor truss network support system. the possibility exists that there could be damage to electrical wiring and/or fuel lines which could potentially lead to a fire.'},
 {'role': 'assistant',
  'content': '{"Output": "{"Entity": "bottoming out the suspension", "Label": "Failure Issue"},{"Entity": "amplification of the stress", "Label": "Failure Issue"},{"Entity": "floor truss network", "Label": "Component"},{"Entity": "fracture of welds", "Label": "Failure Issue"},{"Entity": "chassis frame rail", "Label": "Component"},{"Entity": "floor truss network support system", "Label": "Component"},{"Entity": "damage to electrical wiring", "Label": "Failure Issue"},{"Entity": "fuel lines", "Label": 

### Train and Validation Split

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the converted examples for validation. The fixed
# random_state makes the split the same on every run.
train_data, val_data = train_test_split(
    converted_data,
    test_size=0.2,
    random_state=4
)


### Convert the data into the **`jsonl`** format required for fine-tuning

In [None]:
import json

In [None]:
def write_to_jsonl(data, file_path):
    """Serialize ``data`` to ``file_path`` as JSON Lines.

    Each element of ``data`` is written as one JSON document followed by a
    newline. The file is opened in ``'w'`` mode, so any existing content is
    overwritten.
    """
    with open(file_path, 'w') as sink:
        sink.writelines(json.dumps(record) + '\n' for record in data)

# File names for the two splits; the upload cell below reads them again.
training_file_name = "train.jsonl"
validation_file_name = "val.jsonl"

# Write each split to its own JSON Lines file in the current working directory.
write_to_jsonl(train_data, training_file_name)
write_to_jsonl(val_data, validation_file_name)


## Uploading Files...

In [None]:
from openai import OpenAI
client = OpenAI()

# Upload Training and Validation Files
# Each file is opened in a `with` block so its handle is closed as soon as
# the upload call returns, instead of being left open for the whole session.
with open(training_file_name, "rb") as training_fh:
    training_file = client.files.create(file=training_fh, purpose="fine-tune")
with open(validation_file_name, "rb") as validation_fh:
    validation_file = client.files.create(file=validation_fh, purpose="fine-tune")


In [None]:
# Show the ids assigned to the uploaded files.
print(f"Training file id: {training_file.id}")
print(f"Validation file id: {validation_file.id}")

Training file id: file-VfxnLwrkViKMGrDQ2eHvC3YB
Validation file id: file-7Vyo3B0FWdkLxSojkeHtn8vh


## Create Fine-Tuning Job

In [None]:
# Create Fine-Tuning Job
# NOTE: fine-tuning requires a paid plan. On an Explore plan this call fails
# with a 400 error: "Fine-tuning jobs cannot be created on an Explore plan".
# `suffix_name` is passed as the `suffix` argument of the job.
suffix_name = "predii"
response = client.fine_tuning.jobs.create(
    training_file=training_file.id,
    validation_file=validation_file.id,
    model="gpt-3.5-turbo",
    suffix=suffix_name,
)

BadRequestError: Error code: 400 - {'error': {'message': 'Fine-tuning jobs cannot be created on an Explore plan. You can upgrade to a paid plan on your billing page: https://platform.openai.com/account/billing/overview', 'type': 'invalid_request_error', 'param': None, 'code': 'exceeded_quota'}}

## Functions for model evaluation and storing predictions

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def format_test(row):
    """Wrap a row's ``'Input Text'`` value in a single-message chat prompt.

    Returns a one-element list containing a ``user`` message whose content
    is ``row['Input Text']``.
    """
    return [{"role": "user", "content": row['Input Text']}]

def predict(test_messages, fine_tuned_model_id, max_tokens=1024):
    """Request one chat completion from a model and return its text.

    Parameters
    ----------
    test_messages : list[dict]
        Chat messages to send, e.g. the list produced by ``format_test``.
    fine_tuned_model_id : str
        Identifier of the model to query.
    max_tokens : int, optional
        Upper bound on the length of the generated reply. The previous
        hard-coded limit of 50 cut replies off long before a full list of
        entity/label objects could be produced, so the default is larger.

    Returns
    -------
    The ``content`` of the first choice's message.
    """
    response = client.chat.completions.create(
        model=fine_tuned_model_id,
        messages=test_messages,
        temperature=0,  # same sampling setting as before
        max_tokens=max_tokens,
    )
    return response.choices[0].message.content


def store_predictions(test_df, fine_tuned_model_id):
    """Fill a ``'Prediction'`` column on ``test_df`` and save it as CSV.

    ``test_df`` is modified in place: the column is first reset to ``None``,
    then each row's prediction is obtained by passing ``format_test(row)``
    to ``predict`` with ``fine_tuned_model_id``. The resulting frame is
    written to ``predictions.csv``. Nothing is returned.
    """
    test_df['Prediction'] = None
    for row_label, record in test_df.iterrows():
        test_df.at[row_label, 'Prediction'] = predict(
            format_test(record), fine_tuned_model_id
        )

    test_df.to_csv("predictions.csv")
