From 92dd67a78660f72b2bc1ad7da9bfc7452989461f Mon Sep 17 00:00:00 2001 From: Doordashcon Date: Sun, 6 Aug 2023 03:42:03 +0100 Subject: [PATCH] Adds fine-tune example (#84) * train and validate scaffold * Sentiment analysis, tweets about AI * retreiving model... * data out of code * returns None * fine-tune-cli-WIP * max_tokens = 1 * spelling fix * fix about --------- Co-authored-by: Himanshu Neema --- async-openai/src/types/types.rs | 8 +- examples/Cargo.toml | 1 + examples/fine-tune-cli/Cargo.toml | 11 ++ examples/fine-tune-cli/data_files/train.jsonl | 10 ++ .../fine-tune-cli/data_files/validate.jsonl | 10 ++ examples/fine-tune-cli/src/main.rs | 143 ++++++++++++++++++ 6 files changed, 182 insertions(+), 1 deletion(-) create mode 100644 examples/fine-tune-cli/Cargo.toml create mode 100644 examples/fine-tune-cli/data_files/train.jsonl create mode 100644 examples/fine-tune-cli/data_files/validate.jsonl create mode 100644 examples/fine-tune-cli/src/main.rs diff --git a/async-openai/src/types/types.rs b/async-openai/src/types/types.rs index c2a29a45..67626b99 100644 --- a/async-openai/src/types/types.rs +++ b/async-openai/src/types/types.rs @@ -619,9 +619,15 @@ pub struct ListFineTuneEventsResponse { pub data: Vec, } +#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)] +pub struct ListFineTuneEventsStreamResponse { + pub object: String, + pub data: Option>, +} + /// Parsed server side events stream until an \[DONE\] is received from server. pub type FineTuneEventsResponseStream = - Pin> + Send>>; + Pin> + Send>>; #[derive(Debug, Deserialize, Clone, PartialEq, Serialize)] pub struct DeleteModelResponse { diff --git a/examples/Cargo.toml b/examples/Cargo.toml index 61c972d7..860bd214 100644 --- a/examples/Cargo.toml +++ b/examples/Cargo.toml @@ -13,6 +13,7 @@ members = [ "create-image-edit", "create-image-variation", "embeddings", + "fine-tune-cli", "function-call", "function-call-stream", "models", diff --git a/examples/fine-tune-cli/Cargo.toml b/examples/fine-tune-cli/Cargo.toml new file mode 100644 index 00000000..35571073 --- /dev/null +++ b/examples/fine-tune-cli/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "fine-tune-cli" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +async-openai = {path = "../../async-openai"} +clap = { version = "4.3.19", features = ["derive"] } +tokio = { version = "1.25.0", features = ["full"] } \ No newline at end of file diff --git a/examples/fine-tune-cli/data_files/train.jsonl b/examples/fine-tune-cli/data_files/train.jsonl new file mode 100644 index 00000000..4e3d7344 --- /dev/null +++ b/examples/fine-tune-cli/data_files/train.jsonl @@ -0,0 +1,10 @@ +{"prompt": "So I decided to look at what’s going on at Artstation for the first time in months and instantly regretted it. This is what’s trending these days. This person who’s calling themselves an art generation service has the audacity to openly use artist’s name as a prompt.", "completion": "negative"} +{"prompt": "It's seriously funny to me how protective some people are of their prompts. They're terrified someone will replicate their work. Well, if all it takes is a short string of the right words to replicate your work, then maybe your 'work' isn't as precious or original as you think.", "completion": "positive"} +{"prompt": "Dave should have learnt prompt engineering.", "completion": "negative"} +{"prompt": "As a stand alone job… no. No one is going to get paid $800k to be a prompt engineer. Why? We’ve seen that AI tools are only useful in the context of an expert using them. Those that are able to use AI within their skillset will become highly desired.", "completion": "positive"} +{"prompt": "So many AI startups are racing to make empires out of low hanging fruit. Ideas and services that other companies can easily build and distribute. It's not a coincidence that prompt engineering has become a fad. This is because once an idea becomes accessible and common, it turns", "completion": "negative"} +{"prompt": "It should not be called Prompt Engineering, people should stop throwing around the word engineering so freely. Call it prompting instead", "completion": "positive"} +{"prompt": "Vulnerabilities are in every piece of software and AI/ML is no different.", "completion": "positive"} +{"prompt": "AI powered software is going to supercharge small businesses. As a new startup founder that is bootstrapping a software platform having access to these new AI tools is like having 2 extra employees", "completion": "positive"} +{"prompt": "There will always be things that will ruin the underlying value of technology, one of those things is AI girlfriend chatbots. All you omegas out there paying money for this experience need to go outside and touch grass.", "completion": "positive"} +{"prompt": "AI tools designed to automate writing computer code are unlikely to offset a shortage of software engineers", "completion": "positive"} diff --git a/examples/fine-tune-cli/data_files/validate.jsonl b/examples/fine-tune-cli/data_files/validate.jsonl new file mode 100644 index 00000000..ec45f2a9 --- /dev/null +++ b/examples/fine-tune-cli/data_files/validate.jsonl @@ -0,0 +1,10 @@ +{"prompt": "I am a prompt engineer", "completion": "negative"} +{"prompt": "Leveraging state-of-the-art language models like ChatGPT, I can effectively utilize carefully designed prompts to obtain comprehensive and actionable feedback on my coding projects.", "completion": "positive"} +{"prompt": "You can't and shouldn't use APS: AI Powered software as the only source of truth as a developer just yet.", "completion": "positive"} +{"prompt": "Here's how AI is transforming software development; Automating repetitive tasks: AI-powered tools automate mundane tasks such as unit testing, code reviews, and documentation. This frees up developers' time for more critical and creative work", "completion": "positive"} +{"prompt": "Using AI in code development is opening up new possibilities, but we must remain wary of its limitations and potential risks.", "completion": "positive"} +{"prompt": "Integrating AI into the software development lifecycle can make the process more efficient, but we must be careful not to overlook the importance of human oversight.", "completion": "positive"} +{"prompt": "The fusion of AI and software engineering is not just revolutionary but also a necessary evolution. It will empower developers to focus more on higher-level tasks.", "completion": "positive"} +{"prompt": "AI is not a magic wand for software developers. It's just another tool that can help or hinder, depending on how it's used.", "completion": "positive"} +{"prompt": "AI is overrated in software development. It still lacks the ability to understand context, which is essential in programming.", "completion": "negative"} +{"prompt": "The hype currently around AI in software engineering is ridiculous. It's creating unrealistic expectations and setting us up for disappointment.", "completion": "negative"} diff --git a/examples/fine-tune-cli/src/main.rs b/examples/fine-tune-cli/src/main.rs new file mode 100644 index 00000000..a10e395a --- /dev/null +++ b/examples/fine-tune-cli/src/main.rs @@ -0,0 +1,143 @@ +use std::path::PathBuf; + +use async_openai::{ + types::{CreateCompletionRequestArgs, CreateFileRequestArgs, CreateFineTuneRequestArgs}, + Client, + config::OpenAIConfig, +}; +use clap::{arg, Command}; + +// TODO: Constructive error handling +async fn data(paths: Vec<&PathBuf>, client: Client) { + if paths.len() > 2 { + println!("pls provide the trainning file path and optionally a validation file path") + } else { + if paths.len() < 2 { + let train_request = CreateFileRequestArgs::default() + .file(paths[0]) + .purpose("fine-tune") + .build() + .unwrap(); + + let trainning_data = client.files().create(train_request).await.unwrap(); + + let fine_tune_request = CreateFineTuneRequestArgs::default() + .training_file(trainning_data.id) + .build() + .unwrap(); + + let job = client.fine_tunes().create(fine_tune_request).await.unwrap(); + + println!("Save the ft job ID: {:?}", job.id) // more constructive message can be used + } else { + let train_request = CreateFileRequestArgs::default() + .file(paths[0]) + .purpose("fine-tune") + .build() + .unwrap(); + + let validate_request = CreateFileRequestArgs::default() + .file(paths[1]) + .purpose("fine-tune") + .build() + .unwrap(); + + let trainning_data = client.files().create(train_request).await.unwrap(); + + let validation_data = client.files().create(validate_request).await.unwrap(); + + let fine_tune_request = CreateFineTuneRequestArgs::default() + .training_file(trainning_data.id) + .validation_file(validation_data.id) + .build() + .unwrap(); + + let job = client.fine_tunes().create(fine_tune_request).await.unwrap(); + + println!("Save the ft job ID: {:?}", job.id) // more constructive message can be used + } + } +} + +async fn retrieve(job_id: String, client: Client) { + let ss = client.fine_tunes().retrieve(&job_id).await.unwrap(); + + if let Some(ft_model) = ss.fine_tuned_model { + println!("{:?}", ft_model) + } else { + println!("Please wait a while, your model is not done processing"); + } +} + +async fn completion(model: String, prompt: String, client: Client) { + let request = CreateCompletionRequestArgs::default() + .model(model) + .prompt(prompt) + .max_tokens(1_u16) + .build() + .unwrap(); + + let response = client.completions().create(request).await.unwrap(); + + println!("{:?}", response.choices[0]); +} + +fn cli() -> Command { + Command::new("ft") + .about("Fine tune a model by OPENAI ") + .subcommand_required(true) + .arg_required_else_help(true) + .subcommand( + Command::new("data") + .about("Provide training and validation (Optional) data") + .arg_required_else_help(true) + .arg( + arg!( ... "Path to trainning file and optionally validation file") + .value_parser(clap::value_parser!(PathBuf)), + ), + ) + .subcommand( + Command::new("retrieve") + .about("Retrieve completed fine tune model") + .arg(arg!( "The fine tune job Id")) + .arg_required_else_help(true), + ) + .subcommand( + Command::new("test") + .about("classify prompt as positive or negative") + .arg(arg!( "The remote to target")) + .arg(arg!( "Provide a completion prompt to test your model")) + .arg_required_else_help(true), + ) +} +#[tokio::main] +async fn main() { + let config = OpenAIConfig::new(); + let client = Client::with_config(config); + + let matches = cli().get_matches(); + + match matches.subcommand() { + Some(("data", sub_matches)) => { + let paths = sub_matches + .get_many::("PATH") + .into_iter() + .flatten() + .collect::>(); + data(paths, client).await + } + Some(("retrieve", sub_matches)) => { + let job_id = sub_matches.get_one::("JOB_ID").expect("required"); + retrieve(job_id.to_owned(), client).await + } + Some(("test", sub_matches)) => { + let model = sub_matches + .get_one::("FINE_TUNE_MODEL") + .expect("required"); + let prompt = sub_matches.get_one::("PROMPT").expect("required"); + + completion(model.to_owned(), prompt.to_owned(), client).await + } + _ => unreachable!(), // If all subcommands are defined above, anything else is unreachable!() + } +}