In [1]:
%%capture
!pip install "unsloth[kaggle-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes
!pip install torch==2.3.0+cu121 torchvision==0.18.0+cu121 torchaudio==2.3.0+cu121 -f https://download.pytorch.org/whl/torch_stable.html
!pip install mlflow pyngrok

In [2]:
import mlflow

# Set up MLflow experiment
mlflow.set_tracking_uri("file:///kaggle/working/mlruns")  # Saves runs in /kaggle/working/mlruns
mlflow.set_experiment("Phi_Classification_Finetuining_Experiment")
mlflow.start_run(run_name="Phi-3.5-mini") # write here your pre trained model name

2024/11/17 17:51:23 INFO mlflow.tracking.fluent: Experiment with name 'Phi_Classification_Finetuining_Experiment' does not exist. Creating a new experiment.


<ActiveRun: >

In [3]:
from unsloth import FastLanguageModel
import torch
max_seq_length = None #2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Phi-3.5-mini-instruct",
    #max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.11.7: Fast Llama patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.741 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.0+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.26.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.26G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/140 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.37k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

We now add LoRA adapters so we only need to update 1 to 10% of all parameters!

In [4]:
r = 64
target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                  "gate_proj", "up_proj", "down_proj","lm_head"]
lora_alpha = 16
lora_dropout = 0.1
bias = "none" 
use_gradient_checkpointing = "unsloth"
random_state = 3407
use_rslora = False
loftq_config = None

lora_configs = {
    "r": r,
    "target_modules": target_modules,
    "lora_alpha": lora_alpha,
    "lora_dropout": lora_dropout,
    "bias": bias,
    "use_gradient_checkpointing": use_gradient_checkpointing,
    "random_state": random_state,
    "use_rslora": use_rslora,
    "loftq_config": loftq_config,
}


In [5]:
model = FastLanguageModel.get_peft_model(
    model,
    r = r, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = target_modules,
    lora_alpha = lora_alpha,
    lora_dropout = lora_dropout, # Supports any, but = 0 is optimized
    bias = bias,    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = use_gradient_checkpointing, # True or "unsloth" for very long context
    random_state = random_state,
    use_rslora = use_rslora,  # We support rank stabilized LoRA
    loftq_config = loftq_config, # And LoftQ
)

Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.1.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.


Unsloth: Offloading output_embeddings to disk to save VRAM


Unsloth 2024.11.7 patched 32 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


Unsloth: Training lm_head in mixed precision to save VRAM


In [6]:
model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32064, 3072, padding_idx=32000)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=3072, out_features=3072, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=3072, out_features=64, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=64, out_features=3072, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
          

In [7]:
tokenizer

LlamaTokenizerFast(name_or_path='unsloth/phi-3.5-mini-instruct-bnb-4bit', vocab_size=32000, model_max_length=131072, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '<|endoftext|>', 'unk_token': '<unk>', 'pad_token': '<|placeholder6|>'}, clean_up_tokenization_spaces=False),  added_tokens_decoder={
	0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("</s>", rstrip=True, lstrip=False, single_word=False, normalized=False, special=False),
	32000: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	32001: AddedToken("<|assistant|>", rstrip=True, lstrip=False, single_word=False, normalized=False, special=True),
	32002: AddedToken("<|placeholder1|>", rstrip=True, lstrip=False, single_word=False, normalized=Fal

In [8]:
from datasets import load_dataset

dataset = load_dataset('csv', data_files='/kaggle/input/clasification-train/Classification_train.csv', split='train')
dataset

Generating train split: 0 examples [00:00, ? examples/s]

Dataset({
    features: ['dataset_name', 'series_description', 'algorithm', 'hyperparameters'],
    num_rows: 808
})

In [9]:
unique_algorithms = list(set(dataset['algorithm']))
unique_algorithms

['XGBoostClassifier',
 'ElasticNetClassifier',
 'LassoClassifier',
 'GaussianProcessClassifier',
 'AdaboostClassifier',
 'LightgbmClassifier',
 'SVC',
 'RandomForestClassifier']

In [10]:
algo_str = ", ".join(unique_algorithms[:-1]) + ", or " + unique_algorithms[-1] + "."
algo_str

'XGBoostClassifier, ElasticNetClassifier, LassoClassifier, GaussianProcessClassifier, AdaboostClassifier, LightgbmClassifier, SVC, or RandomForestClassifier.'

In [11]:
from collections import Counter
Counter(dataset['algorithm'])

Counter({'AdaboostClassifier': 262,
         'XGBoostClassifier': 208,
         'ElasticNetClassifier': 150,
         'RandomForestClassifier': 149,
         'LassoClassifier': 31,
         'LightgbmClassifier': 5,
         'GaussianProcessClassifier': 2,
         'SVC': 1})

In [12]:
# Split the dataset into train and valid sets
train_rest_split = dataset.train_test_split(test_size=0.3)

In [13]:
train_dataset = train_rest_split['train']
rest_dataset = train_rest_split['test']

In [14]:
valid_test_split = rest_dataset.train_test_split(test_size=0.5)

In [15]:
valid_dataset = valid_test_split["train"]
test_dataset = valid_test_split["train"]

In [16]:
train_dataset

Dataset({
    features: ['dataset_name', 'series_description', 'algorithm', 'hyperparameters'],
    num_rows: 565
})

In [17]:
valid_dataset

Dataset({
    features: ['dataset_name', 'series_description', 'algorithm', 'hyperparameters'],
    num_rows: 121
})

In [18]:
test_dataset

Dataset({
    features: ['dataset_name', 'series_description', 'algorithm', 'hyperparameters'],
    num_rows: 121
})

<a name="Data"></a>
### Data Prep
We now use the `Phi-3` format for prompting

```
<|system|>
You are a helpful assistant.<|end|>
<|user|>
How to explain Internet for a medieval knight?<|end|>
<|assistant|>
```

In [19]:
tokenizer.eos_token

'<|endoftext|>'

In [20]:
EOS_TOKEN = tokenizer.eos_token

train_prompt =  """<|system|>
Below is a description of a time series dataset. Carefully analyze the data characteristics such as feature distributions, stationarity, outliers, class balance, and complexity. Based on this analysis, choose the best-fitting machine learning algorithm from the following search space: {} Only provide the name of the algorithm without any explanation, ensuring that the choice is unbiased and well-informed.<|end|>
<|user|>
{}<|end|>
<|assistant|>
{}"""

def formatting_prompts_func(examples):
    inputs = examples["series_description"]
    outputs = examples["algorithm"]
    texts = []
    for input, output in zip( inputs, outputs):
        text = train_prompt.format(algo_str, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }

train_dataset = train_dataset.map(formatting_prompts_func, batched = True)
train_dataset

Map:   0%|          | 0/565 [00:00<?, ? examples/s]

Dataset({
    features: ['dataset_name', 'series_description', 'algorithm', 'hyperparameters', 'text'],
    num_rows: 565
})

In [21]:
train_dataset['text'][1]

'<|system|>\nBelow is a description of a time series dataset. Carefully analyze the data characteristics such as feature distributions, stationarity, outliers, class balance, and complexity. Based on this analysis, choose the best-fitting machine learning algorithm from the following search space: XGBoostClassifier, ElasticNetClassifier, LassoClassifier, GaussianProcessClassifier, AdaboostClassifier, LightgbmClassifier, SVC, or RandomForestClassifier. Only provide the name of the algorithm without any explanation, ensuring that the choice is unbiased and well-informed.<|end|>\n<|user|>\nA multivariate classification time-series dataset consists of 7478 samples and 16 features with 16 numerical and 0 categorical features. Each instance has a window length of 24. The dataset has a sampling rate of 60.0 minutes. The dataset has a missing values percentage of 0.0%. The missing values percentages for numerical features range from 0 to 0 with mean 0.00 and standard deviation 0.00.\n The targ

In [22]:
valid_dataset = valid_dataset.map(formatting_prompts_func, batched = True)
valid_dataset

Map:   0%|          | 0/121 [00:00<?, ? examples/s]

Dataset({
    features: ['dataset_name', 'series_description', 'algorithm', 'hyperparameters', 'text'],
    num_rows: 121
})

In [23]:
valid_dataset['text'][1]

'<|system|>\nBelow is a description of a time series dataset. Carefully analyze the data characteristics such as feature distributions, stationarity, outliers, class balance, and complexity. Based on this analysis, choose the best-fitting machine learning algorithm from the following search space: XGBoostClassifier, ElasticNetClassifier, LassoClassifier, GaussianProcessClassifier, AdaboostClassifier, LightgbmClassifier, SVC, or RandomForestClassifier. Only provide the name of the algorithm without any explanation, ensuring that the choice is unbiased and well-informed.<|end|>\n<|user|>\nA multivariate classification time-series dataset consists of 7627 samples and 16 features with 16 numerical and 0 categorical features. Each instance has a window length of 24. The dataset has a sampling rate of 60.0 minutes. The dataset has a missing values percentage of 0.0%. The missing values percentages for numerical features range from 0 to 0 with mean 0.00 and standard deviation 0.00.\n The targ

<a name="Train"></a>
### Train the model

In [24]:
from unsloth import FastLanguageModel ,is_bfloat16_supported
from transformers import TrainingArguments
from trl import SFTTrainer


per_device_train_batch_size = 4
gradient_accumulation_steps = 4
gradient_checkpointing=True
warmup_steps = 5
max_steps = 22
learning_rate = 3e-4
fp16 = not is_bfloat16_supported()
bf16 = is_bfloat16_supported()
logging_steps = 1
optim = "adamw_8bit"
weight_decay = 0.01
lr_scheduler_type = "linear"
seed = 3407
output_dir = "./phi_outputs"
evaluation_strategy="steps"


train_args = {
    "per_device_train_batch_size" : per_device_train_batch_size,
    "gradient_accumulation_steps" : gradient_accumulation_steps,
    "gradient_checkpointing" : gradient_checkpointing,
    "warmup_steps" : warmup_steps,
    "max_steps" : max_steps, 
    "learning_rate" : learning_rate,
    "fp16" : fp16,
    "bf16" : bf16,
    "logging_steps" : logging_steps,
    "optim" : optim,
    "weight_decay" : weight_decay,
    "lr_scheduler_type" : lr_scheduler_type,
    "seed" : seed,
    "output_dir" : output_dir,
    "evaluation_strategy":evaluation_strategy, 
}


training_arguments= TrainingArguments(
        per_device_train_batch_size = per_device_train_batch_size,
        gradient_accumulation_steps = gradient_accumulation_steps,
        gradient_checkpointing=gradient_checkpointing,
        warmup_steps = warmup_steps,
        max_steps = max_steps, 
        learning_rate = learning_rate,
        fp16 = fp16,
        bf16 = bf16,
        logging_steps = logging_steps,
        optim = optim,
        weight_decay = weight_decay,
        lr_scheduler_type = lr_scheduler_type,
        seed = seed,
        output_dir = output_dir,
        evaluation_strategy = evaluation_strategy, 
    )



In [25]:
from trl import  DataCollatorForCompletionOnlyLM

instruction_template="<|user|>"
response_template = "<|assistant|>"

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = training_arguments,
    data_collator =  DataCollatorForCompletionOnlyLM(instruction_template=instruction_template,
                                                     response_template=response_template,
                                                     tokenizer=tokenizer,mlm=False),
    #callbacks=[SaveBestModelCallback()]
)

Map (num_proc=2):   0%|          | 0/565 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/121 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [26]:
#@title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = Tesla T4. Max memory = 14.741 GB.
2.998 GB of memory reserved.


In [27]:
# Log all parameters, metrics will be auto logged 
mlflow.log_params(lora_configs)
mlflow.log_params(train_args)

In [28]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 565 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 4
\        /    Total batch size = 16 | Total steps = 22
 "-____-"     Number of trainable parameters = 218,038,272
[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112988433332147, max=1.0…

Step,Training Loss,Validation Loss
1,8.6638,8.444028
2,8.3152,7.729608
3,7.8538,4.918957
4,4.8254,2.383049
5,2.4184,1.347579
6,1.3517,0.36898
7,0.2968,0.334659
8,0.3536,0.310947
9,0.2614,0.288821
10,0.2735,0.270441


In [29]:
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory-start_gpu_memory, 3)
used_percentage = round(used_memory/max_memory*100, 3)
lora_percentage = round(used_memory_for_lora/max_memory*100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

1564.0376 seconds used for training.
26.07 minutes used for training.
Peak reserved memory = 11.977 GB.
Peak reserved memory for training = 8.979 GB.
Peak reserved memory % of max memory = 81.25 %.
Peak reserved memory for training % of max memory = 60.912 %.


In [30]:
model.save_pretrained("phi_model")
tokenizer.save_pretrained("phi_model")

('phi_model/tokenizer_config.json',
 'phi_model/special_tokens_map.json',
 'phi_model/tokenizer.model',
 'phi_model/added_tokens.json',
 'phi_model/tokenizer.json')

<a name="Inference"></a>
### Inference

In [31]:
test_prompt =  """<|system|>
Below is a description of a time series dataset. Carefully analyze the data characteristics such as feature distributions, stationarity, outliers, class balance, and complexity. Based on this analysis, choose the best-fitting machine learning algorithm from the following search space: {} Only provide the name of the algorithm without any explanation, ensuring that the choice is unbiased and well-informed.<|end|>
<|user|>
{}<|end|>
<|assistant|>"""

def formatting_test_prompts_func(examples):
    inputs = examples["series_description"]
    texts = []
    for input in inputs:
        # Only format with input, without including the output and without including the EOS_TOKEN
        text = test_prompt.format(algo_str, input)
        texts.append(text)
    return {"text": texts}

In [32]:

test_dataset = test_dataset.map(formatting_test_prompts_func, batched = True)
test_dataset

Map:   0%|          | 0/121 [00:00<?, ? examples/s]

Dataset({
    features: ['dataset_name', 'series_description', 'algorithm', 'hyperparameters', 'text'],
    num_rows: 121
})

In [33]:
test_dataset['text'][0]

'<|system|>\nBelow is a description of a time series dataset. Carefully analyze the data characteristics such as feature distributions, stationarity, outliers, class balance, and complexity. Based on this analysis, choose the best-fitting machine learning algorithm from the following search space: XGBoostClassifier, ElasticNetClassifier, LassoClassifier, GaussianProcessClassifier, AdaboostClassifier, LightgbmClassifier, SVC, or RandomForestClassifier. Only provide the name of the algorithm without any explanation, ensuring that the choice is unbiased and well-informed.<|end|>\n<|user|>\nA multivariate classification time-series dataset consists of 7800 samples and 16 features with 16 numerical and 0 categorical features. Each instance has a window length of 24. The dataset has a sampling rate of 60.0 minutes. The dataset has a missing values percentage of 0.0%. The missing values percentages for numerical features range from 0 to 0 with mean 0.00 and standard deviation 0.00.\n The targ

In [34]:
test_dataset['algorithm'][0]

'RandomForestClassifier'

In [35]:
FastLanguageModel.for_inference(model)
inputs = tokenizer(
[test_dataset['text'][0]], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
tokenizer.batch_decode(outputs)

['<|system|> Below is a description of a time series dataset. Carefully analyze the data characteristics such as feature distributions, stationarity, outliers, class balance, and complexity. Based on this analysis, choose the best-fitting machine learning algorithm from the following search space: XGBoostClassifier, ElasticNetClassifier, LassoClassifier, GaussianProcessClassifier, AdaboostClassifier, LightgbmClassifier, SVC, or RandomForestClassifier. Only provide the name of the algorithm without any explanation, ensuring that the choice is unbiased and well-informed.<|end|><|user|> A multivariate classification time-series dataset consists of 7800 samples and 16 features with 16 numerical and 0 categorical features. Each instance has a window length of 24. The dataset has a sampling rate of 60.0 minutes. The dataset has a missing values percentage of 0.0%. The missing values percentages for numerical features range from 0 to 0 with mean 0.00 and standard deviation 0.00.\n The target 

In [36]:
test_responses = []
# get all test data inference result
for test_prompt in test_dataset['text']:
  inputs = tokenizer(
  [test_prompt], return_tensors = "pt").to("cuda")

  outputs = model.generate(**inputs, max_new_tokens = 10, use_cache = True)
  test_responses.append(tokenizer.batch_decode(outputs))

In [37]:
# Convert the dataset to a Pandas DataFrame
df = test_dataset.to_pandas()

In [38]:
df['model_responses']= test_responses

In [39]:
# Save the DataFrame as a CSV file
df.to_csv('phi_result_unsloth.csv', index=False)

In [40]:
df['model_responses'][0]

['<|system|> Below is a description of a time series dataset. Carefully analyze the data characteristics such as feature distributions, stationarity, outliers, class balance, and complexity. Based on this analysis, choose the best-fitting machine learning algorithm from the following search space: XGBoostClassifier, ElasticNetClassifier, LassoClassifier, GaussianProcessClassifier, AdaboostClassifier, LightgbmClassifier, SVC, or RandomForestClassifier. Only provide the name of the algorithm without any explanation, ensuring that the choice is unbiased and well-informed.<|end|><|user|> A multivariate classification time-series dataset consists of 7800 samples and 16 features with 16 numerical and 0 categorical features. Each instance has a window length of 24. The dataset has a sampling rate of 60.0 minutes. The dataset has a missing values percentage of 0.0%. The missing values percentages for numerical features range from 0 to 0 with mean 0.00 and standard deviation 0.00.\n The target 

In [41]:
predictions=[]
for response in test_responses:
  predictions.append( response[0].split(response_template)[1].split("<|end")[0].strip())

In [42]:
len(predictions)

121

In [43]:
actual_data= df['algorithm']
len(actual_data)

121

In [44]:
for i in zip(actual_data.to_list(), predictions):
    print(i)


('RandomForestClassifier', 'ElasticNetClassifier')
('XGBoostClassifier', 'AdaboostClassifier')
('XGBoostClassifier', 'AdaboostClassifier')
('ElasticNetClassifier', 'ElasticNetClassifier')
('XGBoostClassifier', 'ElasticNetClassifier')
('AdaboostClassifier', 'ElasticNetClassifier')
('LassoClassifier', 'ElasticNetClassifier')
('AdaboostClassifier', 'ElasticNetClassifier')
('ElasticNetClassifier', 'ElasticNetClassifier')
('XGBoostClassifier', 'AdaboostClassifier')
('AdaboostClassifier', 'ElasticNetClassifier')
('AdaboostClassifier', 'ElasticNetClassifier')
('XGBoostClassifier', 'AdaboostClassifier')
('XGBoostClassifier', 'AdaboostClassifier')
('AdaboostClassifier', 'ElasticNetClassifier')
('XGBoostClassifier', 'ElasticNetClassifier')
('XGBoostClassifier', 'ElasticNetClassifier')
('RandomForestClassifier', 'ElasticNetClassifier')
('XGBoostClassifier', 'ElasticNetClassifier')
('RandomForestClassifier', 'ElasticNetClassifier')
('AdaboostClassifier', 'ElasticNetClassifier')
('AdaboostClassifie

In [45]:
from sklearn.metrics import f1_score

accuracy = sum(1 for true, pred in zip(actual_data, predictions) if true == pred) / len(actual_data)
f1 = f1_score(actual_data, predictions, average='weighted')

print("Accuracy:", accuracy)
print("F1 Score:", f1)

Accuracy: 0.2727272727272727
F1 Score: 0.17666479719791328


In [46]:
import shutil
import os

mlruns_path = "/kaggle/working/mlruns"  # Path where MLflow runs are saved
zip_path = "/kaggle/working/mlruns.zip"  # Location for zipped file

if os.path.exists(mlruns_path):
    shutil.make_archive(base_name=zip_path.replace('.zip', ''), format='zip', root_dir=mlruns_path)
    print("MLruns directory successfully zipped.")
else:
    print("MLruns directory does not exist.")

MLruns directory successfully zipped.


### Save the model

In [47]:
secret_hf = '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'
!huggingface-cli login --token $secret_hf

  pid, fd = os.forkpty()


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [48]:
# Online saving on HF
new_model_adabtor= "unsloth-Phi-3.5-mini-tuned"
model.push_to_hub(new_model_adabtor)
tokenizer.push_to_hub(new_model_adabtor)

README.md:   0%|          | 0.00/600 [00:00<?, ?B/s]

  0%|          | 0/1 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/872M [00:00<?, ?B/s]

Saved model to https://huggingface.co/unsloth-Phi-3.5-mini-tuned


No files have been modified since last commit. Skipping to prevent empty commit.
