# Installing The Required Packages

In [None]:
%%capture
!pip install transformers accelerate bitsandbytes peft huggingface_hub
!pip install --upgrade transformers accelerate bitsandbytes peft huggingface_hub
!pip install ipywidgets --upgrade
!jupyter nbextension enable --py widgetsnbextension

# Model Loader

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel, PeftConfig
import os


# Load the PEFT configuration
# The adapter config records which base checkpoint the adapter was trained on,
# so the base model name is read from it instead of being hard-coded.
peft_model_id = 'alinasser930/unsloth-mistral-tuned_model'

peft_config = PeftConfig.from_pretrained(
    peft_model_id,
)
base_model_name = peft_config.base_model_name_or_path

print(f"Base model: {base_model_name}")

# Load the base model and tokenizer
# No explicit BitsAndBytesConfig is passed to `from_pretrained`: the recorded
# run resolved the base model to a `...-bnb-4bit` checkpoint, which presumably
# ships its own quantization settings -- TODO confirm before forcing another
# precision here.
# NOTE(review): `trust_remote_code=True` allows code from the Hub repository to
# run at load time; confirm it is actually required for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    base_model_name,
    trust_remote_code=True
)

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    trust_remote_code=True,
    device_map='auto'
)

# Load the LoRA adapter
# Wraps the base model with the fine-tuned adapter weights from `peft_model_id`.
model = PeftModel.from_pretrained(
    base_model,
    peft_model_id,
    trust_remote_code=True,
    device_map='auto'
)

# Prepare for inference
# eval() switches the wrapped model to evaluation mode.
model.eval()


# Prompt template. The single `{}` placeholder is filled with the dataset
# description through str.format further down; the text ends right after the
# "### ALGORITHM:" marker, which is where the model's answer is expected.
test_prompt = """Given the following description of time series data, identify the single best fitting machine learning algorithm from the provided list. Do not include any explanation, and only provide the algorithm's name.

Available algorithms: AdaboostClassifier, ElasticNetClassifier, LassoClassifier, LightgbmClassifier, SVC, GaussianProcessClassifier, RandomForestClassifier, XGBoostClassifier.

### DESCRIPTION:

{}

### ALGORITHM:
"""

# Example dataset description that is substituted into the template.
description = "A multivariate classification time-series dataset consists of 7606 samples and 16 features with 16 numerical and 0 categorical features. Each instance has a window length of 24. The dataset has a sampling rate of 60.0 minutes. The dataset has a missing values percentage of 0.0%. The missing values percentages for numerical features range from 0 to 0 with mean 0.00 and standard deviation 0.00.\n The target column has 3 classes with entropy value 1.47 showing a Unbalanced dataset. Among the 7606 samples the target ground-truth class has changed 1618 times representing a percentage of 21.37%. There are 16 features in the dataset\n Among the numerical predictors, the series has 16 numerical features detected as Stationary out of the 16 numerical features using the dickey-fuller test and the rest are Unstationary. 15 of them are Multiplicative time-series features and the rest are Additive time-series features. There is an average of 0 seasonality components detected in the numerical predictors. The top 0 common seasonality components are represented using sinusoidal waves. The numerical predictors also exhibit skewness values ranging from 0.011. to 15.884 and kurtosis values of 0.00 to 321.97. The fractal dimension analysis yields values ranging from -0.66 to -0.11 indicating a Complex and Irregular time-series structure for the numerical predictors. The correlation values among the numerical predictors have a minimum of -0.95, maximum 1.00, mean 0.10, and standard deviation 0.48. The count of numerical predictors with outliers is 15 with the minimum percentage of 0.00%, maximum percentage of 14.41%, average percentage of 5.89%, and standard deviation percentage of 4.64%.\n\nThe dataset is converted into a simple classification task by extracting the previously described features."


# Format the prompt with the description
# The template contains exactly one positional `{}` field, so a single
# positional argument is enough.
formatted_prompt = test_prompt.format(description)

# Tokenize the input
# Returns PyTorch tensors and moves them to the device the model reports.
inputs = tokenizer(
    formatted_prompt,
    return_tensors='pt'
).to(model.device)

# Generation below samples (do_sample=True), so seed PyTorch's random number
# generators first; without this, re-running the cell can yield a different
# algorithm each time.
SEED = 42
torch.manual_seed(SEED)

# Generate the output
# no_grad() disables gradient tracking, which is not needed for inference.
with torch.no_grad():
    outputs = model.generate(
        input_ids=inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_new_tokens=10,  # the prompt asks for an algorithm name only
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )

adapter_config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

Base model: unsloth/mistral-7b-instruct-v0.2-bnb-4bit


tokenizer_config.json:   0%|          | 0.00/2.13k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.20k [00:00<?, ?B/s]

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


model.safetensors:   0%|          | 0.00/4.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/155 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/1.20G [00:00<?, ?B/s]

# Model Output

In [None]:
# Decode and display the response
# `response` holds the full decoded sequence (prompt followed by completion).
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Isolate the answer by decoding only the tokens generated after the prompt.
# Splitting the full text on the "### ALGORITHM:" marker would silently keep
# the whole prompt (including its "Available algorithms" list) whenever the
# marker is not found verbatim in the decoded text.
prompt_length = inputs['input_ids'].shape[1]
response_text = tokenizer.decode(
    outputs[0][prompt_length:],
    skip_special_tokens=True
).strip()

# List of algorithms
# Candidate names the model may answer with (same set as listed in the prompt).
algorithms = [
    'AdaboostClassifier',
    'ElasticNetClassifier',
    'LassoClassifier',
    'LightgbmClassifier',
    'SVC',
    'GaussianProcessClassifier',
    'RandomForestClassifier',
    'XGBoostClassifier',
]

# Extract the algorithm name
# Choose the candidate mentioned EARLIEST in the response (case-insensitive).
# Taking the first match in list order instead would favour names near the top
# of `algorithms` whenever the response mentions more than one candidate.
response_lower = response_text.lower()
mentions = []
for algo in algorithms:
    position = response_lower.find(algo.lower())
    if position != -1:
        mentions.append((position, algo))

# min() returns the first minimal element, so ties fall back to list order.
selected_algorithm = min(mentions, key=lambda m: m[0])[1] if mentions else None

if selected_algorithm:
    print(f"The Best Alogrithm for This description is: {selected_algorithm}")
else:
    print("No algorithm found in the response.")


The Best Alogrithm for This description is: AdaboostClassifier
