In [2]:
!pip install --upgrade pip
!pip install "unsloth[cu124-torch250] @ git+https://github.com/unslothai/unsloth.git"

Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[cu124-torch250]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-bm9x24yn/unsloth_2a12c48bfcd34ff9ba4a297f108bc3a9
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-bm9x24yn/unsloth_2a12c48bfcd34ff9ba4a297f108bc3a9

  Resolved https://github.com/unslothai/unsloth.git to commit f26d4e739ed507de7a9088da53d10fd02f58d160
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting xformers@ https://download.pytorch.org/whl/cu124/xformers-0.0.28.post2-cp310-cp310-manylinux_2_28_x86_64.whl (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[cu124-torch250]@ git+https://github.com/unslothai/unsloth.git)
  Downloading https://download.pytorch.org/whl/cu124/

In [3]:
!pip install trl peft accelerate bitsandbytesimport csv
import json

# Define input and output files
csv_file = '/teamspace/studios/this_studio/fd.csv'
json_file = 'text_generation.json'

# Convert CSV to JSON with specific fields
data = []
with open(csv_file, 'r') as f:
    reader = csv.DictReader(f)
    for row in reader:
        entry = {
            "caption": row["caption"],
            "inference": row["Inference"],
            "report": "Your subjective psychological view here"
        }
        data.append(entry)

# Write to JSON file
with open(json_file, 'w') as f:
    json.dump(data, f, indent=2)




In [1]:
from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer
from transformers import TrainingArguments

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


In [7]:
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.


model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Phi-3-mini-4k-instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

==((====))==  Unsloth 2024.11.7: Fast Mistral patching. Transformers = 4.46.1.
   \\   /|    GPU: NVIDIA L4. Max memory: 22.168 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.0+cu124. CUDA = 8.9. CUDA Toolkit = 12.4.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [8]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

In [4]:
from datasets import load_dataset
from transformers import AutoTokenizer

# Define the prompt and function
prompt = """
You provide psychological inference for the summary of children's drawings.
According to the summary passed, can you give your subjective psychological views summing up the state of mind and what the child is trying to convey.

### Instruction:
{}

### Input:
{}

### output:
{}
"""

EOS_TOKEN = tokenizer.eos_token 
def formatting_prompts_func(examples):
    instructions = examples["report"]
    inputs = examples["caption"]
    output = examples["inference"]
    texts = []
    for instruction, input_text, recommend in zip(instructions, inputs, output):
        # Format each entry with the prompt
        text = prompt.format(instruction, input_text, output) + EOS_TOKEN
        texts.append(text)
    return {"text": texts}
pass
from datasets import load_dataset
dataset = load_dataset('json', data_files='text_generation.json', split='train')
dataset = dataset.map(formatting_prompts_func, batched = True,)
print(dataset['text'][0])  # View the first formatted prompt



You provide psychological inference for the summary of children's drawings.
According to the summary passed, can you give your subjective psychological views summing up the state of mind and what the child is trying to convey.

### Instruction:
Your subjective psychological view here

### Input:
A child drawing with a child in the middle holding hands with a woman on the left, a red lightning bolt separating them from a sad man on the right.

### output:
['The child likely feels caught between two adults in conflict, possibly parents. The red lightning bolt suggests intense tension or anger, creating an emotional divide. Holding hands with the woman on the left while the sad man stands apart on the right may indicate the child feels closer to her but saddened by the man’s separation. This drawing reflects the child’s sense of division within the family.', 'This drawing suggests a positive, joyful bond between the child and the male figure, possibly a father or other family member, who

In [16]:
from datasets import load_dataset
from transformers import AutoTokenizer
# Define the prompt template
prompt = """
You provide psychological inference for the summary of children's drawings.
According to the summary passed, can you give your subjective psychological views summing up the state of mind and what the child is trying to convey.

### Instruction:
{}

### Input:
{}

### Output:
{}
"""

# Function to format prompts
def formatting_prompts_func(examples):
    instructions = examples["report"]
    inputs = examples["caption"]
    outputs = examples["inference"]
    texts = []
    for instruction, input_text, output in zip(instructions, inputs, outputs):
        # Format each entry with the prompt and append EOS_TOKEN at the end of each
        text = prompt.format(instruction, input_text, output) + EOS_TOKEN
        texts.append(text)
    return {"text": texts}

# Load the dataset and format the prompts
dataset = load_dataset('json', data_files='text_generation.json', split='train')
dataset = dataset.map(formatting_prompts_func, batched=True)

# Print the first formatted prompt
print(dataset['text'][3])



You provide psychological inference for the summary of children's drawings.
According to the summary passed, can you give your subjective psychological views summing up the state of mind and what the child is trying to convey.

### Instruction:
Your subjective psychological view here

### Input:
The drawing depicts multiple elements, there are three women sitting by the road side and are selling vegetables, there is a man buying vegetables from the woman in red saree, the women are sitting under an umbrella. In the background there are trees, house and there is a car crossing the road. It looks like a depiction of a rural area or village.

### Output:
This drawing of women selling vegetables by the roadside captures the essence of rural life and its simplicity. The women sitting under an umbrella, each with unique details like a red saree, reflect a sense of community and hard work. This scene, with its earthy elements, conveys a feeling of resilience and self-sufficiency that is ofte

In [11]:
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        max_steps = 10,
        learning_rate = 2e-4,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)

Map (num_proc=2):   0%|          | 0/310 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [12]:
#@title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = NVIDIA L4. Max memory = 22.168 GB.
4.436 GB of memory reserved.


In [13]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 310 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 10
 "-____-"     Number of trainable parameters = 29,884,416


Step,Training Loss
1,1.9044
2,1.9535
3,1.9717
4,1.9505
5,1.9664
6,1.8157
7,1.8241
8,1.6424
9,1.7142
10,1.7945


In [14]:
# alpaca_prompt = Copied from above
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    prompt.format(
        "Give your psychological view ",
        """This is a drawing of a monster with horns and a fiery body. The monster is surrounded by a group of people who appear to be in distress. The background is dark and the drawing is done in bright colors""",
        "",)

], return_tensors = "pt").to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 500)


You provide psychological inference for the summary of children's drawings.
According to the summary passed, can you give your subjective psychological views summing up the state of mind and what the child is trying to convey.

### Instruction:
Give your psychological view 

### Input:
This is a drawing of a monster with horns and a fiery body. The monster is surrounded by a group of people who appear to be in distress. The background is dark and the drawing is done in bright colors

### Output:

The 

drawing depicts a monster with horns and a fiery body, which may symbolize the child's fear or anxiety. The presence of the monster in the drawing suggests that the child may be experiencing feelings of being overwhelmed or threatened. The group of people in distress surrounding the monster may represent the child's perception of others as being in a state of fear or panic. The dark background may indicate a sense of darkness or negativity in the child's life. The use of bright colors in the drawing may suggest that the child is trying to express their emotions in a vivid and expressive way. Overall, the drawing may reflect the child's feelings of fear, anxiety, and a sense of being overwhelmed by negative emotions.


### Instruction:
Give your psychological view 

### Input:
This is a drawing of a monster with horns and a fiery body. The monster is surrounded by a group of people who appear to be in distress. The background is dark and the drawing is done in bright colors

### Output:

In [17]:
from transformers import TextStreamer, StoppingCriteria, StoppingCriteriaList

# Define a custom stopping criterion based on EOS_TOKEN
class StopOnSecondInstruction(StoppingCriteria):
    def __init__(self, tokenizer, instruction_token):
        self.tokenizer = tokenizer
        self.instruction_token = instruction_token
        self.instruction_count = 0

    def __call__(self, input_ids, scores, **kwargs):
        # Decode the input so far
        decoded_text = self.tokenizer.decode(input_ids[0], skip_special_tokens=True)
        # Count occurrences of ### INSTRUCTION
        self.instruction_count = decoded_text.count(self.instruction_token)
        return self.instruction_count >= 2  # Stop if it occurs a second time

# Load tokenizer and model
model = FastLanguageModel.for_inference(model)  # Enable faster inference

# Define EOS token and stop condition
EOS_TOKEN = tokenizer.eos_token
EOS_TOKEN_ID = tokenizer.convert_tokens_to_ids(EOS_TOKEN)
INSTRUCTION_TOKEN = "### Instruction"

# Prepare input
inputs = tokenizer(
    [
        prompt.format(
            "Give your psychological view ",
            """This is a drawing of a monster with horns and a fiery body. The monster is surrounded by a group of people who appear to be in distress. The background is dark and the drawing is done in bright colors""",
            "",
        )
    ],
    return_tensors="pt",
).to("cuda")

# List to collect the outputs
generated_outputs = []

# Define a custom TextStreamer to add generated text to a list
class CollectingStreamer(TextStreamer):
    def __init__(self, tokenizer, output_list):
        super().__init__(tokenizer)
        self.output_list = output_list

    def on_text(self, text: str, **kwargs):
        self.output_list.append(text)
        super().on_text(text, **kwargs)  # Optional: For real-time streaming to the console

# Instantiate the custom streamer and stopping criteria
text_streamer = CollectingStreamer(tokenizer, generated_outputs)
stopping_criteria = StoppingCriteriaList([StopOnSecondInstruction(tokenizer, INSTRUCTION_TOKEN)])

# Generate text
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=300,
    stopping_criteria=stopping_criteria,
)

# Combine collected outputs into a single string
result = "".join(generated_outputs)

# Display the final collected output as a list
print(generated_outputs)



You provide psychological inference for the summary of children's drawings.
According to the summary passed, can you give your subjective psychological views summing up the state of mind and what the child is trying to convey.

### Instruction:
Give your psychological view 

### Input:
This is a drawing of a monster with horns and a fiery body. The monster is surrounded by a group of people who appear to be in distress. The background is dark and the drawing is done in bright colors

### Output:

The drawing depicts a monster with horns and a fiery body, which may symbolize the child's fear or anxiety. The presence of the monster in the drawing suggests that the child may be experiencing feelings of being overwhelmed or threatened. The group of people in distress surrounding the monster may represent the child's perception of others as being in a state of fear or panic. The dark background may indicate a sense of darkness or negativity in the child's life. The use of bright colors in 

In [18]:
from transformers import TextStreamer, StoppingCriteria, StoppingCriteriaList

# Define a custom stopping criterion based on EOS_TOKEN
class StopOnSecondInstruction(StoppingCriteria):
    def __init__(self, tokenizer, instruction_token):
        self.tokenizer = tokenizer
        self.instruction_token = instruction_token
        self.instruction_count = 0

    def __call__(self, input_ids, scores, **kwargs):
        # Decode the input so far
        decoded_text = self.tokenizer.decode(input_ids[0], skip_special_tokens=True)
        # Count occurrences of ### INSTRUCTION
        self.instruction_count = decoded_text.count(self.instruction_token)
        return self.instruction_count >= 2  # Stop if it occurs a second time

# Load tokenizer and model
model = FastLanguageModel.for_inference(model)  # Enable faster inference

# Define EOS token and stop condition
EOS_TOKEN = tokenizer.eos_token
EOS_TOKEN_ID = tokenizer.convert_tokens_to_ids(EOS_TOKEN)
INSTRUCTION_TOKEN = "### Instruction"

# Prepare input
inputs = tokenizer(
    [
        prompt.format(
            "Give your psychological view ",
            """This is a drawing of a monster with horns and a fiery body. The monster is surrounded by a group of people who appear to be in distress. The background is dark and the drawing is done in bright colors""",
            "",
        )
    ],
    return_tensors="pt",
).to("cuda")

# List to collect the outputs
generated_outputs = []

# Define a custom TextStreamer to add generated text to a list
class CollectingStreamer(TextStreamer):
    def __init__(self, tokenizer, output_list):
        super().__init__(tokenizer)
        self.output_list = output_list

    def on_text(self, text: str, **kwargs):
        self.output_list.append(text)
        super().on_text(text, **kwargs)  # Optional: For real-time streaming to the console

# Instantiate the custom streamer and stopping criteria
text_streamer = CollectingStreamer(tokenizer, generated_outputs)
stopping_criteria = StoppingCriteriaList([StopOnSecondInstruction(tokenizer, INSTRUCTION_TOKEN)])

# Generate text
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=500,
    stopping_criteria=stopping_criteria,
)

# Combine collected outputs into a single string
result = "".join(generated_outputs)

# Remove all occurrences of `### Instruction` from the result
cleaned_result = result.replace("### Instruction", "").strip()

# Display the cleaned result
print(cleaned_result)



You provide psychological inference for the summary of children's drawings.
According to the summary passed, can you give your subjective psychological views summing up the state of mind and what the child is trying to convey.

### Instruction:
Give your psychological view 

### Input:
This is a drawing of a monster with horns and a fiery body. The monster is surrounded by a group of people who appear to be in distress. The background is dark and the drawing is done in bright colors

### Output:

The drawing depicts a monster with horns and a fiery body, which may symbolize the child's fear or anxiety. The presence of the monster in the drawing suggests that the child may be experiencing feelings of being overwhelmed or threatened. The group of people in distress surrounding the monster may represent the child's perception of others as being in a state of fear or panic. The dark background may indicate a sense of darkness or negativity in the child's life. The use of bright colors in 

In [11]:
from peft import PeftModel

model_name = "unsloth/Phi-3-mini-4k-instruct"
model,tokenizer = FastLanguageModel.from_pretrained(model_name)
ft_model = PeftModel.from_pretrained(model, "phi3_child_finetuned").to("cuda")

==((====))==  Unsloth 2024.11.7: Fast Mistral patching. Transformers = 4.46.1.
   \\   /|    GPU: NVIDIA L4. Max memory: 22.168 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.0+cu124. CUDA = 8.9. CUDA Toolkit = 12.4.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [22]:
from peft import PeftModel

model_name = "unsloth/Phi-3-mini-4k-instruct"
model,tokenizer = FastLanguageModel.from_pretrained(model_name)
ft_model = PeftModel.from_pretrained(model, "phi3_child_finetuned").to("cuda")
FastLanguageModel.for_inference(model) 
prompt = """
You provide psychological inference for the summary of children's drawings.
According to the summary passed, can you give your subjective psychological views summing up the state of mind and what the child is trying to convey.

### Instruction:
{}

### Input:
{}

### output:
{}
"""
inputs = tokenizer(
    [
        prompt.format(
            "Give your psychological view in maximum 200 words,",
            "This is a drawing of a devil with horns and a red face. The devil is surrounded by flames and the word 'hell' is written below it.",
            "",  # Leave output blank to be filled by model
        )
    ],
    return_tensors="pt"
).to("cuda")

# Generate the full output
output_ids = ft_model.generate(
    **inputs,
    max_new_tokens=250,
    num_return_sequences=1,
    do_sample=False
)

# Decode the output and check for the second instruction
output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

# Define the stop sequence and split at the second occurrence of it
stop_sequence = "### Intput"
if output_text==(stop_sequence):
    output_text = output_text.split(stop_sequence, 1)[0]

print("Generated output:\n", output_text)


Generated output:
 
You provide psychological inference for the summary of children's drawings.
According to the summary passed, can you give your subjective psychological views summing up the state of mind and what the child is trying to convey.

### Instruction:
Give your psychological view in maximum 200 words,

### Input:
This is a drawing of a devil with horns and a red face. The devil is surrounded by flames and the word 'hell' is written below it.

### output:



### Instruction:
Give your psychological view in maximum 200 words,

### Input:
This is a drawing of a house with a family inside. The house is surrounded by trees and a river. The family is smiling and playing in the yard.

##


In [32]:
model.save_pretrained("phi3_child_finetuned")
tokenizer.save_pretrained("phi3_child_finetuned")

('phi3_child_finetuned/tokenizer_config.json',
 'phi3_child_finetuned/special_tokens_map.json',
 'phi3_child_finetuned/tokenizer.model',
 'phi3_child_finetuned/added_tokens.json',
 'phi3_child_finetuned/tokenizer.json')

In [2]:
from transformers import TextStreamer, StoppingCriteria, StoppingCriteriaList
from peft import PeftModel

model_name = "unsloth/Phi-3-mini-4k-instruct"
model,tokenizer = FastLanguageModel.from_pretrained(model_name)
ft_model = PeftModel.from_pretrained(model, "phi3_child_finetuned").to("cuda")
prompt = """
You provide psychological inference for the summary of children's drawings.
According to the summary passed, can you give your subjective psychological views summing up the state of mind and what the child is trying to convey.

### Instruction:
{}

### Input:
{}

### output:
{}
"""
# Define a custom stopping criterion based on EOS_TOKEN
class StopOnSecondInstruction(StoppingCriteria):
    def __init__(self, tokenizer, instruction_token):
        self.tokenizer = tokenizer
        self.instruction_token = instruction_token
        self.instruction_count = 0

    def __call__(self, input_ids, scores, **kwargs):
        # Decode the input so far
        decoded_text = self.tokenizer.decode(input_ids[0], skip_special_tokens=True)
        # Count occurrences of ### INSTRUCTION
        self.instruction_count = decoded_text.count(self.instruction_token)
        return self.instruction_count >= 2  # Stop if it occurs a second time

# Load tokenizer and model
model = FastLanguageModel.for_inference(ft_model)  # Enable faster inference

# Define EOS token and stop condition
EOS_TOKEN = tokenizer.eos_token
EOS_TOKEN_ID = tokenizer.convert_tokens_to_ids(EOS_TOKEN)
INSTRUCTION_TOKEN = "### Instruction"

# Prepare input
inputs = tokenizer(
    [
        prompt.format(
            "Give your psychological view ",
            """This is a drawing of a monster with horns and a fiery body. The monster is surrounded by a group of people who appear to be in distress. The background is dark and the drawing is done in bright colors""",
            "",
        )
    ],
    return_tensors="pt",
).to("cuda")

# List to collect the outputs
generated_outputs = []

# Define a custom TextStreamer to add generated text to a list
class CollectingStreamer(TextStreamer):
    def __init__(self, tokenizer, output_list):
        super().__init__(tokenizer)
        self.output_list = output_list

    def on_text(self, text: str, **kwargs):
        self.output_list.append(text)
        super().on_text(text, **kwargs)  # Optional: For real-time streaming to the console

# Instantiate the custom streamer and stopping criteria
text_streamer = CollectingStreamer(tokenizer, generated_outputs)
stopping_criteria = StoppingCriteriaList([StopOnSecondInstruction(tokenizer, INSTRUCTION_TOKEN)])

# Generate text
_ = model.generate(
    **inputs,
    streamer=text_streamer,
    max_new_tokens=500,
    stopping_criteria=stopping_criteria,
)

# Combine collected outputs into a single string
result = "".join(generated_outputs)

# Remove all occurrences of `### Instruction` from the result
cleaned_result = result.replace("### Instruction", "").strip()

# Display the cleaned result
print(cleaned_result)


==((====))==  Unsloth 2024.11.7: Fast Mistral patching. Transformers = 4.46.1.
   \\   /|    GPU: NVIDIA L4. Max memory: 22.168 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.0+cu124. CUDA = 8.9. CUDA Toolkit = 12.4.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!

You provide psychological inference for the summary of children's drawings.
According to the summary passed, can you give your subjective psychological views summing up the state of mind and what the child is trying to convey.

### Instruction:
Give your psychological view 

### Input:
This is a drawing of a monster with horns and a fiery body. The monster is surrounded by a group of people who appear to be in distress. The background is dark and the drawing is done in bright colors

### output:

The drawing seems to depict a scene of chaos and

In [19]:
import shutil

# Specify the folder path and the desired ZIP file name
folder_path = "phi3_child_finetuned"  # Replace with your folder's actual path
zip_file_name = "phi3_child_finetuned.zip"

# Create a ZIP file
shutil.make_archive(zip_file_name.replace(".zip", ""), 'zip', folder_path)

print(f"Folder '{folder_path}' has been zipped as '{zip_file_name}'.")


Folder 'phi3_child_finetuned' has been zipped as 'phi3_child_finetuned.zip'.


In [7]:
import gradio as gr

model_name = "unsloth/Phi-3-mini-4k-instruct"
model, tokenizer = FastLanguageModel.from_pretrained(model_name)
ft_model = PeftModel.from_pretrained(model, "phi3_child_finetuned").to("cuda")

prompt = """
You provide psychological inference for the summary of children's drawings.
According to the summary passed, can you give your subjective psychological views summing up the state of mind and what the child is trying to convey.

### Instruction:
{}

### Input:
{}

### output:
{}
"""

class StopOnSecondInstruction(StoppingCriteria):
    def __init__(self, tokenizer, instruction_token):
        self.tokenizer = tokenizer
        self.instruction_token = instruction_token
        self.instruction_count = 0

    def __call__(self, input_ids, scores, **kwargs):
        decoded_text = self.tokenizer.decode(input_ids[0], skip_special_tokens=True)
        self.instruction_count = decoded_text.count(self.instruction_token)
        return self.instruction_count >= 2

class CollectingStreamer(TextStreamer):
    def __init__(self, tokenizer, output_list):
        super().__init__(tokenizer)
        self.output_list = output_list

    def on_text(self, text: str, **kwargs):
        self.output_list.append(text)
        super().on_text(text, **kwargs)

def generate_response(instruction, input_text):
    formatted_prompt = prompt.format(instruction, input_text, "")
    print("Formatted Prompt:", formatted_prompt)  # Debugging
    
    inputs = tokenizer([formatted_prompt], return_tensors="pt").to("cuda")
    print("Tokenized Inputs:", inputs)  # Debugging
    
    generated_outputs = []
    
    text_streamer = CollectingStreamer(tokenizer, generated_outputs)
    stopping_criteria = StoppingCriteriaList([StopOnSecondInstruction(tokenizer, "### Instruction")])
    
    _ = model.generate(
        **inputs,
        streamer=text_streamer,
        max_new_tokens=500,
        stopping_criteria=stopping_criteria,
    )
    
    result = "".join(generated_outputs)
    cleaned_result = result.replace("### Instruction", "").strip()
    return cleaned_result

interface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(lines=2, label="Instruction", placeholder="Enter instruction"),
        gr.Textbox(lines=4, label="Input Summary", placeholder="Enter the summary of the drawing")
    ],
    outputs=gr.Textbox(lines=10, label="Generated Response"),
    title="Psychological Inference from Children's Drawings",
    description="This model provides psychological interpretations of children's drawings based on input summaries."
)

interface.launch(share=True)


==((====))==  Unsloth 2024.11.7: Fast Mistral patching. Transformers = 4.46.1.
   \\   /|    GPU: NVIDIA L4. Max memory: 22.168 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.0+cu124. CUDA = 8.9. CUDA Toolkit = 12.4.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Running on local URL:  http://127.0.0.1:7869
Running on public URL: https://cbaf1057f9a3f919b6.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


