In [None]:
### Install and import libraries
!pip install pandas openpyxl
!pip install dataset
!pip install unsloth
!pip install gradio
# Also get the latest nightly Unsloth!
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install sentence-transformers bert-score nltk rouge-score
import pandas as pd
import json
from google.colab import files
import numpy as np
from datasets import Dataset
from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

Creation of the API for open-ended questions (for a given model)

In [None]:
"""
Script for generating responses to open-ended questions using a fine-tuned language model.
The script includes:
1. Initialization of a fine-tuned model and tokenizer from a checkpoint directory.
2. A function to generate responses using the fine-tuned model.
3. A Gradio interface for user interaction, allowing questions to be input and responses to be displayed.

Modules:
    gradio: For creating a web-based interface.
    unsloth.FastLanguageModel: For loading and interacting with the fine-tuned language model.
    torch: For tensor operations and model inference.

Usage:
    - Place your fine-tuned model and tokenizer checkpoint in the directory `open_ended_questions_model`.
    - Run the script to launch a Gradio interface for asking questions.
"""

import gradio as gr
from unsloth import FastLanguageModel
import torch

# Load the fine-tuned model together with its tokenizer from the local
# `open_ended_questions_model` checkpoint directory. That directory must
# contain both the model files and the tokenizer files.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name='./open_ended_questions_model',
)

def generate_response(instruction):
    """
    Generates a response to a given instruction using the fine-tuned model.

    The question is inserted into a fixed instruction/question/answer prompt,
    the model continues that prompt, and the text after the last
    "### Answer:" marker in the decoded output is returned.

    Args:
        instruction (str): The question or instruction for the model.

    Returns:
        str: The model's generated response, with surrounding whitespace removed.
    """
    # Switch the fine-tuned model into Unsloth's inference mode
    FastLanguageModel.for_inference(model)

    # Construct a prompt for the model
    prompt = f"""### Instruction:
Answer the provided question with the knowledge provided to you
### Question:
{instruction}
### Answer:"""

    # Tokenize the prompt and place the tensors on the same device as the
    # model instead of assuming a fixed device name.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Generate a response using the model (no gradients needed for inference)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            # NOTE(review): early_stopping and length_penalty are documented as
            # beam-search settings; no num_beams is passed here - confirm they
            # are still wanted.
            early_stopping=False,
            min_length=50,
            length_penalty=2,
            # Limit the number of *generated* tokens. `max_length` also counts
            # the prompt tokens, so a long question could use up the whole
            # budget and leave no room for the answer.
            max_new_tokens=300,
        )

    # Decode the full sequence (prompt + continuation) and keep only the text
    # that follows the final answer marker.
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = response.split("### Answer:")[-1].strip()
    return response

def greet(question):
    """
    Processes the user's input and returns a model-generated response.

    Blank input (None, an empty string, or whitespace only) is rejected
    without calling the model.

    Args:
        question (str): The user's question.

    Returns:
        str: The response generated by the model followed by "!", or a fixed
        message when no question was supplied.
    """
    # Guard clause: do not spend a model call on a question with no content.
    if question is None or not question.strip():
        return "No question was given to answer"

    response = generate_response(question)
    # NOTE(review): the trailing "!" is kept from the original behavior;
    # confirm it is intended rather than a leftover from a greeting template.
    return f"{response}!"

# Build the web UI: a single labelled textbox feeds `greet`, and the returned
# text is shown in an output textbox.
question_box = gr.Textbox(label='question')
demo = gr.Interface(fn=greet, inputs=[question_box], outputs="textbox")

# Start serving the interface.
demo.launch()
# The string literal below is kept verbatim: if it is the final expression of
# a notebook cell it is presumably echoed as the cell's output.
"""
Launches a Gradio interface with:
- A text input for the user to ask questions.
- A text output for displaying the model's response.
"""
