<a href="https://colab.research.google.com/github/22yjeong/Python-GPT/blob/main/Python_GPT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install datasets torch flask flask-ngrok pyngrok huggingface_hub

In [None]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

In [None]:
import getpass
import html
import os

import torch
from datasets import load_dataset
from flask import Flask, request, render_template_string
from peft import LoraConfig, PeftModel
from pyngrok import ngrok, conf
from transformers import Trainer, TrainingArguments, pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from trl import SFTTrainer

In [None]:
# Set up quantization arguments
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,  # Load the weights with 4-bit quantization
    bnb_4bit_compute_dtype=torch.float16,  # Run computation in FP16
    bnb_4bit_quant_type="nf4",  # Use the "nf4" 4-bit quantization type
)

# Never commit access tokens. Read the Hugging Face token from the HF_TOKEN
# environment variable and fall back to an interactive prompt (e.g. on Colab).
# Any token that was previously hardcoded in this notebook must be treated as
# leaked and revoked in the Hugging Face account settings.
Auth_token = os.environ.get("HF_TOKEN") or getpass.getpass("Hugging Face access token: ")

# Checkpoint used for both the tokenizer and the model
MODEL_ID = 'aboonaji/llama2finetune-v2'

# Load tokenizer and model (both receive the token so gated/private repos work for either)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True, use_auth_token=Auth_token)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, quantization_config=quantization_config, use_auth_token=Auth_token)

# Reuse the end-of-sequence token as the padding token and pad on the right
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = 'right'

# Training-time model settings: turn off `use_cache` and set `pretraining_tp` to 1
model.config.use_cache = False
model.config.pretraining_tp = 1


In [None]:
def update_prompt(dataset_name, new_prompt_template):
    """Load a dataset and rebuild every example's ``text`` around a new prompt template.

    Args:
        dataset_name: Identifier handed to ``load_dataset``.
        new_prompt_template: Prompt text containing the literal placeholder
            ``{{ user_query }}``, which is replaced by the example's question.

    Returns:
        The result of ``dataset.map`` with each example's ``text`` rewritten as
        ``<template with the question filled in><original answer part>``.
    """
    # Load the dataset
    dataset = load_dataset(dataset_name)

    def modify_prompt(example):
        # NOTE(review): assumes each `text` follows the Llama-2 layout
        # "[INST] question [/INST] answer" -- confirm against the dataset card.
        # Previously the part AFTER "[/INST]" (the answer) was substituted into
        # the user-query slot, which dropped the question and left no response.
        question_part, _, answer_part = example['text'].partition('[/INST]')

        # Remove the original wrappers so they are not duplicated by the template.
        # If "[/INST]" is absent, the whole text is used as the question.
        user_query = question_part.replace('<s>', '').replace('[INST]', '').strip()

        # Fill the template, then re-attach whatever followed "[/INST]".
        example['text'] = new_prompt_template.replace('{{ user_query }}', user_query) + answer_part
        return example

    # Apply the modification to the entire dataset
    updated_dataset = dataset.map(modify_prompt)

    return updated_dataset

# New prompt template for the AI assistant.
# The pieces below are adjacent string literals that Python joins with no separator,
# so each sentence ends with an explicit trailing space.
new_prompt = (
    "<s>[INST] <<SYS>> You are a helpful AI assistant specialized in the Python coding language. "
    "Always provide clear, accurate, and concise answers in English. "
    "If you are unsure about something, it is better to acknowledge your uncertainty than to provide false information. "
    "Ensure that you understand and can clearly explain how any provided code works. <</SYS>> {{ user_query }} [/INST]"
)

# Other candidate datasets:
#   luisroque/instruct-python-llama2-20k
#   luisroque/instruct-python-llama2-500k
#   Megnis/python_code_instructions_18k_LlaMa2
# Apply the new prompt engineering to the dataset.
# NOTE(review): the trainer cell uses `dataset['train']`, not `updated_dataset` --
# confirm which dataset is meant to be trained on.
updated_dataset = update_prompt("luisroque/instruct-python-llama2-500k", new_prompt)

In [None]:
# Fetch the instruction dataset that the formatting and training cells operate on
raw_dataset_id = "jtatman/python-code-dataset-500k"
dataset = load_dataset(raw_dataset_id)

# Formatting helper: turns one dataset row into one prompt/answer training string
def format_for_llama(row):
    """Build the training text for a single row.

    Args:
        row: Mapping providing the keys ``'system'``, ``'instruction'`` and ``'output'``.

    Returns:
        A string with the system text inside ``<<SYS>> ... <</SYS>>``, the
        instruction before ``[/INST]`` and the output after it, wrapped in
        ``<s>`` / ``</s>``.
    """
    system_text = row['system']
    instruction_text = row['instruction']
    output_text = row['output']
    return f"<s>[INST] <<SYS>> {system_text} <</SYS>> {instruction_text} [/INST] {output_text} </s>"

# Add a 'text' column built by format_for_llama for every row, then drop the
# three source columns it was built from.
dataset = (
    dataset
    .map(lambda row: dict(text=format_for_llama(row)))
    .remove_columns(['system', 'instruction', 'output'])
)

In [None]:
# Optimizer and learning-rate scheduler, selected by name and forwarded to
# TrainingArguments below ("cosine" selects a cosine learning-rate schedule).
optimizer_type, scheduler_type = "paged_adamw_32bit", "cosine"

# Training configuration:
#   - checkpoints and logs are written under ./results
#   - the run stops after 200 optimizer steps, with 4 examples per device per step
#   - fp16 mixed-precision training is switched on
#   - metrics are logged every 25 steps
training_args = TrainingArguments(
    output_dir='./results',
    max_steps=200,
    per_device_train_batch_size=4,
    logging_steps=25,
    fp16=True,
    optim=optimizer_type,
    lr_scheduler_type=scheduler_type,
)


In [None]:
# LoRA adapter settings used for parameter-efficient fine-tuning
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=256,
    lora_alpha=16,
    lora_dropout=0.1,
)

# Supervised fine-tuning trainer: the model, its tokenizer, the training arguments,
# the 'train' split, and the name of the dataset column holding the training text.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=dataset['train'],
    dataset_text_field="text",
    peft_config=lora_config,
)

In [None]:
# Run fine-tuning with the trainer configured above.
# No arguments are passed, so no checkpoint to resume from is specified here.
trainer.train()

In [None]:
# ngrok authtoken: never commit it. Read it from the NGROK_AUTHTOKEN environment
# variable and fall back to an interactive prompt (e.g. on Colab). Any token that
# was previously hardcoded in this notebook must be treated as leaked and revoked
# in the ngrok dashboard.
NGROK_AUTHTOKEN = os.environ.get("NGROK_AUTHTOKEN") or getpass.getpass("ngrok authtoken: ")
ngrok.set_auth_token(NGROK_AUTHTOKEN)

# Initialize the Flask application
app = Flask(__name__)

# HTML template for the web page (a Jinja template string).
# Template variables:
#   query    - pre-fills the text input's value attribute.
#   response - when truthy, shown in the response container. It is rendered with
#              the `|safe` filter, i.e. it is NOT escaped by the template, so
#              whatever is passed in must already be safe HTML.
HTML_TEMPLATE = """
<!doctype html>
<html>
    <head>
        <title>Python GPT</title>
        <style>
            body {
                background-color: #2c2c2c;
                color: #ffffff;
                font-family: Arial, sans-serif;
                margin: 0;
                padding: 20px;
                text-align: center;
            }
            h1 {
                color: #ffffff;
            }
            label {
                color: #ffffff;
            }
            input[type="text"] {
                width: 60%;
                padding: 10px;
                margin: 10px 0;
                box-sizing: border-box;
                border: 2px solid #ffffff;
                border-radius: 4px;
                background-color: #3c3c3c;
                color: #ffffff;
                font-size: 16px;  /* Restored original size */
            }
            input[type="submit"] {
                padding: 10px 20px;
                border: none;
                border-radius: 4px;
                background-color: #4caf50;
                color: white;
                cursor: pointer;
            }
            input[type="submit"]:hover {
                background-color: #45a049;
            }
            .response-container {
                max-height: 400px;
                overflow-y: auto;
                margin-top: 20px;
                border: 1px solid #ffffff;
                border-radius: 4px;
                background-color: #3c3c3c;
                padding: 10px;
                text-align: left;
            }
            .response-container p {
                color: #d3d3d3;
                background-color: #444444;
                padding: 10px;
                border-radius: 4px;
                display: inline-block;
                width: 100%;
                box-sizing: border-box;
                word-wrap: break-word;
                white-space: pre-wrap;
            }
            .response-container pre {
                background-color: #2c2c2c;
                color: #ffffff;
                padding: 10px;
                border-radius: 4px;
                overflow-x: auto;
            }
            .input-group {
                margin-bottom: 20px;
                display: flex;
                justify-content: center;
                align-items: center;
            }
        </style>
    </head>
    <body>
        <h1>Python Assistant</h1>
        <form action="/" method="post">
            <div class="input-group">
                <label for="query">Enter your Questions about Python code here</label>
            </div>
            <div class="input-group">
                <input type="text" id="query" name="query" value="{{ query }}" required>
            </div>
            <input type="submit" value="Submit">
        </form>
        {% if response %}
            <div class="response-container">
                <h2>Response:</h2>
                <p>{{response|safe}}</p>
            </div>
        {% endif %}
    </body>
</html>
"""

# Text-generation pipeline built from the trainer's model and the shared tokenizer
generator = pipeline(
    task="text-generation",
    model=trainer.model,
    tokenizer=tokenizer,
)

def format_response(text):
    """Turn raw generated text into an HTML-safe fragment for the response box.

    The page template renders this value with the ``|safe`` filter (no
    autoescaping), and the text contains both user input and model output, so
    it is HTML-escaped here before any markup is added.

    Args:
        text: Generated text, possibly containing ``<s>``, ``</s>``, ``[INST]``
            and ``[/INST]`` markers.

    Returns:
        The text with those markers removed, surrounding whitespace trimmed,
        HTML special characters escaped, and newlines replaced by ``<br>``.
    """
    # Strip the sequence / instruction markers (same order as before)
    for marker in ('<s>', '</s>', '[INST]', '[/INST]'):
        text = text.replace(marker, '')

    # Trim leading/trailing whitespace to avoid extra spaces
    text = text.strip()

    # Escape <, >, &, and quotes so the text cannot inject markup or scripts,
    # and so code snippets containing these characters display literally.
    text = html.escape(text)

    # Add HTML line breaks (done after escaping so the <br> tags survive)
    return text.replace('\n', '<br>')

@app.route("/", methods=["GET", "POST"])
def home():
    """Serve the query form and, on POST, the model's answer.

    GET renders the empty form. POST reads the ``query`` form field, wraps it
    in ``<s>[INST] ... [/INST]``, runs the text-generation pipeline, and renders
    the formatted result below the form.

    Returns:
        The rendered HTML page.
    """
    response = None
    user_query = ""  # echoed back into the input box; empty on a plain GET
    if request.method == "POST":
        user_query = request.form["query"]
        max_length = 1024  # Set a fixed max_length value
        result = generator(f"<s>[INST] {user_query} [/INST]", max_length=max_length)
        response = format_response(result[0]['generated_text'])

    # The template reads both `response` and `query`; `query` was previously
    # never supplied, so the input field was always blank after submitting.
    return render_template_string(HTML_TEMPLATE, response=response, query=user_query)


# Open a ngrok tunnel to the HTTP server
if __name__ == "__main__":
    # Single source of truth for the port used by both the tunnel and Flask
    port = 5000

    # Current pyngrok returns an NgrokTunnel object whose `public_url` attribute
    # holds the URL; older releases returned the URL string directly. Handle both
    # so the printed line shows the URL rather than an object repr.
    tunnel = ngrok.connect(port)
    public_url = getattr(tunnel, "public_url", tunnel)
    print(" * ngrok tunnel \"{}\" -> \"http://127.0.0.1:{}\"".format(public_url, port))

    # Run the Flask application on the tunnelled port
    app.run(port=port)

