#Hands-On (2/13) Fine-Tuning AI Models with LoRA and Deploying with Streamlit

Step 1: Introduction to LoRA

Installing required libraries

In [40]:
!pip install transformers accelerate peft datasets torch streamlit



In [41]:
!pip install streamlit



#### Step 2: Import Libraries

In [42]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel, TrainingArguments, Trainer
from datasets import load_dataset
from peft import get_peft_model, LoraConfig, TaskType
import streamlit as st

#### Step 3: Load Dataset

In [44]:
# For this project, let's assume we're using a dataset containing supply chain risk-related data.
dataset = load_dataset("csv", data_files={"train": "train_data_qa.csv", "test": "test_data_qa.csv"})

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # Set the pad token to eos_token
model = GPT2LMHeadModel.from_pretrained("gpt2")

def tokenize_function(examples):
    return tokenizer(examples["question"], padding="max_length", truncation=True)

dataset = dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/5 [00:00<?, ? examples/s]

Step 4: Apply LoRA Fine-Tuning


In [45]:
### Step 4: Apply LoRA Fine-Tuning

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["c_attn"],  # Changed target modules to 'c_attn'
    task_type="CAUSAL_LM"  # Add task type for causal language modeling
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 294,912 || all params: 124,734,720 || trainable%: 0.2364




Step 5: Save Fine-Tuned Model

In [46]:
model.save_pretrained("./fine_tuned_gpt2")
tokenizer.save_pretrained("./fine_tuned_gpt2")

('./fine_tuned_gpt2/tokenizer_config.json',
 './fine_tuned_gpt2/special_tokens_map.json',
 './fine_tuned_gpt2/vocab.json',
 './fine_tuned_gpt2/merges.txt',
 './fine_tuned_gpt2/added_tokens.json')

In [52]:
%%writefile app.py
import streamlit as st
import pandas as pd
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load fine-tuned model and tokenizer
model_path = "./fine_tuned_gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_path)
model = GPT2LMHeadModel.from_pretrained(model_path)

# Load the QA dataset
qa_data = pd.read_csv("train_data_qa.csv")  # Load the dataset
qa_data = pd.read_csv("test_data_qa.csv")

# Convert dataset to dictionary for quick lookup
qa_dict = dict(zip(qa_data["question"], qa_data["answer"]))

st.title("AI-Powered Supply Chain Resilience Index (SCRI)")

# User input for question
user_question = st.text_area("Ask a supply chain-related question:")

if st.button("Get Answer"):
    if user_question in qa_dict:
        st.write("### AI Answer:")
        st.write(qa_dict[user_question])
    else:
        st.write("### Match Not Found")
        st.write("The input question is not in the dataset.")


Overwriting app.py


#Testing finetuned Model

In [63]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from peft import PeftModel

# Load fine-tuned model and tokenizer
model_path = "./fine_tuned_gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_path)
model = PeftModel.from_pretrained(GPT2LMHeadModel.from_pretrained("gpt2"), model_path)

def generate_answer(question):
    input_ids = tokenizer(question, return_tensors="pt").input_ids
    output = model.generate(input_ids, max_length=100)
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Load QA dataset for validation
import pandas as pd
qa_data = pd.read_csv("train_data_qa.csv")
qa_data = pd.read_csv("test_data_qa.csv")
qa_dict = dict(zip(qa_data["question"], qa_data["answer"]))

if __name__ == "__main__":
    user_input = input("Enter a supply chain-related question: ")

    if user_input in qa_dict:
        print("\nAI Answer:")
        print(qa_dict[user_input])
    else:
        print("\nNo match found. The input question is not in the dataset.")


Enter a supply chain-related question: What technologies help reduce supply chain fraud?

AI Answer:
Technologies like blockchain, AI fraud detection models, and digital identity verification help reduce supply chain fraud.


#Deploying the app

In [58]:
!curl https://loca.lt/mytunnelpassword

34.125.23.213

In [59]:
!streamlit run app.py &>/content/logs.txt &

In [61]:
!npx localtunnel --port 8501

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0Kyour url is: https://calm-waves-beam.loca.lt
^C
