In [1]:
import gradio as gr
import transformers
from transformers import pipeline
from transformers import BertTokenizerFast, BertForQuestionAnswering, TrainingArguments, Trainer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Seq2SeqTrainer, Seq2SeqTrainingArguments
from transformers import MT5ForConditionalGeneration, MT5Tokenizer
import pandas as pd
import numpy as np
import torch
from bs4 import BeautifulSoup
from datasets import Dataset
import re

# Local directory the tokenizer and model weights are loaded from
# (the name suggests an mT5-base checkpoint — TODO confirm).
model_path = "./mt5-base-9038-8624"

# Loaded once at module level so every call to generate_prediction() reuses them.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
# Text-to-text generation pipeline wrapping the model and tokenizer loaded above.
qa_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer)

def clean_html_tags(html_text):
    """Return the visible text of *html_text* with all markup removed.

    ``<script>`` and ``<style>`` elements are dropped together with their
    contents. The remaining text fragments are individually stripped of
    surrounding whitespace and joined with newlines.
    """
    document = BeautifulSoup(html_text, 'html.parser')
    # Remove non-visible elements first so their contents never reach the text.
    non_visible = document.find_all(["script", "style"])
    for element in non_visible:
        element.decompose()
    plain_text = document.get_text(separator="\n", strip=True)
    return plain_text

# Build the model input
def text_to_input(text):
    """Wrap a raw job ad in the salary-extraction prompt.

    The ad is first reduced to plain text with ``clean_html_tags`` and then
    appended to a fixed instruction.

    Returns:
        A dict with the single key ``"input"`` holding the full prompt string.
    """
    instruction = (
        "Extract the salary info from the job ad below and return it as: "
        "\"MinimumSalary MaximumSalary Currency PayPeriod\"\n\n"
    )
    return {"input": instruction + clean_html_tags(text)}
  
# Shape the generated text must have to be accepted: number, number, letters,
# letters, separated by whitespace. Compiled once and used with fullmatch(),
# so no ^/$ anchors are needed.
_PREDICTION_PATTERN = re.compile(
    r"(\d+(?:\.\d+)?)\s+(\d+(?:\.\d+)?)\s+([A-Za-z]+)\s+([A-Za-z]+)"
)


def _round_half_up(number_text):
    """Round a non-negative decimal string to the nearest int, halves going up.

    Works on the digits directly instead of ``round(float(...))``: the built-in
    ``round`` rounds halves to the nearest even integer (``round(2.5) == 2``),
    and ``float`` overflows to ``inf`` on very long digit strings.
    """
    whole, _, fraction = number_text.partition(".")
    # Only the first fractional digit decides the direction for half-up rounding.
    first_fraction_digit = int(fraction[:1] or "0")
    return int(whole) + (1 if first_fraction_digit >= 5 else 0)


def generate_prediction(text):
    """Run the salary-extraction model on a job ad and normalise its answer.

    Args:
        text: Raw job ad; passed through ``text_to_input`` to build the prompt.

    Returns:
        ``"<min>-<max>-<currency>-<period>"`` with both amounts rounded half up
        to integers when the generated text has the expected
        "number number letters letters" shape; otherwise the fallback
        ``"0-0-None-None"``.
    """
    input_text = text_to_input(text)["input"]
    output = qa_pipeline(input_text)
    predict = output[0]["generated_text"].strip()

    match = _PREDICTION_PATTERN.fullmatch(predict)
    if match is None:
        # Anything that does not follow the expected format is treated as "no salary".
        return "0-0-None-None"

    min_pred, max_pred, currency_pred, period_pred = match.groups()
    min_pred = str(_round_half_up(min_pred))
    max_pred = str(_round_half_up(max_pred))
    return f"{min_pred}-{max_pred}-{currency_pred}-{period_pred}"


# Gradio UI with a single text input and a single text output, backed by generate_prediction.
demo = gr.Interface(fn=generate_prediction, inputs="text", outputs="text")
# share=True also exposes the app on a temporary public gradio.live URL in addition to the local one.
demo.launch(share=True)
# input ='''
# TEAM SALE Description NURRIFA RICH SDN BHD merupakan sebuah syarikat komersial dalam industri produk kesihatan sejak 2018 dan telah mencetuskan evolusi dalam bidang jamu moden dengan mengkomersialkan idea makanan kesihatan berbentuk pastil gula-gula halal pertama di Malaysia. Berpengkalan di Kuala Terengganu dan memiliki pasukan Team Sales yang mampu menghasilkan ratusan ribu sebulan di bawah bimbingan syarikat. Untuk info lebih lanjut boleh rujuk kepada FB kami Nurrifa HQ @ www.nurrifa.com. . SKOP KERJA: 1) Membantu bakal pelanggan membuat pembelian produk syarikat melalui Whatsapp/FB/IG dan CALL. 2) Follow Up pelanggan. 3) Update Media Sosial syarikat. . SYARAT KELAYAKAN: • Perempuan& Lelaki • Berumur 18-28 tahun. • Mempunyai skill komunikasi yang bagus. • Penampilan yang menarik. . KELEBIHAN: (RM1500-RM5000 sebulan) • Elaun Asas • Elaun Telco • Komisen Lumayan • Reward Bulanan • BONUS jika capai KPI • Suasana kerja yang selesa • Circle yang positif • Latihan jualan disediakan - Parking Percuma Company MENJUAL PRODUK KESIHATAN
# '''

# output = generate_prediction(input)

# print(output)

Device set to use mps:0


* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://d08666300f90074ea3.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Created dataset file at: .gradio/flagged/dataset2.csv
