In [1]:
# 필요한 라이브러리 설치
!pip install transformers



In [2]:
# transformers 라이브러리에서 필요한 모듈 임포트
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# 모델과 토크나이저 로드
model_name = "pongjin/roberta_with_kornli"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [3]:
# 파이프라인 생성
nli_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)

In [4]:
# 파이프라인 사용 예시
text = "여기가 정말 좋아요!"
result = nli_pipeline(text)
print(result)

[{'label': 'entailment', 'score': 0.927253246307373}]


In [5]:
class ArgumentHandler():
    """
    Base interface for handling arguments for each :class:`~transformers.pipelines.Pipeline`.
    """
    def __call__(self, *args, **kwargs):
        raise NotImplementedError()


class CustomZeroShotClassificationArgumentHandler(ArgumentHandler):
    """
    Handles arguments for zero-shot for text classification by turning each possible label into an NLI
    premise/hypothesis pair.
    """

    def _parse_labels(self, labels):
        if isinstance(labels, str):
            labels = [label.strip() for label in labels.split(",")]
        return labels

    def __call__(self, sequences, labels, hypothesis_template):
        if len(labels) == 0 or len(sequences) == 0:
            raise ValueError("You must include at least one label and at least one sequence.")
        if hypothesis_template.format(labels[0]) == hypothesis_template:
            raise ValueError(
                (
                    'The provided hypothesis_template "{}" was not able to be formatted with the target labels. '
                    "Make sure the passed template includes formatting syntax such as {{}} where the label should go."
                ).format(hypothesis_template)
            )

        if isinstance(sequences, str):
            sequences = [sequences]
        labels = self._parse_labels(labels)

        sequence_pairs = []
        for label in labels:
            # 수정부: 두 문장을 페어로 입력했을 때, `token_type_ids`가 자동으로 붙는 문제를 방지하기 위해 미리 두 문장을 `sep_token` 기준으로 이어주도록 함
            sequence_pairs.append(f"{sequences} {tokenizer.sep_token} {hypothesis_template.format(label)}")

        return sequence_pairs, sequences


In [6]:
classifier = pipeline(
    "zero-shot-classification",
    args_parser=CustomZeroShotClassificationArgumentHandler(),
    model="pongjin/roberta_with_kornli"
)


In [7]:
sequence = "배당락 D-1 코스피, 2330선 상승세...외인·기관 사자"
candidate_labels =["외환",'환율', "경제", "금융", "부동산","주식"]

classifier(
    sequence,
    candidate_labels,
    hypothesis_template='이는 {}에 관한 것이다.',
)

{'sequence': '배당락 D-1 코스피, 2330선 상승세...외인·기관 사자',
 'labels': ['주식', '금융', '경제', '외환', '환율', '부동산'],
 'scores': [0.5052869319915771,
  0.17972533404827118,
  0.1385299116373062,
  0.09460824728012085,
  0.04294918477535248,
  0.03890036791563034]}

In [8]:
sequence = "바질파스타 쉬림프알리오올리오"
candidate_labels = [
    "족발/보쌈",'돈까스', "회", '백반', "죽", "국수",
    "버거", "분식", "국밥", "일식", "고기/구이","피자",
    '치킨', "찜/탕/찌개", "양식", "중식", "아시안"
    ]

text = classifier(
    sequence,
    candidate_labels,
    hypothesis_template='이는 {}에 관한 것이다.',
)

In [9]:
classifier

<transformers.pipelines.zero_shot_classification.ZeroShotClassificationPipeline at 0x146aaa94150>

In [10]:
 def getLabel(dish_name) :
    sequence = dish_name
    candidate_labels = [
        "족발/보쌈",'돈까스', "회", '백반', "죽", "국수",
        "버거", "분식", "국밥", "일식", "고기/구이","피자",
        '치킨', "찜/탕/찌개", "양식", "중식", "아시안"
        ]

    text = classifier(
        sequence,
        candidate_labels,
        hypothesis_template='이는 {}에 관한 것이다.',
    )
    
    return text['labels'][0]

In [11]:
getLabel("피시프레시")

'피자'

In [12]:
import csv, os

In [14]:
f = open('c4_general_cafeteria_chuncheon_CC_opened_SS-HJ_sampling_menu-price.csv','r', newline='', encoding='utf-8')

rdr = csv.reader(f)

rowlist = []

for line in rdr :
    try :
        line.append(getLabel(line[2]))
    except : 
        print("No menus")
    rowlist.append(line)
    
with open('result.csv', 'w',newline='') as f: 
    # using csv.writer method from CSV package 
    write = csv.writer(f)  
    write.writerows(rowlist)

No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
No menus
