# Import Libraries

In [1]:
import pandas as pd
import numpy as np
import spacy
import matplotlib.pyplot as plt
import os
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, AutoModelForCausalLM
import torch
import re
import tensorflow as tf

import warnings

# Notebook-wide setup: silence every warning and lift pandas' display truncation.
warnings.filterwarnings('ignore')

# A value of None removes the corresponding pandas limit (columns, rows, cell width);
# the overall console width is fixed at 1000 characters.
for _option, _value in (
    ('display.max_columns', None),
    ('display.max_rows', None),
    ('display.max_colwidth', None),
    ('display.width', 1000),
):
    pd.set_option(_option, _value)

# Load spaCy's small English pipeline and bind it to `nlp`.
# NOTE(review): the 'en_core_web_sm' model package presumably has to be installed
# in the kernel's environment beforehand — confirm in the setup instructions.
nlp = spacy.load('en_core_web_sm')

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import appdirs

# Show the per-user data directory that appdirs resolves for the "Open Interpreter" app name.
open_interpreter_dir = appdirs.user_data_dir("Open Interpreter")
print(open_interpreter_dir)


/Users/pranitdas/Library/Application Support/Open Interpreter


# Question Classifier

## Text-Classification

In [None]:
# Hub checkpoint used as the ready-made question classifier; tokenizer and model
# are both loaded from it and wrapped in a text-classification pipeline.
BLOOMINZER_CHECKPOINT = "uw-vta/bloominzer-0.1"

tokenizer = AutoTokenizer.from_pretrained(BLOOMINZER_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(BLOOMINZER_CHECKPOINT)
bloominzer = pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
)

In [None]:
# Smoke-test the classifier on one sample question. In the recorded run the pipeline
# returned a one-element list holding a dict with 'label' and 'score' keys.
print(bloominzer("If I have 2 pair of apple, can i make apple pie with it?"))

Device set to use mps:0


[{'label': 'Synthesis', 'score': 0.9990537762641907}]


## LLM Classification

## Zero-Shot

### BART

In [None]:
# BART fine-tuned on MNLI, wrapped as a zero-shot classifier.
# Note: `tokenizer` and `model` are rebound here, replacing the objects from earlier cells.
BART_MNLI_CHECKPOINT = "facebook/bart-large-mnli"

tokenizer = AutoTokenizer.from_pretrained(BART_MNLI_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(BART_MNLI_CHECKPOINT)

bart = pipeline(
    task="zero-shot-classification",
    model=model,
    tokenizer=tokenizer,
)

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Device set to use mps:0


In [None]:
# Sample question to place on one of the six candidate levels listed below.
sequence_to_classify = "If I have 2 pair of apple, can i make apple pie with it?"
candidate_labels = [
    'knowledge',
    'comprehension',
    'application',
    'analysis',
    'synthesis',
    'evaluation',
]

# `label` holds the pipeline's full result mapping, not just a single label string.
label = bart(sequence_to_classify, candidate_labels=candidate_labels)

In [15]:
# First entry of the result's 'labels' list, used here as the predicted level
# (the recorded run shows 'application').
label['labels'][0]

'application'

### bart-large-mnli-yahoo-answers

In [26]:
# Second zero-shot classifier, built from the yahoo-answers variant of BART-MNLI.
# Note: `tokenizer` and `model` are rebound here, replacing the objects from earlier cells.
YAHOO_ANSWERS_CHECKPOINT = "joeddav/bart-large-mnli-yahoo-answers"

tokenizer = AutoTokenizer.from_pretrained(YAHOO_ANSWERS_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(YAHOO_ANSWERS_CHECKPOINT)

ya_classifier = pipeline(
    task="zero-shot-classification",
    model=model,
    tokenizer=tokenizer,
)

Device set to use mps:0


In [31]:
# Same sample question and candidate levels as the BART run, but with a custom
# hypothesis template in which `{}` is the slot for each candidate label.
sequence_to_classify = "If I have 2 pair of apple, can i make apple pie with it?"
candidate_labels = [
    'knowledge',
    'comprehension',
    'application',
    'analysis',
    'synthesis',
    'evaluation',
]
hypothesis_template = "This text is about blooms taxonomy and it is classified as {}."

# `label` holds the pipeline's full result mapping, not just a single label string.
label = ya_classifier(
    sequence_to_classify,
    candidate_labels=candidate_labels,
    hypothesis_template=hypothesis_template,
)

In [32]:
# First entry of the result's 'labels' list, used here as the predicted level
# (the recorded run shows 'knowledge', which differs from the plain BART-MNLI result).
label['labels'][0]

'knowledge'

## Text Generation

In [None]:
# Instruction-tuned Ministral checkpoint, loaded as a text-generation pipeline.
model_name = "mistralai/Ministral-8B-Instruct-2410"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Pass the tokenizer loaded above into the pipeline, as the other classifier cells do.
# Previously it was loaded and then ignored, so the pipeline loaded its own second copy.
classifier = pipeline(
    "text-generation",
    model=model_name,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    # Automatic device placement (relies on the `accelerate` package); an 8B model
    # realistically needs a GPU or other accelerator to run at a usable speed.
    device_map="auto",
)

### LLAMA

In [2]:
import os
import torch
import torch.distributed as dist

# Describe a one-process "cluster" on this machine through the environment variables
# that torch.distributed's default env:// rendezvous reads.
# NOTE(review): presumably needed because the Llama loader in the next cell sets up
# model parallelism and expects a default process group — confirm.
os.environ['MASTER_ADDR'] = 'localhost'
os.environ['MASTER_PORT'] = '12355'
os.environ['RANK'] = '0'
os.environ['WORLD_SIZE'] = '1'

# init_process_group raises when the default group already exists, so only create it
# once; this keeps the cell safe to re-run within the same kernel.
if not dist.is_initialized():
    dist.init_process_group(backend='gloo')



In [3]:
from pathlib import Path

from models.llama3.generation import Llama
import fire

# Checkpoint directory under the current user's home instead of a hard-coded
# /Users/<name>/... path, so the notebook is not tied to one machine account.
LLAMA_CKPT_DIR = Path.home() / ".llama" / "checkpoints" / "Llama3.1-8B-Instruct"

# Build the Llama generator on Apple's Metal backend ('mps') as a single process.
# The recorded run reports the load taking roughly 95 seconds.
generator = Llama.build(
    ckpt_dir=str(LLAMA_CKPT_DIR),
    tokenizer_path=str(LLAMA_CKPT_DIR / "tokenizer.model"),
    max_seq_len=8192,
    max_batch_size=1,
    device='mps',
    world_size=1,
)

> initializing model parallel with size 1
> initializing ddp with size 1
> initializing pipeline with size 1
Loaded in 94.73 seconds


In [4]:
# Single-turn prompt asking the Llama generator to classify one question by Bloom's Taxonomy.
# NOTE(review): as recorded, this cell fails with
#     AttributeError: 'list' object has no attribute 'role'
# `dialog` is a list that contains a list of dicts, so whatever reads `.role` is being
# handed the inner list. Presumably chat_completion expects a flat sequence of message
# objects exposing `.role` / `.content` rather than a batch of plain dicts — TODO confirm
# against the signature in models/llama3/generation.py and build the messages accordingly.
dialog = [[{
        "role": "user",
        "content": "Classify this question into Bloom's Taxonomy: 'What is photosynthesis?'"
    }]]

# NOTE(review): the call below raises before the print is reached, so the OpenAI-style
# indexing on `results` has never been exercised — verify the actual return shape.
results = generator.chat_completion(dialog, temperature=0.1)
print(results[0]['choices'][0]['message']['content'])

AttributeError: 'list' object has no attribute 'role'

# Query Augmentation