In [22]:
from gliner import GLiNER
import os 
# ---- Inputs: local checkpoint folders, candidate labels, and the sample passage ----
gliner_path = "/Users/firaterman/Documents/data/gliner_multi-v2.1"
tokenizer_path = "/Users/firaterman/Documents/data/mdeberta-v3-base"

# Entity types the model is asked to look for
labels = ["Person", "Award", "Date", "Competitions", "Teams"]

# Passage to run entity prediction on
text = """
Cristiano Ronaldo dos Santos Aveiro (Portuguese pronunciation: [kɾiʃˈtjɐnu ʁɔˈnaldu]; born 5 February 1985) is a Portuguese professional footballer who plays as a forward for and captains both Saudi Pro League club Al Nassr and the Portugal national team...
"""

# ---- Model: show what is in the GLiNER folder, then load the model from it ----
print("file in", os.listdir(gliner_path))
model = GLiNER.from_pretrained(gliner_path)

# ---- Tokenizer: show what is in the tokenizer folder, then attach a tokenizer
# built from it to the model object ----
print("file in", os.listdir(tokenizer_path))
model.tokenizer = model.tokenizer_class.from_pretrained(tokenizer_path)

# ---- Inference: keep predictions scoring at or above the 0.5 threshold ----
entities = model.predict_entities(text, labels, threshold=0.5)

# ---- Report: one "<span text> => <label>" line per predicted entity ----
for entity in entities:
    print(f'{entity["text"]} => {entity["label"]}')


In [26]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
import os
import json

class OfflineGliner:
    """
    Offline wrapper that loads a model and a tokenizer from local folders only.

    Every ``from_pretrained`` call is made with ``local_files_only=True`` so no
    download is attempted.

    NOTE(review): the checkpoint is loaded through
    ``AutoModelForSequenceClassification``. GLiNER checkpoints are normally loaded
    with ``GLiNER.from_pretrained``; confirm that this generic classification
    head actually matches the checkpoint's weights before trusting the logits.
    """

    # Transformers has no "deberta-v3" model type; DeBERTa-v3 checkpoints are
    # registered under "deberta-v2", so that is the only valid fallback value.
    _FALLBACK_MODEL_TYPE = "deberta-v2"

    def __init__(self, gliner_path, mdeberta_path):
        """
        Initialize Gliner with local model files

        Args:
            gliner_path: Path to the downloaded gliner_multi-v2.1 folder
            mdeberta_path: Path to the downloaded mdeberta-v3-base folder
        """
        # Make sure config.json (when present) declares a model type that
        # AutoConfig can resolve. A missing config.json is left for AutoConfig
        # to report.
        config_path = os.path.join(gliner_path, "config.json")
        self._ensure_model_type(config_path, self._FALLBACK_MODEL_TYPE)

        # Load config first
        config = AutoConfig.from_pretrained(
            gliner_path,
            local_files_only=True
        )

        # Load tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(
            mdeberta_path,
            local_files_only=True,
            use_fast=True
        )

        # Load model with explicit config
        self.model = AutoModelForSequenceClassification.from_pretrained(
            gliner_path,
            config=config,
            local_files_only=True
        )
        self.model.eval()  # Set to evaluation mode

    @staticmethod
    def _ensure_model_type(config_path, model_type="deberta-v2"):
        """
        Add a ``model_type`` key to a JSON config file if it has none.

        The file is rewritten only when the key was actually added, so an
        already valid config is never touched.

        Args:
            config_path: Path to the JSON config file
            model_type: Value stored under ``model_type`` when the key is missing

        Returns:
            True if the file was modified, False otherwise (file missing or
            key already present)
        """
        if not os.path.exists(config_path):
            return False

        with open(config_path, "r", encoding="utf-8") as f:
            config_dict = json.load(f)

        if "model_type" in config_dict:
            return False

        config_dict["model_type"] = model_type
        with open(config_path, "w", encoding="utf-8") as f:
            json.dump(config_dict, f, indent=2)
        return True

    def predict(self, text, max_length=512):
        """
        Make predictions using the local Gliner model

        Args:
            text: Input text to analyze
            max_length: Maximum sequence length (default: 512)

        Returns:
            The ``logits`` attribute of the model output for ``text``
        """
        # Tokenize input
        inputs = self.tokenizer(
            text,
            truncation=True,
            max_length=max_length,
            return_tensors="pt"
        )

        # Inference only: no gradients needed
        with torch.no_grad():
            outputs = self.model(**inputs)

        return outputs.logits

    def process_batch(self, texts, batch_size=8, max_length=512):
        """
        Process multiple texts in batches

        Args:
            texts: List of input texts. A single string is treated as a
                one-item list instead of being sliced character by character.
            batch_size: Number of texts to process at once (default: 8)
            max_length: Maximum sequence length used for truncation
                (default: 512, same as ``predict``)

        Returns:
            List with one entry per row of each batch's logits, in input order

        Raises:
            ValueError: If ``batch_size`` is smaller than 1
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        # Normalise the input so slicing below always yields lists of texts.
        if isinstance(texts, str):
            texts = [texts]
        else:
            texts = list(texts)

        results = []
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]
            batch_inputs = self.tokenizer(
                batch,
                truncation=True,
                max_length=max_length,
                padding=True,
                return_tensors="pt"
            )

            with torch.no_grad():
                outputs = self.model(**batch_inputs)

            # Iterating the logits tensor yields one row per text in the batch.
            results.extend(outputs.logits)

        return results

In [27]:
# Build the offline wrapper: first argument is the GLiNER checkpoint folder,
# second is the mDeBERTa tokenizer folder.
gliner = OfflineGliner(
    "/Users/firaterman/Documents/data/gliner_multi-v2.1",
    "/Users/firaterman/Documents/data/mdeberta-v3-base",
)

# Smoke test: run a single placeholder sentence through the wrapper
text = "Your input text here"
prediction = gliner.predict(text)

ValueError: Unrecognized model in /Users/firaterman/Documents/data/gliner_multi-v2.1. Should have a `model_type` key in its config.json, or contain one of the following strings in its name: albert, align, altclip, aria, aria_text, audio-spectrogram-transformer, autoformer, bamba, bark, bart, beit, bert, bert-generation, big_bird, bigbird_pegasus, biogpt, bit, blenderbot, blenderbot-small, blip, blip-2, bloom, bridgetower, bros, camembert, canine, chameleon, chinese_clip, chinese_clip_vision_model, clap, clip, clip_text_model, clip_vision_model, clipseg, clvp, code_llama, codegen, cohere, cohere2, colpali, conditional_detr, convbert, convnext, convnextv2, cpmant, ctrl, cvt, dac, data2vec-audio, data2vec-text, data2vec-vision, dbrx, deberta, deberta-v2, decision_transformer, deformable_detr, deit, depth_anything, deta, detr, diffllama, dinat, dinov2, dinov2_with_registers, distilbert, donut-swin, dpr, dpt, efficientformer, efficientnet, electra, emu3, encodec, encoder-decoder, ernie, ernie_m, esm, falcon, falcon_mamba, fastspeech2_conformer, flaubert, flava, fnet, focalnet, fsmt, funnel, fuyu, gemma, gemma2, git, glm, glpn, gpt-sw3, gpt2, gpt_bigcode, gpt_neo, gpt_neox, gpt_neox_japanese, gptj, gptsan-japanese, granite, granitemoe, graphormer, grounding-dino, groupvit, hiera, hubert, ibert, idefics, idefics2, idefics3, idefics3_vision, ijepa, imagegpt, informer, instructblip, instructblipvideo, jamba, jetmoe, jukebox, kosmos-2, layoutlm, layoutlmv2, layoutlmv3, led, levit, lilt, llama, llava, llava_next, llava_next_video, llava_onevision, longformer, longt5, luke, lxmert, m2m_100, mamba, mamba2, marian, markuplm, mask2former, maskformer, maskformer-swin, mbart, mctct, mega, megatron-bert, mgp-str, mimi, mistral, mixtral, mllama, mobilebert, mobilenet_v1, mobilenet_v2, mobilevit, mobilevitv2, modernbert, moonshine, moshi, mpnet, mpt, mra, mt5, musicgen, musicgen_melody, mvp, nat, nemotron, nezha, nllb-moe, nougat, nystromformer, olmo, olmo2, olmoe, omdet-turbo, 
oneformer, open-llama, openai-gpt, opt, owlv2, owlvit, paligemma, patchtsmixer, patchtst, pegasus, pegasus_x, perceiver, persimmon, phi, phi3, phimoe, pix2struct, pixtral, plbart, poolformer, pop2piano, prophetnet, pvt, pvt_v2, qdqbert, qwen2, qwen2_audio, qwen2_audio_encoder, qwen2_moe, qwen2_vl, rag, realm, recurrent_gemma, reformer, regnet, rembert, resnet, retribert, roberta, roberta-prelayernorm, roc_bert, roformer, rt_detr, rt_detr_resnet, rwkv, sam, seamless_m4t, seamless_m4t_v2, segformer, seggpt, sew, sew-d, siglip, siglip_vision_model, speech-encoder-decoder, speech_to_text, speech_to_text_2, speecht5, splinter, squeezebert, stablelm, starcoder2, superpoint, swiftformer, swin, swin2sr, swinv2, switch_transformers, t5, table-transformer, tapas, textnet, time_series_transformer, timesformer, timm_backbone, timm_wrapper, trajectory_transformer, transfo-xl, trocr, tvlt, tvp, udop, umt5, unispeech, unispeech-sat, univnet, upernet, van, video_llava, videomae, vilt, vipllava, vision-encoder-decoder, vision-text-dual-encoder, visual_bert, vit, vit_hybrid, vit_mae, vit_msn, vitdet, vitmatte, vitpose, vitpose_backbone, vits, vivit, wav2vec2, wav2vec2-bert, wav2vec2-conformer, wavlm, whisper, xclip, xglm, xlm, xlm-prophetnet, xlm-roberta, xlm-roberta-xl, xlnet, xmod, yolos, yoso, zamba, zoedepth