In [None]:
# Load a CamemBERT checkpoint through fairseq and try its mask-filling helper
# on a sample French sentence.
from fairseq.models.roberta import CamembertModel
# The checkpoint directory is a relative path, so it is resolved against the
# notebook's current working directory.
camembert = CamembertModel.from_pretrained('./Camembert-base/')
camembert.eval()  # presumably switches the model to inference mode (dropout off) — confirm against fairseq docs
masked_line = 'Le camembert est <mask> :)'
# Final expression of the cell, so its return value is what the notebook displays.
# topk=3 asks for three candidate fill-ins for the <mask> slot.
camembert.fill_mask(masked_line, topk=3)

# The underlying model is available under the *models* attribute


In [None]:
import torch

# Import from the package root: the flat `transformers.modeling_camembert` /
# `transformers.tokenization_camembert` module paths only exist in pre-v4
# releases, while the root-level names are available in both old and new versions.
from transformers import CamembertForMaskedLM, CamembertTokenizer


def fill_mask(masked_input, model, tokenizer, topk=5):
    """Predict the most likely replacements for the single ``<mask>`` in a sentence.

    Adapted from
    https://github.com/pytorch/fairseq/blob/master/fairseq/models/roberta/hub_interface.py

    Args:
        masked_input: Text containing exactly one ``"<mask>"`` placeholder.
        model: Callable taking a ``(1, seq_len)`` tensor of token ids and
            returning a sequence whose first element holds the per-position
            vocabulary logits, indexed as ``[batch, position, vocab]``.
        tokenizer: Object providing ``encode(text, add_special_tokens=True)``,
            ``convert_ids_to_tokens(id)``, ``mask_token`` and ``mask_token_id``.
        topk: Number of candidates to return.

    Returns:
        A list of ``topk`` tuples ``(filled_sentence, probability, predicted_token)``
        ordered from most to least probable.

    Raises:
        AssertionError: If ``masked_input`` does not contain exactly one ``"<mask>"``.
    """
    assert masked_input.count("<mask>") == 1
    input_ids = torch.tensor(tokenizer.encode(masked_input, add_special_tokens=True)).unsqueeze(0)  # Batch size 1

    # Pure inference: only Python floats leave this function, so there is no
    # reason to record an autograd graph for the forward pass.
    with torch.no_grad():
        logits = model(input_ids)[0]  # first element of the output holds the logits

    # Position of the mask token in the encoded sequence; .item() requires that
    # exactly one position matches.
    masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item()
    prob = logits[0, masked_index, :].softmax(dim=0)
    values, indices = prob.topk(k=topk, dim=0)

    # When the mask is preceded by a space, that space is replaced together with
    # the mask so that the "\u2581" word-boundary marker of the predicted piece
    # (turned into a space below) does not produce a doubled space.
    masked_token = tokenizer.mask_token
    spaced_mask = " {0}".format(masked_token)
    target = spaced_mask if spaced_mask in masked_input else masked_token

    topk_filled_outputs = []
    # Walk probabilities and token ids in lockstep. Tokens are deliberately NOT
    # joined into one string and re-split on spaces: a token that itself contains
    # a space (or is empty) would shift the tokens out of line with `values`.
    for probability, token_id in zip(values.tolist(), indices.tolist()):
        predicted_token = tokenizer.convert_ids_to_tokens(token_id).replace("\u2581", " ")
        topk_filled_outputs.append(
            (masked_input.replace(target, predicted_token), probability, predicted_token)
        )
    return topk_filled_outputs


# Load the "camembert-base" tokenizer and masked-LM model by name, then run
# fill_mask() on a sample sentence.
tokenizer = CamembertTokenizer.from_pretrained("camembert-base")
model = CamembertForMaskedLM.from_pretrained("camembert-base")
model.eval()  # no training happens in this cell; the model is only queried

masked_input = "Le camembert est <mask> :)"
# Prints the list of (filled sentence, probability, predicted token) tuples
# returned by fill_mask for the 3 highest-probability candidates.
print(fill_mask(masked_input, model, tokenizer, topk=3))

In [None]:
# Scratch cell: build a default-configured RoBERTa model and inspect how the
# "roberta-base" tokenizer encodes two sample strings and the training texts.
# NOTE(review): this cell rebinds the notebook-global names `model` and
# `tokenizer` that the CamemBERT cell above also assigns.
from transformers import RobertaConfig, RobertaModel
import pandas as pd

# Relative path: resolved against the notebook's working directory.
csv = pd.read_csv('./Post_Data/Training_data.csv')

# Initializing a RoBERTa configuration
configuration = RobertaConfig()

# Initializing a model from the configuration
# NOTE(review): built from a bare config rather than from_pretrained, so the
# weights are presumably untrained; the model is not used again in this cell.
model = RobertaModel(configuration)

# Accessing the model configuration
configuration = model.config

from transformers import RobertaTokenizer
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
# Same words with and without a leading space, printed so the two id lists can be compared.
print(tokenizer("Hello world")['input_ids'])
print(tokenizer(" Hello world")['input_ids'])
# NOTE(review): prints the plain-text repr of the frame; csv.head() as the last
# expression of a cell would give a shorter, richer preview.
print(csv)

# Tokenize each entry of the 'text' column and print its token ids, one row at a time.
# assumes every value in csv['text'] is a string — TODO confirm (missing values would not be)
for i in csv['text']:
    print(tokenizer(i)['input_ids'])
    

In [None]:
from transformers import CamembertTokenizer, CamembertForTokenClassification
import torch

# Load the "camembert-base" checkpoint into a token-classification model class.
# The tokenizer load below is commented out, so `tokenizer` keeps whatever
# value an earlier cell assigned to it.
#tokenizer = CamembertTokenizer.from_pretrained('camembert-base')
# NOTE(review): "camembert-base" is presumably a masked-LM checkpoint, in which
# case the classification head would be freshly initialised — confirm before
# relying on this model's predictions.
model = CamembertForTokenClassification.from_pretrained('camembert-base')



In [None]:
from transformers import AutoTokenizer, AutoModelForTokenClassification, TFAutoModelForTokenClassification
import pandas as pd
import tensorflow as tf

csv = pd.read_csv('./Post_Data/Training_data.csv')

tokenizer = AutoTokenizer.from_pretrained("Jean-Baptiste/camembert-ner")
# The Keras-style compile(optimizer=..., loss=..., metrics=...) and summary()
# calls at the end of this cell are only provided by the TensorFlow model
# classes; AutoModelForTokenClassification returns a PyTorch module, on which
# those calls fail. Load the TF class instead. `from_pt=True` converts the
# checkpoint's PyTorch weights (requires torch to be installed alongside TF).
model = TFAutoModelForTokenClassification.from_pretrained("Jean-Baptiste/camembert-ner", from_pt=True)


##### Process text sample (from wikipedia)

from transformers import pipeline

# aggregation_strategy="simple" asks the pipeline to merge word pieces into
# whole entities instead of returning one prediction per token.
nlp = pipeline('ner', model=model, tokenizer=tokenizer, aggregation_strategy="simple")

OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=3e-5)
# NOTE(review): CategoricalCrossentropy expects one-hot labels; if the training
# labels are integer class ids, SparseCategoricalCrossentropy is the matching
# loss — confirm against the label format before calling fit().
LOSS = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
METRICS = ['accuracy']

model.compile(optimizer=OPTIMIZER, loss=LOSS, metrics=METRICS)
model.summary()


In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TFAutoModelForSequenceClassification
import tensorflow as tf

tokenizer = AutoTokenizer.from_pretrained("cmarkea/distilcamembert-base-sentiment")

# The Keras-style compile(optimizer=..., loss=..., metrics=...) and summary()
# calls at the end of this cell are only provided by the TensorFlow model
# classes; AutoModelForSequenceClassification returns a PyTorch module, on
# which those calls fail. Load the TF class instead. `from_pt=True` converts the
# checkpoint's PyTorch weights (requires torch to be installed alongside TF).
model = TFAutoModelForSequenceClassification.from_pretrained("cmarkea/distilcamembert-base-sentiment", from_pt=True)

from transformers import pipeline

nlp = pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)

# A bare expression in the middle of a cell is evaluated and thrown away (only
# the last expression of a cell is displayed), so print the prediction explicitly.
print(nlp("Je deteste ce film"))

OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=3e-5)
# NOTE(review): CategoricalCrossentropy expects one-hot labels; if the training
# labels are integer class ids, SparseCategoricalCrossentropy is the matching
# loss — confirm against the label format before calling fit().
LOSS = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
METRICS = ['accuracy']

model.compile(optimizer=OPTIMIZER, loss=LOSS, metrics=METRICS)
model.summary()


