In [24]:
import torch
import requests
import json
import pandas as pd
from transformers import CamembertTokenizerFast,AutoModel,AutoModelForSequenceClassification
import torch.nn.functional as F
from mlflow.models.signature import infer_signature
from mlflow.transformers import generate_signature_output
import mlflow
from torch import nn
   

In [25]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [26]:
# Label vocabulary (24 entries) for the multilabel classifier.
# predict() and decode_prediction() translate output column c into types[c],
# so the order of this list must match the order of the model's output columns.
types = ['ถนน','ทางเท้า','แสงสว่าง','ความปลอดภัย','น้ำท่วม','ความสะอาด','กีดขวาง',
        'ท่อระบายน้ำ','สะพาน','จราจร','สายไฟ','คลอง','เสียงรบกวน','ต้นไม้','ร้องเรียน',
        'ป้าย','สัตว์จรจัด',"PM25",'สอบถาม','เสนอแนะ','คนจรจัด','การเดินทาง','ห้องน้ำ','ป้ายจราจร']

In [27]:
# Load the saved classifier from disk, mapping all tensors onto the CPU.
# The loaded object is later called directly as model(input_ids).logits.
# NOTE(review): torch.load unpickles the file — only load 'multilabel.pth' from a trusted source.
model = torch.load('multilabel.pth',map_location=torch.device('cpu'))


In [28]:
   
BERT_MODEL_NAME = "airesearch/wangchanberta-base-att-spm-uncased"
# model = AutoModel.from_pretrained('multilable_pretrained')
tokenizer = CamembertTokenizerFast.from_pretrained(BERT_MODEL_NAME)

In [30]:
def predict(text):
    """Classify ``text`` with the local model and return predicted label names.

    The text is tokenized with the module-level ``tokenizer`` (padded or
    truncated to 256 tokens), the resulting ``input_ids`` are passed through
    the module-level ``model``, and every label whose sigmoid probability is
    at least 0.5 is reported.

    Args:
        text: Text to classify; forwarded unchanged to ``tokenizer``.

    Returns:
        A list with one entry per row of the model output; each entry is the
        (possibly empty) list of label names from ``types`` selected for that
        row.
    """
    encoded = tokenizer(text, padding='max_length', max_length=256, truncation=True, return_tensors="pt")
    # Only the token ids are forwarded to the model.
    # NOTE(review): no attention_mask is passed even though the input is padded
    # to max_length — confirm this matches how the model was trained.
    input_ids = encoded['input_ids']

    # Inference only: disable autograd so no graph is built for the forward pass.
    with torch.no_grad():
        logits = model(input_ids).logits

    # Independent per-label decision (multilabel): threshold each probability.
    selected = (torch.sigmoid(logits) >= 0.5).tolist()

    # Column index -> label name, keeping only the selected columns of each row.
    return [
        [types[idx] for idx, is_selected in enumerate(row) if is_selected]
        for row in selected
    ]
    
    

In [31]:
text = 'ไม่มีที่วางขยะรอจัดเก็บ วางไว้บนทางเท้า'
text2 = 'การทิ้งขยะลงในลำคลอง'

In [32]:
predict(text)

[['ทางเท้า', 'ความสะอาด']]

In [33]:
predict(text2)

[['ความสะอาด']]

In [None]:
# predict(text)

In [None]:
# model.save_pretrained('multilabel_model')

In [None]:
# model = AutoModelForSequenceClassification.from_pretrained('multilabel_model')

In [None]:
model

In [None]:
text = 'ไม่มีที่วางขยะรอจัดเก็บ วางไว้บนทางเท้า'

In [None]:
X = tokenizer(text, padding='max_length', max_length=256, truncation=True, return_tensors='pt')['input_ids']

In [None]:
# mlflow.set_tracking_uri('http://127.0.0.1:5000')  # set up connection
# mlflow.set_experiment('multilabel')
# with mlflow.start_run() as run:
#     mlflow.pytorch.save_model(model,'multilabel')

# Override output

In [None]:
class MyModel(nn.Module):
    """Wrapper around the saved classifier that returns the raw logits tensor.

    The wrapped model returns an object exposing a ``.logits`` attribute; this
    wrapper unwraps it so that callers receive the logits directly.

    Args:
        model_path: Path of the saved model file to load. Defaults to
            ``'multilabel.pth'``.
    """

    def __init__(self, model_path='multilabel.pth'):
        super().__init__()
        # NOTE(review): torch.load unpickles the file — only load checkpoints
        # from a trusted source.
        self.model = torch.load(model_path, map_location=torch.device('cpu'))

    def forward(self, x):
        """Run the wrapped model on ``x`` and return its ``.logits``."""
        return self.model(x).logits

    def predict(self, x):
        """Alias for :meth:`forward`; returns the same logits for ``x``."""
        return self.forward(x)

In [None]:
model = MyModel()

In [None]:
model(X)

In [None]:
model.predict(X)

In [None]:
# output =  generate_signature_output(model,X.numpy())
signature = infer_signature(X.numpy(), model.predict(X).detach().numpy())

In [None]:
# Log the wrapped model to the MLflow tracking server, then load it back from
# the run so the reloaded copy can be checked in the following cell.
import mlflow.pytorch
import mlflow

mlflow.set_tracking_uri('http://127.0.0.1:5000')  # set up connection
mlflow.set_experiment('multilabel')
with mlflow.start_run() as run:
    # Stored under the artifact path "model" together with the inferred signature.
    mlflow.pytorch.log_model(model, "model",signature=signature)
# `run` is still bound after the with-block; build the URI of the logged artifact.
model_uri = "runs:/{}/model".format(run.info.run_id)
# Rebinds `model`: from here on it refers to the copy loaded back from MLflow.
model = mlflow.pytorch.load_model(model_uri)
model

In [None]:
model(X)

In [34]:


def predict_json(server_url, input_json, timeout=60):
    """POST ``input_json`` to ``server_url`` and return the decoded JSON reply.

    Args:
        server_url: URL of the scoring endpoint.
        input_json: JSON-serialisable payload, sent as the request body.
        timeout: Seconds ``requests`` waits for the server before giving up.
            Pass ``None`` to wait indefinitely (the previous behaviour).

    Returns:
        The parsed JSON body of a 200 response.

    Raises:
        Exception: If the server answers with any status code other than 200;
            the message carries the status code and the response text.
    """
    # Without a timeout a stalled server would block the notebook forever.
    response = requests.post(server_url, json=input_json, timeout=timeout)
    if response.status_code != 200:
        raise Exception("Request failed with status code: %s, response: %s"
                        % (response.status_code, response.text))
    return response.json()
    
def predict(server_url, text):
    """Tokenize ``text`` and score the token ids against a remote endpoint.

    The text is tokenized with the module-level ``tokenizer`` (padded or
    truncated to 256 tokens); the resulting ``input_ids`` are converted to
    nested Python lists and posted as ``{"inputs": ...}`` via ``predict_json``.

    Args:
        server_url: URL of the scoring endpoint.
        text: Text to classify; forwarded unchanged to ``tokenizer``.

    Returns:
        Whatever ``predict_json`` returns for the request.
    """
    encoding = tokenizer(text, padding='max_length', max_length=256, truncation=True, return_tensors='pt')
    # Tensors are not JSON-serialisable, so convert the ids to plain lists.
    token_ids = encoding['input_ids'].numpy().tolist()
    payload = {"inputs": token_ids}
    return predict_json(server_url, payload)

text = 'ไม่มีที่วางขยะรอจัดเก็บ วางไว้บนทางเท้า'

predict_result = predict("http://127.0.0.1:1244/invocations", text)

In [35]:
predict_result = predict("http://127.0.0.1:1244/invocations", text)

In [36]:
predict_result

{'predictions': [[-2.8791043758392334,
   0.3272898197174072,
   -7.104610919952393,
   -6.118514537811279,
   -6.331795692443848,
   3.317025661468506,
   -5.537487983703613,
   -7.752892971038818,
   -8.088247299194336,
   -7.010658264160156,
   -8.05642318725586,
   -8.945622444152832,
   -7.5058794021606445,
   -8.085744857788086,
   -6.152515411376953,
   -7.692965507507324,
   -7.599690914154053,
   -8.028019905090332,
   -8.24844741821289,
   -8.690380096435547,
   -7.523009777069092,
   -7.622708320617676,
   -8.261131286621094,
   -9.733368873596191]]}

In [37]:
import numpy as np
import torch.nn.functional as F

In [38]:
def decode_prediction(predict_result):
    """Turn a scoring response into lists of predicted label names.

    Args:
        predict_result: Mapping with a ``'predictions'`` entry holding one row
            of raw logits per scored input.

    Returns:
        A list with one entry per row; each entry lists the names from
        ``types`` whose sigmoid probability is at least 0.5 (possibly empty).
    """
    logits = torch.tensor(np.array(predict_result['predictions']))
    probabilities = F.sigmoid(logits)

    decoded = []
    for row in probabilities:
        # Column position selects the label name in `types`.
        decoded.append([types[idx] for idx, prob in enumerate(row) if prob >= 0.5])
    return decoded

In [39]:
decode_prediction(predict_result)

[['ทางเท้า', 'ความสะอาด']]

In [None]:
# while True:
#     pass

# Text classification

In [None]:
model = AutoModelForSequenceClassification.from_pretrained('multilabel_pretrained')

In [None]:
from transformers import Pipeline


class MyPipeline(Pipeline):
    """Minimal custom pipeline: tokenize text, run the model, return its raw output.

    Only ``input_ids`` are passed to the model and no post-processing is
    applied, so the caller receives exactly what the model returns.
    """

    def _sanitize_parameters(self, **kwargs):
        """Split call-time kwargs into (preprocess, forward, postprocess) kwargs.

        Only ``maybe_arg`` is recognised; it is routed to ``preprocess``.
        """
        preprocess_kwargs = {}
        if "maybe_arg" in kwargs:
            preprocess_kwargs["maybe_arg"] = kwargs["maybe_arg"]
        return preprocess_kwargs, {}, {}

    def preprocess(self, inputs, maybe_arg=None):
        """Tokenize ``inputs`` (padded/truncated to 256 tokens) and return the ``input_ids``.

        ``maybe_arg`` is accepted because ``_sanitize_parameters`` forwards it
        here; it is currently unused. Without it in the signature, passing
        ``maybe_arg`` to the pipeline would raise ``TypeError``.
        """
        return self.tokenizer(inputs, padding='max_length', max_length=256, truncation=True, return_tensors='pt')['input_ids']

    def _forward(self, model_inputs):
        """Run the model on the token ids produced by ``preprocess``."""
        return self.model(model_inputs)

    def postprocess(self, model_outputs):
        """Return the model output unchanged."""
        return model_outputs

In [None]:
pipeline = MyPipeline(model = model,tokenizer = tokenizer,task = "text-classification")

In [None]:
from mlflow.models.signature import infer_signature
from mlflow.transformers import generate_signature_output

In [None]:
text = 'ไม่มีที่วางขยะรอจัดเก็บ วางไว้บนทางเท้า'


In [None]:
input = tokenizer(text, padding='max_length', max_length=256, truncation=True, return_tensors='pt')['input_ids']

In [None]:
input.dtype

In [None]:
input.shape

In [None]:
res = model(tokenizer(text, padding='max_length', max_length=256, truncation=True, return_tensors='pt')['input_ids']).logits


In [None]:
res.dtype

In [None]:
res.shape

In [None]:
# import numpy as np
# from mlflow.models.signature import ModelSignature
# from mlflow.types.schema import Schema, TensorSpec

# input_schema = Schema(
#     [
#         TensorSpec(np.dtype(np.int64), (1,256)),
#     ]
# )
# output_schema = Schema([TensorSpec(np.dtype(np.float32), (1, 24))])
# signature = ModelSignature(inputs=input_schema, outputs=output_schema)


In [None]:
pipeline.predict('ไม่มีที่วางขยะรอจัดเก็บ วางไว้บนทางเท้า')

In [None]:
# tokenizer = CamembertTokenizerFast.from_pretrained(BERT_MODEL_NAME, padding='max_length', max_length=256, truncation=True, return_tensors='pt')
# tokenizer.padding = 'max_length'  # Pad sequences to the maximum length
# tokenizer.max_length = 256  # Set the maximum sequence length
# tokenizer.truncation = True 
# tokenizer('ไม่มีที่วางขยะรอจัดเก็บ วางไว้บนทางเท้า')

In [None]:
# tokenizer('ไม่มีที่วางขยะรอจัดเก็บ วางไว้บนทางเท้า', padding='max_length', max_length=256, truncation=True, return_tensors='pt')

In [None]:
# model(tokenizer('ไม่มีที่วางขยะรอจัดเก็บ วางไว้บนทางเท้า', padding='max_length', max_length=256, truncation=True, return_tensors='pt')['input_ids'])

In [None]:
text = 'ไม่มีที่วางขยะรอจัดเก็บ วางไว้บนทางเท้า'

In [None]:
# Log the custom pipeline to the MLflow tracking server with the transformers
# flavor, then load it back from the run to check the round trip.
import mlflow.pytorch
import mlflow
mlflow.set_tracking_uri('http://127.0.0.1:5000')  # set up connection
mlflow.set_experiment('multilabel_model')
# mlflow.transformers.autolog()
with mlflow.start_run() as run:
    # Alternative kept for reference: save to a local path instead of logging to the run.
    # mlflow.transformers.save_model(
    #     transformers_model=pipeline,
    #     path="transformer_pipeline",
    # )
    mlflow.transformers.log_model(
        transformers_model=pipeline,
        artifact_path="multilabel_pipeline",
        
    )
# `run` is still bound after the with-block; build the URI of the logged artifact.
model_uri = "runs:/{}/multilabel_pipeline".format(run.info.run_id)

# Reload the logged pipeline; later cells use `loaded` for prediction and signature inference.
loaded = mlflow.transformers.load_model(model_uri)
loaded


In [None]:
loaded.predict(text)

In [None]:
from mlflow.models.signature import infer_signature
from mlflow.transformers import generate_signature_output
text = 'ไม่มีที่วางขยะรอจัดเก็บ วางไว้บนทางเท้า'
df = pd.DataFrame([text],columns=['text'])
output =  generate_signature_output(loaded,df)
signature = infer_signature(df, output)



In [None]:
mlflow.transformers.save_model(
    transformers_model=loaded ,
    path="text-class",
    signature=signature,
    input_example=df,
)

loaded = mlflow.transformers.load_model("text-class")

In [None]:
mlflow.transformers.log_model(
        transformers_model=loaded,
        artifact_path="multilabel_pipeline",
        
    )

In [None]:
# import mlflow.pytorch
# import mlflow
# mlflow.set_tracking_uri('http://127.0.0.1:5000')  # set up connection
# mlflow.set_experiment('multilabel-experiment')
# with mlflow.start_run() as run:
#     mlflow.pytorch.log_model(model, "model")
# model_uri = "runs:/{}/model".format(run.info.run_id)
# model = mlflow.pytorch.load_model(model_uri)
# model








In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
   
BERT_MODEL_NAME = "airesearch/wangchanberta-base-att-spm-uncased"
tokenizer = CamembertTokenizerFast.from_pretrained(BERT_MODEL_NAME)

In [None]:
text = 'ไม่มีที่วางขยะรอจัดเก็บ วางไว้บนทางเท้า'

In [None]:
encoded_input  = tokenizer.encode_plus(text, padding='max_length', max_length=256, truncation=True, return_tensors='pt')

In [None]:
encoded_input.keys()

In [None]:

model(encoded_input['input_ids'])

In [None]:




def predict_json(server_url, input_json, timeout=60):
    """POST ``input_json`` to ``server_url`` and return the decoded JSON reply.

    Args:
        server_url: URL of the scoring endpoint.
        input_json: JSON-serialisable payload, sent as the request body.
        timeout: Seconds ``requests`` waits for the server before giving up.
            Pass ``None`` to wait indefinitely (the previous behaviour).

    Returns:
        The parsed JSON body of a 200 response.

    Raises:
        Exception: If the server answers with any status code other than 200;
            the message carries the status code and the response text.
    """
    # Without a timeout a stalled server would block the notebook forever.
    response = requests.post(server_url, json=input_json, timeout=timeout)
    if response.status_code != 200:
        raise Exception("Request failed with status code: %s, response: %s"
                        % (response.status_code, response.text))
    return response.json()
    
def predict(server_url, df):
    """Score a DataFrame against a remote endpoint.

    The frame is serialised in pandas' ``split`` orientation and posted as
    ``{"dataframe_split": ...}`` via ``predict_json``. The payload is printed
    before it is sent.

    Args:
        server_url: URL of the scoring endpoint.
        df: DataFrame holding the inputs to score.

    Returns:
        Whatever ``predict_json`` returns for the request.
    """
    split_records = df.to_dict(orient='split')
    data = {"dataframe_split": split_records}
    print(data)
    return predict_json(server_url, data)

text = 'ไม่มีที่วางขยะรอจัดเก็บ วางไว้บนทางเท้า'

# def predict(text):
#     input = tokenizer(text,padding='max_length', max_length = 256, truncation=True,return_tensors="pt")
#     input = input['input_ids'].squeeze(1).to(device)
#     output = model(input).logits
#     output = F.sigmoid(output)
#     res = output.detach().cpu().numpy()
    
#     return output

predict("http://127.0.0.1:1245/invocations", df)

In [None]:
predict("http://127.0.0.1:1245/invocations", df)