In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
from PIL import Image
import random

import os
import json
import cv2

import torch
from torch.utils.data import random_split, DataLoader, Dataset
from torchvision import datasets, transforms

from transformers import TrOCRProcessor, VisionEncoderDecoderModel, default_data_collator
from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments



Завиксируем всю случайность!

In [2]:
seed = 23
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
random.seed(seed)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.determenistic = True

Соберем наш датасет из полученных файлов

In [3]:
basketball_df = pd.read_csv('for_train\\train_bascetball.csv')
streetball_df = pd.read_csv('for_train\\train_streetball.csv')
volleyball_df = pd.read_csv('for_train\\train_volleyball.csv')

res_df = pd.concat([basketball_df, streetball_df, volleyball_df], axis=0)
res_df.shape

(150637, 3)

Разобьем датасет на train test и eval части

In [4]:
diff_df, test_df, = train_test_split(res_df, test_size=0.2, shuffle=True)
train_df, eval_df = train_test_split(diff_df, test_size=0.2)
# we reset the indices to start from zero
train_df.reset_index(drop=True, inplace=True)
test_df.reset_index(drop=True, inplace=True)
eval_df.reset_index(drop=True, inplace=True)

In [5]:
store_pathes = []

class IAMDataset(Dataset):
    def __init__(self, root_dir, df, processor, max_target_length=3):
        self.root_dir = root_dir
        self.df = df
        self.processor = processor
        self.max_target_length = max_target_length

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
      
        file_name = self.df['file_name'][idx]
        text = str(self.df['text'][idx])
        store_pathes.append(file_name)
   
        image = Image.open(file_name).convert("RGB")
        image = image.resize((64, 64))
        pixel_values = self.processor(image, return_tensors="pt").pixel_values
        # add labels (input_ids) by encoding the text
        labels = self.processor.tokenizer(text, 
                                          padding="max_length", 
                                          max_length=self.max_target_length).input_ids
        # important: make sure that PAD tokens are ignored by the loss function
        labels = [label if label != self.processor.tokenizer.pad_token_id else -100 for label in labels]

        encoding = {"pixel_values": pixel_values.squeeze(), "labels": torch.tensor(labels)}
        return encoding

Создадим наши датасеты

In [6]:
from transformers import TrOCRProcessor

processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
train_dataset = IAMDataset(root_dir='C:\\Users\\Mytre\\OneDrive\\Документы\\Data\\Work\\',
                           df=train_df,
                           processor=processor)
test_dataset = IAMDataset(root_dir='C:\\Users\\Mytre\\OneDrive\\Документы\\Data\\Work\\',
                           df=test_df,
                           processor=processor)
eval_dataset = IAMDataset(root_dir='C:\\Users\\Mytre\\OneDrive\\Документы\\Data\\Work\\',
                           df=eval_df,
                           processor=processor)

Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


In [7]:
print("Number of training examples:", len(train_dataset))
print("Number of testing examples:", len(test_dataset))
print("Number of validation examples:", len(eval_dataset))

Number of training examples: 96407
Number of testing examples: 30128
Number of validation examples: 24102


In [8]:
processor.batch_decode(eval_dataset[0]['labels'], skip_special_tokens=True)
#print(eval_dataset[0]['labels'])

['', '17', '']

Загрузим предъобученный трансформер

In [9]:
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-stage1")
#model = VisionEncoderDecoderModel.from_pretrained("model")

Some weights of VisionEncoderDecoderModel were not initialized from the model checkpoint at microsoft/trocr-base-stage1 and are newly initialized: ['encoder.pooler.dense.weight', 'encoder.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#model = VisionEncoderDecoderModel.from_pretrained('model')
model.to(device)
print(device)

cuda


Сконфигурируем нашу модель

In [11]:
# set special tokens used for creating the decoder_input_ids from the labels
model.config.decoder_start_token_id = processor.tokenizer.cls_token_id
model.config.pad_token_id = processor.tokenizer.pad_token_id
# make sure vocab size is set correctly
model.config.vocab_size = model.config.decoder.vocab_size

# set beam search parameters
model.config.eos_token_id = processor.tokenizer.sep_token_id
model.config.max_length = 4
model.config.early_stopping = True
model.config.no_repeat_ngram_size = 3
model.config.length_penalty = 2.0
model.config.num_beams = 10

Сконфигурируем цикл обучения

In [12]:
training_args = Seq2SeqTrainingArguments(
    predict_with_generate=True,
    num_train_epochs=40,
    evaluation_strategy="steps",
    per_device_train_batch_size=3,
    per_device_eval_batch_size=3,
    fp16=True, 
    output_dir="G:\\models1",
    logging_steps=1000,
    save_steps=20000,
    eval_steps=10000,
)

Оопределим метрику

In [13]:
import evaluate

cer_metric = evaluate.load("cer")
acc_metric = evaluate.load("accuracy")

In [14]:
'''
def compute_metrics(pred):
    label_ids = pred.label_ids
    pred_ids = pred.predictions
    pred_str = processor.batch_decode(pred_ids, skip_special_tokens=True)
    #label_ids[label_ids == -100] = processor.tokenizer.pad_token_id
    #label_str = processor.batch_decode(label_ids, skip_special_tokens=True)

    x = [] 
    for j in pred_str:       
        if j.isdigit():
            x.append(int(j))
        else:
            x.append(1000)
    label_ids = [int(x) for x in label_ids]       
    acc = acc_metric.compute(predictions=x, references=label_ids)

    return {"accuracy": acc}

def compute_cer(pred_ids, label_ids):
    pred_str = processor.batch_decode(pred_ids, skip_special_tokens=True)
    label_ids[label_ids == -100] = processor.tokenizer.pad_token_id
    label_str = processor.batch_decode(label_ids, skip_special_tokens=True)

    cer = cer_metric.compute(predictions=pred_str, references=label_str)

    return cer
'''
def compute_metrics(pred):
    labels_ids = pred.label_ids
    pred_ids = pred.predictions    

    pred_str = processor.batch_decode(pred_ids, skip_special_tokens=True)
    labels_ids[labels_ids == -100] = processor.tokenizer.pad_token_id
    label_str = processor.batch_decode(labels_ids, skip_special_tokens=True)    

    label_int = [int(x) for x in label_str]
    x = [] 
    for j in pred_str:       
        if j.isdigit():
            x.append(int(j))
        else:
            x.append(1000)

    cer = cer_metric.compute(predictions=pred_str, references=label_str)
    acc = acc_metric.compute(predictions=x, references=label_int)

    return {"cer": cer,
            "accuracy": acc}


Переопределим оптимизатор

In [15]:
from torch.optim import AdamW

optimizer = AdamW(model.parameters())

Запустим цикл обучения

In [16]:
trainer = Seq2SeqTrainer(
    model=model,
    tokenizer=processor.feature_extractor,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=default_data_collator,
)
trainer.train()

model.save_pretrained('model_next')



  0%|          | 0/1285440 [00:00<?, ?it/s]

{'loss': 1.0801, 'learning_rate': 4.996118060741847e-05, 'epoch': 0.03}
{'loss': 0.6178, 'learning_rate': 4.9922361214836946e-05, 'epoch': 0.06}
{'loss': 0.4559, 'learning_rate': 4.9883502925068456e-05, 'epoch': 0.09}
{'loss': 0.4198, 'learning_rate': 4.984460573811302e-05, 'epoch': 0.12}
{'loss': 0.3682, 'learning_rate': 4.980574744834454e-05, 'epoch': 0.16}
{'loss': 0.3815, 'learning_rate': 4.97668502613891e-05, 'epoch': 0.19}
{'loss': 0.3271, 'learning_rate': 4.9727991971620615e-05, 'epoch': 0.22}
{'loss': 0.3205, 'learning_rate': 4.9689094784665175e-05, 'epoch': 0.25}
{'loss': 0.3494, 'learning_rate': 4.9650197597709734e-05, 'epoch': 0.28}
{'loss': 0.3146, 'learning_rate': 4.961133930794125e-05, 'epoch': 0.31}




  0%|          | 0/8034 [00:00<?, ?it/s]

{'eval_loss': 2.0118749141693115, 'eval_cer': 0.12200339558573854, 'eval_accuracy': {'accuracy': 0.8630404115841009}, 'eval_runtime': 4396.3491, 'eval_samples_per_second': 5.482, 'eval_steps_per_second': 1.827, 'epoch': 0.31}
{'loss': 0.3125, 'learning_rate': 4.957244212098581e-05, 'epoch': 0.34}
{'loss': 0.327, 'learning_rate': 4.953358383121733e-05, 'epoch': 0.37}
{'loss': 0.2891, 'learning_rate': 4.9494686644261887e-05, 'epoch': 0.4}
{'loss': 0.292, 'learning_rate': 4.9455828354493404e-05, 'epoch': 0.44}
{'loss': 0.2584, 'learning_rate': 4.941693116753797e-05, 'epoch': 0.47}
{'loss': 0.2721, 'learning_rate': 4.937803398058253e-05, 'epoch': 0.5}
{'loss': 0.2675, 'learning_rate': 4.9339175690814046e-05, 'epoch': 0.53}
{'loss': 0.2681, 'learning_rate': 4.9300317401045556e-05, 'epoch': 0.56}
{'loss': 0.2192, 'learning_rate': 4.926142021409012e-05, 'epoch': 0.59}
{'loss': 0.2566, 'learning_rate': 4.922256192432164e-05, 'epoch': 0.62}


  0%|          | 0/8034 [00:00<?, ?it/s]

{'eval_loss': 2.727569580078125, 'eval_cer': 0.13534804753820034, 'eval_accuracy': {'accuracy': 0.8489751887810141}, 'eval_runtime': 4382.5075, 'eval_samples_per_second': 5.5, 'eval_steps_per_second': 1.833, 'epoch': 0.62}
{'loss': 0.2282, 'learning_rate': 4.91836647373662e-05, 'epoch': 0.65}
{'loss': 0.2116, 'learning_rate': 4.914476755041076e-05, 'epoch': 0.68}
{'loss': 0.1978, 'learning_rate': 4.910587036345532e-05, 'epoch': 0.72}
{'loss': 0.1799, 'learning_rate': 4.9067012073686834e-05, 'epoch': 0.75}
{'loss': 0.1972, 'learning_rate': 4.902811488673139e-05, 'epoch': 0.78}
{'loss': 0.1859, 'learning_rate': 4.898925659696291e-05, 'epoch': 0.81}
{'loss': 0.1721, 'learning_rate': 4.895039830719443e-05, 'epoch': 0.84}
{'loss': 0.1587, 'learning_rate': 4.891150112023899e-05, 'epoch': 0.87}
{'loss': 0.1874, 'learning_rate': 4.8872642830470504e-05, 'epoch': 0.9}
{'loss': 0.1704, 'learning_rate': 4.883374564351506e-05, 'epoch': 0.93}


  0%|          | 0/8034 [00:00<?, ?it/s]

{'eval_loss': 3.5530920028686523, 'eval_cer': 0.13083191850594228, 'eval_accuracy': {'accuracy': 0.8560700356816862}, 'eval_runtime': 4398.885, 'eval_samples_per_second': 5.479, 'eval_steps_per_second': 1.826, 'epoch': 0.93}
{'loss': 0.1683, 'learning_rate': 4.879484845655962e-05, 'epoch': 0.96}
{'loss': 0.142, 'learning_rate': 4.875599016679114e-05, 'epoch': 1.0}
{'loss': 0.1402, 'learning_rate': 4.87170929798357e-05, 'epoch': 1.03}
{'loss': 0.1501, 'learning_rate': 4.867819579288026e-05, 'epoch': 1.06}
{'loss': 0.1433, 'learning_rate': 4.863937640029873e-05, 'epoch': 1.09}
{'loss': 0.148, 'learning_rate': 4.860047921334329e-05, 'epoch': 1.12}
{'loss': 0.1426, 'learning_rate': 4.856158202638785e-05, 'epoch': 1.15}
{'loss': 0.1603, 'learning_rate': 4.852268483943242e-05, 'epoch': 1.18}
{'loss': 0.1215, 'learning_rate': 4.8483787652476977e-05, 'epoch': 1.21}
{'loss': 0.1449, 'learning_rate': 4.8444929362708494e-05, 'epoch': 1.24}


  0%|          | 0/8034 [00:00<?, ?it/s]

{'eval_loss': 3.5393173694610596, 'eval_cer': 0.11089983022071308, 'eval_accuracy': {'accuracy': 0.8811301966641772}, 'eval_runtime': 4383.1655, 'eval_samples_per_second': 5.499, 'eval_steps_per_second': 1.833, 'epoch': 1.24}
{'loss': 0.1444, 'learning_rate': 4.840603217575305e-05, 'epoch': 1.28}
{'loss': 0.1492, 'learning_rate': 4.836713498879761e-05, 'epoch': 1.31}
{'loss': 0.1329, 'learning_rate': 4.832823780184217e-05, 'epoch': 1.34}
{'loss': 0.1365, 'learning_rate': 4.8289418409260646e-05, 'epoch': 1.37}
{'loss': 0.1268, 'learning_rate': 4.8250521222305205e-05, 'epoch': 1.4}
{'loss': 0.1127, 'learning_rate': 4.8211624035349765e-05, 'epoch': 1.43}
{'loss': 0.148, 'learning_rate': 4.817276574558128e-05, 'epoch': 1.46}
{'loss': 0.128, 'learning_rate': 4.813386855862584e-05, 'epoch': 1.49}
{'loss': 0.146, 'learning_rate': 4.809497137167041e-05, 'epoch': 1.52}
{'loss': 0.13, 'learning_rate': 4.8056113081901924e-05, 'epoch': 1.56}


  0%|          | 0/8034 [00:00<?, ?it/s]

{'eval_loss': 3.0631589889526367, 'eval_cer': 0.14210526315789473, 'eval_accuracy': {'accuracy': 0.8501784084308356}, 'eval_runtime': 4383.9752, 'eval_samples_per_second': 5.498, 'eval_steps_per_second': 1.833, 'epoch': 1.56}
{'loss': 0.1143, 'learning_rate': 4.801721589494648e-05, 'epoch': 1.59}
{'loss': 0.1443, 'learning_rate': 4.7978318707991036e-05, 'epoch': 1.62}
{'loss': 0.1374, 'learning_rate': 4.79394215210356e-05, 'epoch': 1.65}
{'loss': 0.125, 'learning_rate': 4.790056323126711e-05, 'epoch': 1.68}
{'loss': 0.1211, 'learning_rate': 4.786166604431168e-05, 'epoch': 1.71}
{'loss': 0.1161, 'learning_rate': 4.7822807754543195e-05, 'epoch': 1.74}
{'loss': 0.1038, 'learning_rate': 4.7783910567587754e-05, 'epoch': 1.77}
{'loss': 0.1328, 'learning_rate': 4.7745013380632314e-05, 'epoch': 1.8}
{'loss': 0.1366, 'learning_rate': 4.770611619367687e-05, 'epoch': 1.84}
{'loss': 0.1232, 'learning_rate': 4.766725790390839e-05, 'epoch': 1.87}


  0%|          | 0/8034 [00:00<?, ?it/s]

{'eval_loss': 3.8205153942108154, 'eval_cer': 0.30512733446519524, 'eval_accuracy': {'accuracy': 0.6618537880673803}, 'eval_runtime': 4379.3703, 'eval_samples_per_second': 5.504, 'eval_steps_per_second': 1.835, 'epoch': 1.87}
{'loss': 0.1279, 'learning_rate': 4.762836071695295e-05, 'epoch': 1.9}
{'loss': 0.1231, 'learning_rate': 4.7589463529997515e-05, 'epoch': 1.93}
{'loss': 0.1196, 'learning_rate': 4.755060524022903e-05, 'epoch': 1.96}
{'loss': 0.1281, 'learning_rate': 4.751170805327359e-05, 'epoch': 1.99}
{'loss': 0.1025, 'learning_rate': 4.747281086631815e-05, 'epoch': 2.02}
{'loss': 0.0949, 'learning_rate': 4.743395257654967e-05, 'epoch': 2.05}
{'loss': 0.1135, 'learning_rate': 4.7395094286781185e-05, 'epoch': 2.08}
{'loss': 0.1115, 'learning_rate': 4.7356197099825744e-05, 'epoch': 2.12}
{'loss': 0.099, 'learning_rate': 4.73172999128703e-05, 'epoch': 2.15}
{'loss': 0.1049, 'learning_rate': 4.727844162310182e-05, 'epoch': 2.18}


  0%|          | 0/8034 [00:00<?, ?it/s]

{'eval_loss': 4.5669026374816895, 'eval_cer': 0.36169779286926995, 'eval_accuracy': {'accuracy': 0.6326445938096423}, 'eval_runtime': 4387.1009, 'eval_samples_per_second': 5.494, 'eval_steps_per_second': 1.831, 'epoch': 2.18}
{'loss': 0.1085, 'learning_rate': 4.723954443614638e-05, 'epoch': 2.21}
{'loss': 0.1128, 'learning_rate': 4.72006861463779e-05, 'epoch': 2.24}
{'loss': 0.1071, 'learning_rate': 4.716178895942246e-05, 'epoch': 2.27}
{'loss': 0.1096, 'learning_rate': 4.7122891772467015e-05, 'epoch': 2.3}
{'loss': 0.1069, 'learning_rate': 4.708403348269853e-05, 'epoch': 2.33}
{'loss': 0.1031, 'learning_rate': 4.704513629574309e-05, 'epoch': 2.36}
{'loss': 0.0868, 'learning_rate': 4.700623910878765e-05, 'epoch': 2.4}
{'loss': 0.1076, 'learning_rate': 4.696738081901917e-05, 'epoch': 2.43}
{'loss': 0.099, 'learning_rate': 4.692848363206373e-05, 'epoch': 2.46}
{'loss': 0.0998, 'learning_rate': 4.688958644510829e-05, 'epoch': 2.49}


  0%|          | 0/8034 [00:00<?, ?it/s]

{'eval_loss': 3.769071578979492, 'eval_cer': 0.4029881154499151, 'eval_accuracy': {'accuracy': 0.5733963986391171}, 'eval_runtime': 4464.3624, 'eval_samples_per_second': 5.399, 'eval_steps_per_second': 1.8, 'epoch': 2.49}
{'loss': 0.0991, 'learning_rate': 4.685072815533981e-05, 'epoch': 2.52}
{'loss': 0.1053, 'learning_rate': 4.681183096838437e-05, 'epoch': 2.55}
{'loss': 0.1014, 'learning_rate': 4.6772972678615886e-05, 'epoch': 2.58}
{'loss': 0.1067, 'learning_rate': 4.6734075491660446e-05, 'epoch': 2.61}
{'loss': 0.1074, 'learning_rate': 4.669521720189196e-05, 'epoch': 2.65}
{'loss': 0.0905, 'learning_rate': 4.665632001493652e-05, 'epoch': 2.68}
{'loss': 0.1119, 'learning_rate': 4.661746172516804e-05, 'epoch': 2.71}
{'loss': 0.0971, 'learning_rate': 4.65785645382126e-05, 'epoch': 2.74}
{'loss': 0.119, 'learning_rate': 4.653966735125716e-05, 'epoch': 2.77}
{'loss': 0.1019, 'learning_rate': 4.6500770164301723e-05, 'epoch': 2.8}


  0%|          | 0/8034 [00:00<?, ?it/s]

{'eval_loss': 4.572350978851318, 'eval_cer': 0.3537181663837012, 'eval_accuracy': {'accuracy': 0.6240560949298813}, 'eval_runtime': 4531.9594, 'eval_samples_per_second': 5.318, 'eval_steps_per_second': 1.773, 'epoch': 2.8}
{'loss': 0.1087, 'learning_rate': 4.6461911874533234e-05, 'epoch': 2.83}
{'loss': 0.0965, 'learning_rate': 4.64230146875778e-05, 'epoch': 2.86}


KeyboardInterrupt: 

In [None]:
url = "crops\\8\\ballerTV_137example.jpg"
image = Image.open(url).convert("RGB")

pixel_values = processor(image, return_tensors="pt").pixel_values
generated_ids = model.generate(pixel_values.cuda())

generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(f'Номер на футболке: {generated_text}')

Определим eval_dataloader и Загрузим обученную модель

In [None]:
test_dataloader = DataLoader(test_dataset, batch_size=32)
model = VisionEncoderDecoderModel.from_pretrained('model')

# set special tokens used for creating the decoder_input_ids from the labels
model.config.decoder_start_token_id = processor.tokenizer.cls_token_id
model.config.pad_token_id = processor.tokenizer.pad_token_id
# make sure vocab size is set correctly
model.config.vocab_size = model.config.decoder.vocab_size

# set beam search parameters
model.config.eos_token_id = processor.tokenizer.sep_token_id
model.config.max_new_tokens = 4
model.config.early_stopping = True
model.config.no_repeat_ngram_size = 3
model.config.length_penalty = 2.0
model.config.num_beams = 10

Посчитаем точность обученной модели на eval датасете

In [None]:
from sklearn.metrics import accuracy_score

torch.cuda.empty_cache()
# Define the device to run the evaluation on
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Set the model to evaluation mode
model.eval()
model.to(device)
# Evaluate the model on the eval dataset
diff_acc = []
acc = []
i = 0

with torch.no_grad():
    for batch in tqdm(test_dataloader):
    
        target_text = processor.batch_decode(batch['labels'], skip_special_tokens=True)
        target_text = [int(x) for x in target_text]
           
        pixel_values = batch['pixel_values']
        generated_ids = model.generate(pixel_values.cuda(), max_new_tokens=4)       
       
        # Make a prediction
        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)

        x = [] 
        for j in generated_text:       
            if j.isdigit():
                x.append(int(j))
            else:
                x.append(1000)        
      
        bach_acc = accuracy_score(target_text, x)
        # Save the true and predicted labels
        diff_acc.append(bach_acc)  
        acc.append(bach_acc)

        if (i % 50  == 0) & (i > 2):
            accuracy = np.mean(diff_acc)
            print("Accuracy:", accuracy) 
        i += 1

    print(f"Total accuracy: {np.mean(acc)}")    

In [None]:
df10 = pd.read_csv('anno_00new.csv')

eval_dataset = IAMDataset(root_dir='C:\\Users\\Mytre\\OneDrive\\Документы\\Data\\Work\\',
                           df=df10,
                           processor=processor)

print("Number of validation examples:", len(eval_dataset))

Посчитаем точность обученной модели на eval датасете

In [None]:
from sklearn.metrics import accuracy_score

# Define the device to run the evaluation on
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Set the model to evaluation mode
model.eval()
model.to(device)
# Evaluate the model on the eval dataset
y_true = []
y_pred = []
xx = []
with torch.no_grad():
    for i in tqdm(range(len(test_dataset)), nrows=2):
        url = test_df['file_name'][i]
        xx.append(url)
        target_text = int(test_df['text'][i])
        image = Image.open(url).convert("RGB")   

        pixel_values = processor(image, return_tensors="pt").pixel_values
        generated_ids = model.generate(pixel_values.cuda(), max_new_tokens=3)

        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

        #pixel_values = batch['pixel_values']
        #generated_ids = model.generate(pixel_values.cuda())       

        # Make a prediction
        #generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)
        x = []        
        if generated_text.isdigit():
            
            x = int(generated_text)
        else:
            x = 1000    
        
        # Save the true and predicted labels
        y_true.append(target_text)
        y_pred.append(x)

        if (i % 400  == 0) & (i > 2):
            accuracy = accuracy_score(y_true, y_pred)
            print("Accuracy:", accuracy) 

print("Accuracy:", accuracy)                   