In [1]:
import os
import pandas as pd

def load_data_from_directory(directory):
    """Load labelled reviews from ``<directory>/pos`` and ``<directory>/neg``.

    Every ``.txt`` file under ``pos`` gets label 1 and every ``.txt`` file
    under ``neg`` gets label 0. Files are read in sorted filename order so the
    row order is the same on every run (``os.listdir`` makes no ordering
    guarantee).

    Args:
        directory: Folder that contains the ``pos`` and ``neg`` sub-folders.

    Returns:
        pandas.DataFrame with a ``review`` column (raw file contents) and a
        ``label`` column; all ``pos`` rows come before the ``neg`` rows.

    Raises:
        FileNotFoundError: If either sub-folder is missing (from ``os.listdir``).
    """
    reviews = []
    labels = []

    for label in ['pos', 'neg']:
        folder_path = os.path.join(directory, label)
        label_value = 1 if label == 'pos' else 0

        # Sorted so that the DataFrame row order is reproducible.
        for filename in sorted(os.listdir(folder_path)):
            if filename.endswith('.txt'):  # Ensure we only read text files
                with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as f:
                    reviews.append(f.read())
                    labels.append(label_value)

    return pd.DataFrame({'review': reviews, 'label': labels})

# Load the training and testing data.
# Each path must contain `pos/` and `neg/` sub-folders of .txt reviews.
train_df = load_data_from_directory('IMDB/train')
test_df = load_data_from_directory('IMDB/test')

# Display the first few rows of the training dataset
print(train_df.head())


                                              review  label
0  Bromwell High is a cartoon comedy. It ran at t...      1
1  Homelessness (or Houselessness as George Carli...      1
2  Brilliant over-acting by Lesley Ann Warren. Be...      1
3  This is easily the most underrated film inn th...      1
4  This is not the typical Mel Brooks film. It wa...      1


In [2]:
import os
import pandas as pd
import re
import string
import torch
from transformers import BertTokenizer


In [3]:
def clean_text(text):
    """Normalise a raw review into lowercase text without punctuation or digits.

    Steps, in order: replace HTML tags with a space, delete ASCII punctuation,
    delete digit runs, lowercase, then collapse whitespace runs to one space
    and trim both ends.

    Tags are replaced by a space rather than removed outright so that words
    separated only by markup (``end.<br /><br />The``) are not fused together.

    Args:
        text: Raw review string.

    Returns:
        The cleaned string.
    """
    text = re.sub(r'<.*?>', ' ', text)  # HTML tags -> space, keeps neighbouring words apart
    text = text.translate(str.maketrans('', '', string.punctuation))  # Remove punctuation
    text = re.sub(r'\d+', '', text)  # Remove numbers (optional)
    text = text.lower()  # Convert to lowercase
    text = re.sub(r'\s+', ' ', text).strip()  # Remove extra whitespace
    return text


In [4]:
def load_data_from_directory(directory):
    """Load labelled reviews from ``<directory>/pos`` and ``<directory>/neg``.

    Every ``.txt`` file under ``pos`` gets label 1 and every ``.txt`` file
    under ``neg`` gets label 0. Files are read in sorted filename order so the
    row order is the same on every run (``os.listdir`` makes no ordering
    guarantee).

    Args:
        directory: Folder that contains the ``pos`` and ``neg`` sub-folders.

    Returns:
        pandas.DataFrame with a ``review`` column (raw file contents) and a
        ``label`` column; all ``pos`` rows come before the ``neg`` rows.

    Raises:
        FileNotFoundError: If either sub-folder is missing (from ``os.listdir``).
    """
    reviews = []
    labels = []

    for label in ['pos', 'neg']:
        folder_path = os.path.join(directory, label)  # Using the directory parameter
        label_value = 1 if label == 'pos' else 0

        # Sorted so that the DataFrame row order is reproducible.
        for filename in sorted(os.listdir(folder_path)):
            if filename.endswith('.txt'):
                with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as f:
                    reviews.append(f.read())
                    labels.append(label_value)

    return pd.DataFrame({'review': reviews, 'label': labels})


In [5]:
# Read both splits; each path must contain `pos/` and `neg/` sub-folders.
train_df = load_data_from_directory('IMDB/train')
test_df = load_data_from_directory('IMDB/test')


In [6]:
# Overwrite the raw `review` column of both splits with its cleaned version.
train_df['review'] = train_df['review'].apply(clean_text)
test_df['review'] = test_df['review'].apply(clean_text)


In [7]:
# Preview the training split after clean_text has been applied.
train_df.head()

Unnamed: 0,review,label
0,bromwell high is a cartoon comedy it ran at th...,1
1,homelessness or houselessness as george carlin...,1
2,brilliant overacting by lesley ann warren best...,1
3,this is easily the most underrated film inn th...,1
4,this is not the typical mel brooks film it was...,1


In [8]:
# Preview the test split after clean_text has been applied.
test_df.head()

Unnamed: 0,review,label
0,i went and saw this movie last night after bei...,1
1,actor turned director bill paxton follows up h...,1
2,as a recreational golfer with some knowledge o...,1
3,i saw this film in a sneak preview and it is d...,1
4,bill paxton has taken the true story of the us...,1


In [9]:
# Load the tokenizer that belongs to the 'bert-base-uncased' checkpoint.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


In [10]:
# Tokenize every training review in a single call: padding enabled,
# sequences truncated to max_length=128, results returned as PyTorch tensors.
tokens = tokenizer.batch_encode_plus(
    train_df['review'].tolist(),
    max_length=128,
    padding=True,
    truncation=True,
    return_tensors='pt'
)


In [11]:
# Pull the token ids and the attention mask out of the training encoding.
train_input_ids = tokens['input_ids']
train_attention_mask = tokens['attention_mask']


In [12]:
# Training labels as a tensor, in the same row order as train_df.
train_labels = torch.tensor(train_df['label'].tolist())


In [13]:
class IMDBDataset(torch.utils.data.Dataset):
    """Map-style dataset over pre-tokenised reviews.

    Holds three parallel indexables. Item ``i`` is a dict that bundles the
    ``i``-th entry of each under the keys ``input_ids``, ``attention_mask``
    and ``labels``.
    """

    def __init__(self, input_ids, attention_mask, labels):
        # Stored as given; nothing is copied or validated.
        self.input_ids, self.attention_mask, self.labels = input_ids, attention_mask, labels

    def __len__(self):
        """Number of examples, taken from the labels container."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the example at ``idx`` as a dict of its three fields."""
        field_names = ('input_ids', 'attention_mask', 'labels')
        return {name: getattr(self, name)[idx] for name in field_names}


In [14]:
# Wrap the training tensors in an IMDBDataset.
train_dataset = IMDBDataset(train_input_ids, train_attention_mask, train_labels)


In [15]:
# Tokenize the test reviews with the same settings used for the training set.
test_tokens = tokenizer.batch_encode_plus(
    test_df['review'].tolist(),
    max_length=128,
    padding=True,
    truncation=True,
    return_tensors='pt'
)


In [16]:
# Pull the tensors out of the test encoding and build the matching labels tensor.
test_input_ids = test_tokens['input_ids']
test_attention_mask = test_tokens['attention_mask']
test_labels = torch.tensor(test_df['label'].tolist())

# Create the testing dataset
test_dataset = IMDBDataset(test_input_ids, test_attention_mask, test_labels)


In [19]:
from torch.utils.data import DataLoader, TensorDataset
# Batches of 32 for both splits; only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Preparing our dataset for model training and evaluation.

In [22]:
from transformers import BertForSequenceClassification, AdamW
from tqdm import tqdm

# Initialize the model: pretrained 'bert-base-uncased' with a 2-class head,
# optimised with AdamW at a learning rate of 2e-5.
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
optimizer = AdamW(model.parameters(), lr=2e-5)

# Move the model to GPU if available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Set the model in training mode
model.train()

# Training Loop
# NOTE(review): the epoch count 3 appears both in range() and in the progress message.
for epoch in range(3):  # Set the number of epochs
    print(f'Epoch {epoch + 1}/{3}')
    total_loss = 0  # running sum of per-batch losses for this epoch

    for batch in tqdm(train_loader):
        # Move data to GPU if available
        b_input_ids = batch['input_ids'].to(device)
        b_attention_mask = batch['attention_mask'].to(device)
        b_labels = batch['labels'].to(device)
    
        # Clear previous gradients
        optimizer.zero_grad()
    
        # Forward pass; passing labels makes the model return the loss
        outputs = model(b_input_ids, attention_mask=b_attention_mask, labels=b_labels)
        loss = outputs.loss
        total_loss += loss.item()
    
        # Backward pass
        loss.backward()
        optimizer.step()

    # Mean of the per-batch losses over the epoch
    avg_loss = total_loss / len(train_loader)
    print(f'Average Training Loss: {avg_loss}')


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3


100%|██████████████████████████████████████████████████████████████████████████████| 782/782 [5:57:11<00:00, 27.41s/it]


Average Training Loss: 0.3356241104776597
Epoch 2/3


100%|██████████████████████████████████████████████████████████████████████████████| 782/782 [5:58:30<00:00, 27.51s/it]


Average Training Loss: 0.20139865181349276
Epoch 3/3


100%|██████████████████████████████████████████████████████████████████████████████| 782/782 [5:55:19<00:00, 27.26s/it]

Average Training Loss: 0.10194295275982593





In [26]:
# Output directories (relative to the working directory) for the model and tokenizer.
model_save_path = 'bert_model'
tokenizer_save_path = 'bert_tokenizer'

# Save the trained model
model.save_pretrained(model_save_path)

# Save the tokenizer (assumed it's already initialized)
tokenizer.save_pretrained(tokenizer_save_path)

print(f'Model and tokenizer saved to {model_save_path} and {tokenizer_save_path}')

Model and tokenizer saved to bert_model and bert_tokenizer


In [None]:
# Training is finished and the model has been saved, so later cells can load it from disk instead of retraining.

In [32]:
import mlflow
from tqdm import tqdm
import numpy as np
from torch.utils.data import DataLoader, TensorDataset, Subset


# Step 5: Model Monitoring with Progress
class ModelMonitor:
    """Spot-check a classifier's accuracy on a random sample of the test set.

    Attributes:
        model: Classifier called as ``model(input_ids, attention_mask=...)``
            whose output exposes ``.logits``.
        test_loader: DataLoader whose ``dataset`` yields dicts with the keys
            ``input_ids``, ``attention_mask`` and ``labels``.
        last_accuracy: Accuracy measured by the most recent
            ``evaluate_model`` call (0 before the first call).
        accuracy_threshold: Accuracy below which retraining is recommended.
        drift_threshold: Stored only; not read by this class.
        sample_fraction: Fraction of the test set scored on each call.
    """

    def __init__(self, model, test_loader, accuracy_threshold=0.85, drift_threshold=0.05, sample_fraction=0.2):
        self.model = model
        self.test_loader = test_loader
        self.last_accuracy = 0
        self.accuracy_threshold = accuracy_threshold
        self.drift_threshold = drift_threshold
        self.sample_fraction = sample_fraction

    def _model_device(self):
        """Return the device of the model's first parameter (CPU if it has none)."""
        first_param = next(iter(self.model.parameters()), None)
        return first_param.device if first_param is not None else torch.device('cpu')

    def evaluate_model(self):
        """Score a random subset of the test set and say whether to retrain.

        ``last_accuracy`` is updated on every call, including when the
        accuracy falls below the threshold.

        Returns:
            True if the sampled accuracy is below ``accuracy_threshold``
            (retraining recommended), otherwise False.

        Raises:
            ValueError: If the test dataset is empty.
        """
        dataset = self.test_loader.dataset
        total_samples = len(dataset)
        if total_samples == 0:
            raise ValueError("Cannot evaluate the model on an empty test dataset")

        # Score at least one example and never more than exist.
        sample_size = min(total_samples, max(1, int(total_samples * self.sample_fraction)))
        sampled_indices = np.random.choice(total_samples, sample_size, replace=False).tolist()
        sampled_test_loader = DataLoader(Subset(dataset, sampled_indices), batch_size=self.test_loader.batch_size)

        # Use the model's own device instead of relying on a notebook global.
        device = self._model_device()

        # Model evaluation
        self.model.eval()
        predictions, true_labels = [], []

        with torch.no_grad():
            # Iterate the sampled loader, not self.test_loader, so only the sample is scored.
            for batch in sampled_test_loader:
                b_input_ids = batch['input_ids'].to(device)
                b_attention_mask = batch['attention_mask'].to(device)
                b_labels = batch['labels'].to(device)

                outputs = self.model(b_input_ids, attention_mask=b_attention_mask)
                preds = torch.argmax(outputs.logits, dim=1).flatten()

                predictions.extend(preds.cpu().numpy())
                true_labels.extend(b_labels.cpu().numpy())

        accuracy = np.mean(np.array(predictions) == np.array(true_labels))
        print(f"Sampled Accuracy: {accuracy:.4f}")

        # Record before the threshold check so a failing score is not lost.
        self.last_accuracy = accuracy

        # Check for accuracy drop
        if accuracy < self.accuracy_threshold:
            print("Model accuracy is below threshold, retraining recommended!")
            return True  # Indicates that retraining is recommended

        return False  # No retraining needed

# Step 6: Data Drift Detection with Progress
class DataDriftMonitor:
    """Detect distribution shift with a two-sample Kolmogorov-Smirnov test.

    The test needs numeric samples. Numeric inputs are used as given; when a
    sample consists entirely of strings, each string is represented by its
    whitespace-separated token count.

    Attributes:
        training_data: Reference sample (numbers or strings).
        threshold: p-value below which drift is reported.
    """

    def __init__(self, training_data, threshold=0.05):
        self.training_data = training_data
        self.threshold = threshold

    @staticmethod
    def _as_numeric(samples):
        """Return ``samples`` as a list of floats (strings become token counts)."""
        values = list(samples)
        if values and all(isinstance(value, str) for value in values):
            return [float(len(value.split())) for value in values]
        return [float(value) for value in values]

    def check_drift(self, new_data):
        """Compare ``new_data`` against the reference sample.

        Args:
            new_data: Iterable of numbers or of strings.

        Returns:
            True if the KS-test p-value is below ``threshold``, else False.
        """
        # Imported locally: the notebook never imports scipy.stats at the top.
        from scipy import stats

        ks_statistic, p_value = stats.ks_2samp(
            self._as_numeric(self.training_data), self._as_numeric(new_data)
        )
        print(f"KS Statistic: {ks_statistic:.4f}, P-value: {p_value:.4f}")

        if p_value < self.threshold:
            print("Data drift detected, retraining recommended!")
            return True  # Indicates that retraining is recommended

        return False  # No drift detected

# Step 7: Monitor and Retrain with Progress
def train_model(model, train_loader, optimizer, epochs=1):
    """Fine-tune ``model`` for ``epochs`` passes over ``train_loader``.

    Batches are dicts with ``input_ids``, ``attention_mask`` and ``labels``;
    each tensor is moved to the device that holds the model's parameters.
    The model is left in evaluation mode when training finishes.
    """
    first_param = next(iter(model.parameters()), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()
    for _ in range(epochs):
        for batch in tqdm(train_loader):
            optimizer.zero_grad()
            outputs = model(
                batch['input_ids'].to(target_device),
                attention_mask=batch['attention_mask'].to(target_device),
                labels=batch['labels'].to(target_device),
            )
            outputs.loss.backward()
            optimizer.step()
    model.eval()


# Number of passes over train_loader when retraining is triggered.
RETRAIN_EPOCHS = 1

monitor = ModelMonitor(model, test_loader)
needs_retraining = monitor.evaluate_model()

# Assume we get new incoming data for checking drift
# NOTE(review): the placeholder below compares the training reviews with
# themselves, so drift can never be detected until real data is supplied.
new_data = train_df['review']  # Replace this with the actual new data
drift_monitor = DataDriftMonitor(train_df['review'].tolist())
data_drift_detected = drift_monitor.check_drift(new_data)

if needs_retraining or data_drift_detected:
    print("Initiating retraining process...")
    train_model(model, train_loader, optimizer, epochs=RETRAIN_EPOCHS)

    model.save_pretrained('./new_model_ps1')
    tokenizer.save_pretrained('./new_tokenizer_ps1')

    with mlflow.start_run():
        mlflow.pytorch.log_model(model, "new_model_ps1")  # Log the model
        # NOTE(review): this accuracy was measured before retraining.
        mlflow.log_metric("accuracy", monitor.last_accuracy)  # Log accuracy
        mlflow.log_param("retraining_due_to", "accuracy drop" if needs_retraining else "data drift")  # Reason for retraining
        print("New model version logged to MLflow")


Sampled Accuracy: 0.8800
KS Statistic: 0.0000, P-value: 1.0000


In [3]:
# Display the model's module structure.
model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [1]:
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from sklearn.metrics import accuracy_score
import torch
import pandas as pd
import os

# Define device, model, tokenizer, and accuracy threshold.
# The model and tokenizer are loaded from the directories written by the save step.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BertForSequenceClassification.from_pretrained('./bert_model').to(device)
tokenizer = BertTokenizer.from_pretrained('./bert_tokenizer')
model.eval()  # Set model to evaluation mode
accuracy_threshold = 0.8  # Define your accuracy threshold

# Path to log file (CSV with `text` and `label` columns, appended to on every input)
log_path = 'user_inputs_log.csv'

# Function to log user inputs
def log_user_input(text, true_label, path=None):
    """Append one ``(text, label)`` row to the CSV log.

    The header row is written when the file does not exist yet or exists but
    is empty, so the log always starts with its column names.

    Args:
        text: The user-supplied text.
        true_label: The label the user reports for ``text``.
        path: CSV file to append to; defaults to the module-level ``log_path``.
    """
    target = log_path if path is None else path
    needs_header = not os.path.isfile(target) or os.path.getsize(target) == 0
    df = pd.DataFrame([[text, true_label]], columns=['text', 'label'])
    df.to_csv(target, mode='a', header=needs_header, index=False)

# Function to preprocess text and predict label
def predict(text):
    """Classify a single text with the module-level model.

    The text is tokenized with the module-level ``tokenizer`` (padding and
    truncation enabled) and run through ``model`` without gradient tracking.

    Returns:
        The index of the highest logit, as a Python number.
    """
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device)
    with torch.no_grad():
        logits = model(**encoded).logits
        best_class = torch.argmax(logits, dim=1).item()
    return best_class

# Function to calculate accuracy on logged data
def evaluate_accuracy():
    """Accuracy of ``predict`` over every row currently in the CSV log.

    Returns:
        1.0 when the log file does not exist yet; otherwise the
        ``accuracy_score`` of the predictions against the logged labels.
    """
    if os.path.isfile(log_path):
        logged = pd.read_csv(log_path)
        logged_texts = logged['text'].tolist()
        expected = logged['label'].tolist()
        # One model call per logged row.
        guesses = [predict(entry) for entry in logged_texts]
        return accuracy_score(expected, guesses)

    return 1.0  # Return perfect accuracy if no data yet

# Function to fine-tune model based on logged data
# Function to fine-tune model based on logged data
def fine_tune_model():
    """Fine-tune the module-level model on every row of the CSV log.

    Reads ``log_path``, tokenizes all logged texts together, takes three
    optimiser steps (each over the whole log as one batch), saves the model
    to 'updated_bert_model' and puts it back in evaluation mode.

    NOTE(review): the entire log goes through the model in a single batch,
    so memory use grows with the size of the log.
    NOTE(review): the model is saved to 'updated_bert_model', while the
    loading code in this file reads './bert_model'.
    """
    # Load logged data for retraining
    data = pd.read_csv(log_path)
    texts = data['text'].tolist()
    labels = data['label'].tolist()
    
    # Tokenize inputs
    inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    inputs = {key: value.to(device) for key, value in inputs.items()}
    labels = torch.tensor(labels).to(device)
    
    # Set model to training mode
    model.train()
    optimizer = AdamW(model.parameters(), lr=1e-5)
    
    # Fine-tune for a small number of epochs
    epochs = 3
    for epoch in range(epochs):
        optimizer.zero_grad()
        outputs = model(**inputs, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
    
    # Save the updated model
    model.save_pretrained('updated_bert_model')
    model.eval()  # Return model to evaluation mode
    print("Model fine-tuned and updated.")

# Main function to handle user input, prediction, logging, and evaluation
def handle_user_input(text, true_label):
    """Log one labelled input, report the prediction, and retrain if needed.

    The input is appended to the log first, so the reported accuracy already
    includes it. Fine-tuning runs when that accuracy is below
    ``accuracy_threshold``.
    """
    log_user_input(text, true_label)

    guess = predict(text)
    running_accuracy = evaluate_accuracy()
    print(f"Predicted Label: {guess}")
    print(f"Current Model Accuracy: {running_accuracy:.2f}")

    # Guard clause: nothing more to do unless accuracy is under the threshold.
    if not (running_accuracy < accuracy_threshold):
        return

    print("Accuracy below threshold, retraining model...")
    fine_tune_model()

# Example usage with sample inputs.
# Every run of this line appends another row to the CSV log.
handle_user_input("Sample text for testing.", true_label=1)


Predicted Label: 0
Current Model Accuracy: 0.00
Accuracy below threshold, retraining model...




Model fine-tuned and updated.


In [2]:
# full code


import os
import pandas as pd

def load_data_from_directory(directory):
    """Load labelled reviews from ``<directory>/pos`` and ``<directory>/neg``.

    Every ``.txt`` file under ``pos`` gets label 1 and every ``.txt`` file
    under ``neg`` gets label 0. Files are read in sorted filename order so the
    row order is the same on every run (``os.listdir`` makes no ordering
    guarantee).

    Args:
        directory: Folder that contains the ``pos`` and ``neg`` sub-folders.

    Returns:
        pandas.DataFrame with a ``review`` column (raw file contents) and a
        ``label`` column; all ``pos`` rows come before the ``neg`` rows.

    Raises:
        FileNotFoundError: If either sub-folder is missing (from ``os.listdir``).
    """
    reviews = []
    labels = []

    for label in ['pos', 'neg']:
        folder_path = os.path.join(directory, label)
        label_value = 1 if label == 'pos' else 0

        # Sorted so that the DataFrame row order is reproducible.
        for filename in sorted(os.listdir(folder_path)):
            if filename.endswith('.txt'):  # Ensure we only read text files
                with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as f:
                    reviews.append(f.read())
                    labels.append(label_value)

    return pd.DataFrame({'review': reviews, 'label': labels})

# Load the training and testing data.
# Each path must contain `pos/` and `neg/` sub-folders of .txt reviews.
train_df = load_data_from_directory('IMDB/train')
test_df = load_data_from_directory('IMDB/test')

# Display the first few rows of the training dataset
print(train_df.head())
import os
import pandas as pd
import re
import string
import torch
from transformers import BertTokenizer
def clean_text(text):
    """Normalise a raw review into lowercase text without punctuation or digits.

    Steps, in order: replace HTML tags with a space, delete ASCII punctuation,
    delete digit runs, lowercase, then collapse whitespace runs to one space
    and trim both ends.

    Tags are replaced by a space rather than removed outright so that words
    separated only by markup (``end.<br /><br />The``) are not fused together.

    Args:
        text: Raw review string.

    Returns:
        The cleaned string.
    """
    text = re.sub(r'<.*?>', ' ', text)  # HTML tags -> space, keeps neighbouring words apart
    text = text.translate(str.maketrans('', '', string.punctuation))  # Remove punctuation
    text = re.sub(r'\d+', '', text)  # Remove numbers (optional)
    text = text.lower()  # Convert to lowercase
    text = re.sub(r'\s+', ' ', text).strip()  # Remove extra whitespace
    return text
def load_data_from_directory(directory):
    """Load labelled reviews from ``<directory>/pos`` and ``<directory>/neg``.

    Every ``.txt`` file under ``pos`` gets label 1 and every ``.txt`` file
    under ``neg`` gets label 0. Files are read in sorted filename order so the
    row order is the same on every run (``os.listdir`` makes no ordering
    guarantee).

    Args:
        directory: Folder that contains the ``pos`` and ``neg`` sub-folders.

    Returns:
        pandas.DataFrame with a ``review`` column (raw file contents) and a
        ``label`` column; all ``pos`` rows come before the ``neg`` rows.

    Raises:
        FileNotFoundError: If either sub-folder is missing (from ``os.listdir``).
    """
    reviews = []
    labels = []

    for label in ['pos', 'neg']:
        folder_path = os.path.join(directory, label)  # Using the directory parameter
        label_value = 1 if label == 'pos' else 0

        # Sorted so that the DataFrame row order is reproducible.
        for filename in sorted(os.listdir(folder_path)):
            if filename.endswith('.txt'):
                with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as f:
                    reviews.append(f.read())
                    labels.append(label_value)

    return pd.DataFrame({'review': reviews, 'label': labels})
# Read both splits; each path must contain `pos/` and `neg/` sub-folders.
train_df = load_data_from_directory('IMDB/train')
test_df = load_data_from_directory('IMDB/test')
# Overwrite the raw `review` column of both splits with its cleaned version.
train_df['review'] = train_df['review'].apply(clean_text)
test_df['review'] = test_df['review'].apply(clean_text)
# Load the tokenizer that belongs to the 'bert-base-uncased' checkpoint.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Tokenize every training review in a single call: padding enabled,
# sequences truncated to max_length=128, results returned as PyTorch tensors.
tokens = tokenizer.batch_encode_plus(
    train_df['review'].tolist(),
    max_length=128,
    padding=True,
    truncation=True,
    return_tensors='pt'
)
# Pull the tensors out of the encoding and build the matching labels tensor.
train_input_ids = tokens['input_ids']
train_attention_mask = tokens['attention_mask']
train_labels = torch.tensor(train_df['label'].tolist())
class IMDBDataset(torch.utils.data.Dataset):
    """Map-style dataset over pre-tokenised reviews.

    Holds three parallel indexables. Item ``i`` is a dict that bundles the
    ``i``-th entry of each under the keys ``input_ids``, ``attention_mask``
    and ``labels``.
    """

    def __init__(self, input_ids, attention_mask, labels):
        # Stored as given; nothing is copied or validated.
        self.input_ids, self.attention_mask, self.labels = input_ids, attention_mask, labels

    def __len__(self):
        """Number of examples, taken from the labels container."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the example at ``idx`` as a dict of its three fields."""
        field_names = ('input_ids', 'attention_mask', 'labels')
        return {name: getattr(self, name)[idx] for name in field_names}
# Wrap the training tensors in an IMDBDataset.
train_dataset = IMDBDataset(train_input_ids, train_attention_mask, train_labels)
# Tokenize the test reviews with the same settings used for the training set.
test_tokens = tokenizer.batch_encode_plus(
    test_df['review'].tolist(),
    max_length=128,
    padding=True,
    truncation=True,
    return_tensors='pt'
)
test_input_ids = test_tokens['input_ids']
test_attention_mask = test_tokens['attention_mask']
test_labels = torch.tensor(test_df['label'].tolist())

# Create the testing dataset
test_dataset = IMDBDataset(test_input_ids, test_attention_mask, test_labels)
from torch.utils.data import DataLoader, TensorDataset
# Batches of 32 for both splits; only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
from transformers import BertForSequenceClassification, AdamW
from tqdm import tqdm

# Initialize the model: pretrained 'bert-base-uncased' with a 2-class head,
# optimised with AdamW at a learning rate of 2e-5.
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
optimizer = AdamW(model.parameters(), lr=2e-5)

# Move the model to GPU if available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Set the model in training mode
model.train()

# Training Loop
# NOTE(review): the epoch count 3 appears both in range() and in the progress message.
for epoch in range(3):  # Set the number of epochs
    print(f'Epoch {epoch + 1}/{3}')
    total_loss = 0  # running sum of per-batch losses for this epoch

    for batch in tqdm(train_loader):
        # Move data to GPU if available
        b_input_ids = batch['input_ids'].to(device)
        b_attention_mask = batch['attention_mask'].to(device)
        b_labels = batch['labels'].to(device)
    
        # Clear previous gradients
        optimizer.zero_grad()
    
        # Forward pass; passing labels makes the model return the loss
        outputs = model(b_input_ids, attention_mask=b_attention_mask, labels=b_labels)
        loss = outputs.loss
        total_loss += loss.item()
    
        # Backward pass
        loss.backward()
        optimizer.step()

    # Mean of the per-batch losses over the epoch
    avg_loss = total_loss / len(train_loader)
    print(f'Average Training Loss: {avg_loss}')
# Output directories (relative to the working directory) for the model and tokenizer.
model_save_path = 'bert_model'
tokenizer_save_path = 'bert_tokenizer'

# Save the trained model
model.save_pretrained(model_save_path)

# Save the tokenizer (assumed it's already initialized)
tokenizer.save_pretrained(tokenizer_save_path)

print(f'Model and tokenizer saved to {model_save_path} and {tokenizer_save_path}')
import mlflow
from tqdm import tqdm
import numpy as np
from torch.utils.data import DataLoader, TensorDataset, Subset


# Step 5: Model Monitoring with Progress
class ModelMonitor:
    """Spot-check a classifier's accuracy on a random sample of the test set.

    Attributes:
        model: Classifier called as ``model(input_ids, attention_mask=...)``
            whose output exposes ``.logits``.
        test_loader: DataLoader whose ``dataset`` yields dicts with the keys
            ``input_ids``, ``attention_mask`` and ``labels``.
        last_accuracy: Accuracy measured by the most recent
            ``evaluate_model`` call (0 before the first call).
        accuracy_threshold: Accuracy below which retraining is recommended.
        drift_threshold: Stored only; not read by this class.
        sample_fraction: Fraction of the test set scored on each call.
    """

    def __init__(self, model, test_loader, accuracy_threshold=0.85, drift_threshold=0.05, sample_fraction=0.2):
        self.model = model
        self.test_loader = test_loader
        self.last_accuracy = 0
        self.accuracy_threshold = accuracy_threshold
        self.drift_threshold = drift_threshold
        self.sample_fraction = sample_fraction

    def _model_device(self):
        """Return the device of the model's first parameter (CPU if it has none)."""
        first_param = next(iter(self.model.parameters()), None)
        return first_param.device if first_param is not None else torch.device('cpu')

    def evaluate_model(self):
        """Score a random subset of the test set and say whether to retrain.

        ``last_accuracy`` is updated on every call, including when the
        accuracy falls below the threshold.

        Returns:
            True if the sampled accuracy is below ``accuracy_threshold``
            (retraining recommended), otherwise False.

        Raises:
            ValueError: If the test dataset is empty.
        """
        dataset = self.test_loader.dataset
        total_samples = len(dataset)
        if total_samples == 0:
            raise ValueError("Cannot evaluate the model on an empty test dataset")

        # Score at least one example and never more than exist.
        sample_size = min(total_samples, max(1, int(total_samples * self.sample_fraction)))
        sampled_indices = np.random.choice(total_samples, sample_size, replace=False).tolist()
        sampled_test_loader = DataLoader(Subset(dataset, sampled_indices), batch_size=self.test_loader.batch_size)

        # Use the model's own device instead of relying on a notebook global.
        device = self._model_device()

        # Model evaluation
        self.model.eval()
        predictions, true_labels = [], []

        with torch.no_grad():
            # Iterate the sampled loader, not self.test_loader, so only the sample is scored.
            for batch in sampled_test_loader:
                b_input_ids = batch['input_ids'].to(device)
                b_attention_mask = batch['attention_mask'].to(device)
                b_labels = batch['labels'].to(device)

                outputs = self.model(b_input_ids, attention_mask=b_attention_mask)
                preds = torch.argmax(outputs.logits, dim=1).flatten()

                predictions.extend(preds.cpu().numpy())
                true_labels.extend(b_labels.cpu().numpy())

        accuracy = np.mean(np.array(predictions) == np.array(true_labels))
        print(f"Sampled Accuracy: {accuracy:.4f}")

        # Record before the threshold check so a failing score is not lost.
        self.last_accuracy = accuracy

        # Check for accuracy drop
        if accuracy < self.accuracy_threshold:
            print("Model accuracy is below threshold, retraining recommended!")
            return True  # Indicates that retraining is recommended

        return False  # No retraining needed

# Step 6: Data Drift Detection with Progress
class DataDriftMonitor:
    """Detect distribution shift with a two-sample Kolmogorov-Smirnov test.

    The test needs numeric samples. Numeric inputs are used as given; when a
    sample consists entirely of strings, each string is represented by its
    whitespace-separated token count.

    Attributes:
        training_data: Reference sample (numbers or strings).
        threshold: p-value below which drift is reported.
    """

    def __init__(self, training_data, threshold=0.05):
        self.training_data = training_data
        self.threshold = threshold

    @staticmethod
    def _as_numeric(samples):
        """Return ``samples`` as a list of floats (strings become token counts)."""
        values = list(samples)
        if values and all(isinstance(value, str) for value in values):
            return [float(len(value.split())) for value in values]
        return [float(value) for value in values]

    def check_drift(self, new_data):
        """Compare ``new_data`` against the reference sample.

        Args:
            new_data: Iterable of numbers or of strings.

        Returns:
            True if the KS-test p-value is below ``threshold``, else False.
        """
        # Imported locally: the notebook never imports scipy.stats at the top.
        from scipy import stats

        ks_statistic, p_value = stats.ks_2samp(
            self._as_numeric(self.training_data), self._as_numeric(new_data)
        )
        print(f"KS Statistic: {ks_statistic:.4f}, P-value: {p_value:.4f}")

        if p_value < self.threshold:
            print("Data drift detected, retraining recommended!")
            return True  # Indicates that retraining is recommended

        return False  # No drift detected

# Step 7: Monitor and Retrain with Progress
def train_model(model, train_loader, optimizer, epochs=1):
    """Fine-tune ``model`` for ``epochs`` passes over ``train_loader``.

    Batches are dicts with ``input_ids``, ``attention_mask`` and ``labels``;
    each tensor is moved to the device that holds the model's parameters.
    The model is left in evaluation mode when training finishes.
    """
    first_param = next(iter(model.parameters()), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()
    for _ in range(epochs):
        for batch in tqdm(train_loader):
            optimizer.zero_grad()
            outputs = model(
                batch['input_ids'].to(target_device),
                attention_mask=batch['attention_mask'].to(target_device),
                labels=batch['labels'].to(target_device),
            )
            outputs.loss.backward()
            optimizer.step()
    model.eval()


# Number of passes over train_loader when retraining is triggered.
RETRAIN_EPOCHS = 1

monitor = ModelMonitor(model, test_loader)
needs_retraining = monitor.evaluate_model()

# Assume we get new incoming data for checking drift
# NOTE(review): the placeholder below compares the training reviews with
# themselves, so drift can never be detected until real data is supplied.
new_data = train_df['review']  # Replace this with the actual new data
drift_monitor = DataDriftMonitor(train_df['review'].tolist())
data_drift_detected = drift_monitor.check_drift(new_data)

if needs_retraining or data_drift_detected:
    print("Initiating retraining process...")
    train_model(model, train_loader, optimizer, epochs=RETRAIN_EPOCHS)

    model.save_pretrained('./new_model_ps1')
    tokenizer.save_pretrained('./new_tokenizer_ps1')

    with mlflow.start_run():
        mlflow.pytorch.log_model(model, "new_model_ps1")  # Log the model
        # NOTE(review): this accuracy was measured before retraining.
        mlflow.log_metric("accuracy", monitor.last_accuracy)  # Log accuracy
        mlflow.log_param("retraining_due_to", "accuracy drop" if needs_retraining else "data drift")  # Reason for retraining
        print("New model version logged to MLflow")

from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from sklearn.metrics import accuracy_score
import torch
import pandas as pd
import os

# Define device, model, tokenizer, and accuracy threshold.
# The model and tokenizer are loaded from the directories written by the save step.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BertForSequenceClassification.from_pretrained('./bert_model').to(device)
tokenizer = BertTokenizer.from_pretrained('./bert_tokenizer')
model.eval()  # Set model to evaluation mode
accuracy_threshold = 0.8  # Define your accuracy threshold

# Path to log file (CSV with `text` and `label` columns, appended to on every input)
log_path = 'user_inputs_log.csv'

# Function to log user inputs
def log_user_input(text, true_label, path=None):
    """Append one ``(text, label)`` row to the CSV log.

    The header row is written when the file does not exist yet or exists but
    is empty, so the log always starts with its column names.

    Args:
        text: The user-supplied text.
        true_label: The label the user reports for ``text``.
        path: CSV file to append to; defaults to the module-level ``log_path``.
    """
    target = log_path if path is None else path
    needs_header = not os.path.isfile(target) or os.path.getsize(target) == 0
    df = pd.DataFrame([[text, true_label]], columns=['text', 'label'])
    df.to_csv(target, mode='a', header=needs_header, index=False)

# Function to preprocess text and predict label
def predict(text):
    """Classify a single text with the module-level model.

    The text is tokenized with the module-level ``tokenizer`` (padding and
    truncation enabled) and run through ``model`` without gradient tracking.

    Returns:
        The index of the highest logit, as a Python number.
    """
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device)
    with torch.no_grad():
        logits = model(**encoded).logits
        best_class = torch.argmax(logits, dim=1).item()
    return best_class

# Function to calculate accuracy on logged data
def evaluate_accuracy():
    """Accuracy of ``predict`` over every row currently in the CSV log.

    Returns:
        1.0 when the log file does not exist yet; otherwise the
        ``accuracy_score`` of the predictions against the logged labels.
    """
    if os.path.isfile(log_path):
        logged = pd.read_csv(log_path)
        logged_texts = logged['text'].tolist()
        expected = logged['label'].tolist()
        # One model call per logged row.
        guesses = [predict(entry) for entry in logged_texts]
        return accuracy_score(expected, guesses)

    return 1.0  # Return perfect accuracy if no data yet

# Function to fine-tune model based on logged data
def fine_tune_model():
    """Fine-tune the module-level model on every row of the CSV log.

    Reads ``log_path``, tokenizes all logged texts together, takes three
    optimiser steps (each over the whole log as one batch), saves the model
    to 'updated_bert_model' and puts it back in evaluation mode.

    NOTE(review): the entire log goes through the model in a single batch,
    so memory use grows with the size of the log.
    NOTE(review): the model is saved to 'updated_bert_model', while the
    loading code in this file reads './bert_model'.
    """
    # Load logged data for retraining
    data = pd.read_csv(log_path)
    texts = data['text'].tolist()
    labels = data['label'].tolist()
    
    # Tokenize inputs
    inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    inputs = {key: value.to(device) for key, value in inputs.items()}
    labels = torch.tensor(labels).to(device)
    
    # Set model to training mode
    model.train()
    optimizer = AdamW(model.parameters(), lr=1e-5)
    
    # Fine-tune for a small number of epochs
    epochs = 3
    for epoch in range(epochs):
        optimizer.zero_grad()
        outputs = model(**inputs, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
    
    # Save the updated model
    model.save_pretrained('updated_bert_model')
    model.eval()  # Return model to evaluation mode
    print("Model fine-tuned and updated.")

# Main function to handle user input, prediction, logging, and evaluation
def handle_user_input(text, true_label):
    """Log one labelled input, report the prediction, and retrain if needed.

    The input is appended to the log first, so the reported accuracy already
    includes it. Fine-tuning runs when that accuracy is below
    ``accuracy_threshold``.
    """
    log_user_input(text, true_label)

    guess = predict(text)
    running_accuracy = evaluate_accuracy()
    print(f"Predicted Label: {guess}")
    print(f"Current Model Accuracy: {running_accuracy:.2f}")

    # Guard clause: nothing more to do unless accuracy is under the threshold.
    if not (running_accuracy < accuracy_threshold):
        return

    print("Accuracy below threshold, retraining model...")
    fine_tune_model()

# Example usage with sample inputs.
# Every run of this line appends another row to the CSV log.
handle_user_input("Sample text for testing.", true_label=1)

