In [1]:
# File: auto_retrain.py
import os
import pandas as pd
import torch
import numpy as np
from transformers import (
    AutoTokenizer, 
    AutoModelForSeq2SeqLM,
    get_linear_schedule_with_warmup
)
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW  # Correct import location
from tqdm import tqdm
import logging
from pathlib import Path
from datetime import datetime

In [2]:
# --------------------------
# 🛠️ CONFIGURATION
# --------------------------
# Directory passed to `from_pretrained` for both the tokenizer and the seq2seq model.
MODEL_PATH = "bart_summarizer_with_rl"
# CSV read with pandas; the code below accesses the columns 'article', 'summary' and
# 'feedback'. After a successful run the file is renamed into feedback_data/archive.
FEEDBACK_CSV = "feedback_data/feedback_log.csv"
# Destination of `save_pretrained` for the retrained model and its tokenizer.
RETRAINED_MODEL_PATH = "bart_summarizer_with_rl_retrained"
# CUDA when available, otherwise CPU; models and batches are moved here.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# DataLoader batch size used by retrain_model.
BATCH_SIZE = 4
# Number of passes over the feedback dataset.
EPOCHS = 3
# Learning rate of the AdamW optimizer that updates the summarizer.
LEARNING_RATE = 5e-5
# Forwarded to ppo_train_step as `ppo_clip` (half-width of the clamp on the ratio).
PPO_CLIP = 0.2
# Forwarded to ppo_train_step as `entropy_coef` (weight of the entropy term).
ENTROPY_COEF = 0.01
FEEDBACK_THRESHOLD = 10  # Minimum number of rows with feedback == 1 before retraining runs

# Setup logging: INFO and above go to retraining.log (nothing is echoed to the cell
# output, which is why failures only show up as "Check logs for details").
logging.basicConfig(
    filename='retraining.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

In [3]:

# --------------------------
# 📊 DATA PREPARATION
# --------------------------
class FeedbackDataset(Dataset):
    """Map-style dataset that turns feedback rows into seq2seq training examples.

    Each row of ``dataframe`` must provide the columns ``article``, ``summary`` and
    ``feedback``. Articles are tokenized as encoder inputs and summaries as labels.

    Args:
        tokenizer: Hugging Face tokenizer; called once with the article and once with
            ``text_target=summary``.
        dataframe: pandas DataFrame holding the feedback rows (indexed positionally).
        max_length: Fixed length the article is padded/truncated to.
        max_target_length: Fixed length the summary is padded/truncated to
            (previously hard-coded to 150).
    """

    # Label value that Hugging Face seq2seq losses (and cross-entropy's ignore_index)
    # skip; padded label positions are set to it so padding does not contribute to loss.
    IGNORE_INDEX = -100

    def __init__(self, tokenizer, dataframe, max_length=1024, max_target_length=150):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.max_length = max_length
        self.max_target_length = max_target_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return one example as a dict of 1-D tensors plus a scalar ``feedback``."""
        row = self.data.iloc[idx]
        article = str(row['article'])
        summary = str(row['summary'])
        feedback = int(row['feedback'])

        # Tokenize inputs
        inputs = self.tokenizer(
            article,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        # Tokenize targets. `text_target=` replaces the deprecated
        # `as_target_tokenizer()` context manager.
        targets = self.tokenizer(
            text_target=summary,
            max_length=self.max_target_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        # squeeze(0) drops only the batch axis added by return_tensors='pt'; a bare
        # squeeze() would also drop the sequence axis when the length is 1.
        labels = targets['input_ids'].squeeze(0).clone()
        target_mask = targets['attention_mask'].squeeze(0)
        # Mask padding via the attention mask rather than by comparing with the pad id,
        # so real tokens that share the pad id are not masked by accident.
        labels[target_mask == 0] = self.IGNORE_INDEX

        return {
            'input_ids': inputs['input_ids'].squeeze(0),
            'attention_mask': inputs['attention_mask'].squeeze(0),
            'labels': labels,
            'feedback': torch.tensor(feedback, dtype=torch.float)
        }

def load_feedback_data(tokenizer):
    """Build a FeedbackDataset from the liked rows of the feedback CSV.

    Reads FEEDBACK_CSV, keeps the rows whose 'feedback' column equals 1 and wraps
    them in a FeedbackDataset. Returns None when fewer than FEEDBACK_THRESHOLD such
    rows exist, or when anything raises while loading (the error is logged).
    """
    try:
        feedback_frame = pd.read_csv(FEEDBACK_CSV)

        # Only likes are used as training signal
        liked_rows = feedback_frame.loc[feedback_frame['feedback'] == 1]
        liked_count = len(liked_rows)

        if liked_count >= FEEDBACK_THRESHOLD:
            return FeedbackDataset(tokenizer, liked_rows)

        logging.info(f"Not enough positive feedbacks ({liked_count}/{FEEDBACK_THRESHOLD})")
        return None

    except Exception as e:
        logging.error(f"Error loading feedback data: {str(e)}")
        return None

In [4]:
# --------------------------
# 🧠 REWARD MODEL (Shallow Neural Network)
# --------------------------
class RewardModel(torch.nn.Module):
    """Two-layer perceptron mapping a feature vector to a score in (0, 1).

    Architecture: Linear(input_size, hidden_size) -> ReLU -> Dropout(0.1)
    -> Linear(hidden_size, 1) -> sigmoid.
    """

    def __init__(self, input_size=768, hidden_size=128):
        super().__init__()
        # Attribute names are part of the saved state_dict layout; keep them stable.
        self.fc1 = torch.nn.Linear(in_features=input_size, out_features=hidden_size)
        self.fc2 = torch.nn.Linear(in_features=hidden_size, out_features=1)
        self.dropout = torch.nn.Dropout(p=0.1)

    def forward(self, x):
        """Score ``x`` (last dimension ``input_size``); output's last dimension is 1."""
        hidden = self.fc1(x).relu()
        hidden = self.dropout(hidden)
        score_logit = self.fc2(hidden)
        return score_logit.sigmoid()

In [5]:
# --------------------------
# 🔄 PPO TRAINING
# --------------------------
def ppo_train_step(model, batch, reward_model, optimizer, ppo_clip=0.2, entropy_coef=0.01):
    """Run one optimizer step on a PPO-style clipped objective plus the supervised loss.

    The minimized loss is ``policy_loss - entropy_coef * entropy + outputs.loss``.

    Only a single update is made per batch, so the "old" policy is the current policy
    with gradients detached: the probability ratio equals 1 in value (clipping is
    therefore inactive) but still carries the gradient of the sequence log-probability,
    weighted by the advantage.

    Fixes over the previous version:
      * the ratio is computed per sequence from the log-probabilities of the label
        tokens, so it has shape (batch,) like the advantage; before, a (batch,)
        advantage was multiplied with a (batch, seq, vocab) tensor, which fails to
        broadcast for any batch size other than 1;
      * entropy is summed over the vocabulary (the definition of entropy) and averaged
        over non-ignored positions, instead of averaging over the vocabulary axis;
      * rewards are flattened with ``reshape(-1)`` so a batch of one keeps its axis.

    Args:
        model: Seq2seq model called with ``input_ids``, ``attention_mask`` and
            ``labels``; its output must expose ``logits``, ``encoder_last_hidden_state``
            and ``loss``.
        batch: Mapping with the keys ``input_ids``, ``attention_mask`` and ``labels``.
            Label positions equal to -100 are excluded from the policy and entropy
            terms; any other value (including pad ids) counts as a real token.
        reward_model: Callable mapping mean-pooled encoder states to one score per row.
        optimizer: Optimizer over the parameters of ``model``.
        ppo_clip: Half-width of the clamp applied to the probability ratio.
        entropy_coef: Weight of the entropy bonus.

    Returns:
        float: The total loss of this step.
    """
    labels = batch['labels']
    outputs = model(
        input_ids=batch['input_ids'],
        attention_mask=batch['attention_mask'],
        labels=labels
    )

    # Rewards are treated as constants: no gradient flows into the reward model.
    with torch.no_grad():
        pooled_output = outputs.encoder_last_hidden_state.mean(dim=1)
        rewards = reward_model(pooled_output).reshape(-1)          # (batch,)
        advantage = rewards - rewards.mean()                       # (batch,)

    log_probs = outputs.logits.log_softmax(dim=-1)                 # (batch, seq, vocab)

    # Log-probability the policy assigns to each label token; ignored positions are
    # pointed at index 0 for the gather and then zeroed out by the mask.
    valid = labels != -100
    valid_f = valid.to(log_probs.dtype)
    gather_index = labels.masked_fill(~valid, 0).unsqueeze(-1)
    token_log_probs = log_probs.gather(-1, gather_index).squeeze(-1)   # (batch, seq)
    tokens_per_seq = valid_f.sum(dim=1).clamp(min=1)
    seq_log_prob = (token_log_probs * valid_f).sum(dim=1) / tokens_per_seq  # (batch,)

    # PPO clipped surrogate
    ratio = torch.exp(seq_log_prob - seq_log_prob.detach())
    unclipped = ratio * advantage
    clipped = torch.clamp(ratio, 1 - ppo_clip, 1 + ppo_clip) * advantage
    policy_loss = -torch.min(unclipped, clipped).mean()

    # Entropy bonus: per-token entropy over the vocabulary, averaged over real tokens.
    token_entropy = -(log_probs.exp() * log_probs).sum(dim=-1)     # (batch, seq)
    entropy = (token_entropy * valid_f).sum() / valid_f.sum().clamp(min=1)

    # Total loss
    loss = policy_loss - entropy_coef * entropy + outputs.loss

    # Backpropagation
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    return loss.item()

In [6]:
# --------------------------
# 🔄 RETRAINING FUNCTION
# --------------------------
def retrain_model():
    """Main retraining function with PPO"""
    try:
        # Load tokenizer and model
        tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
        model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_PATH).to(DEVICE)
        
        # Initialize reward model with fallback options
        reward_model = RewardModel().to(DEVICE)
        if os.path.exists("reward_model.pth"):
            try:
                reward_model.load_state_dict(torch.load("reward_model.pth", map_location=DEVICE))
                logging.info("Successfully loaded reward model")
            except Exception as e:
                logging.warning(f"Failed to load reward model: {str(e)}. Initializing new one.")
                reward_model.apply(self._init_weights)
        else:
            logging.warning("No reward_model.pth found. Initializing new reward model.")
            reward_model.apply(self._init_weights)
            # Optionally train a basic reward model first
            if len(pd.read_csv(FEEDBACK_CSV)) >= FEEDBACK_THRESHOLD:
                logging.info("Training initial reward model...")
                train_basic_reward_model()  # Implement this function
        
        # Load feedback data
        dataset = load_feedback_data(tokenizer)
        if dataset is None:
            return False
            
        dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)
        
        # Setup optimizer
        optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=100,
            num_training_steps=len(dataloader) * EPOCHS
        )
        
        # Training loop
        model.train()
        reward_model.train()  # Allow reward model to learn during training
        logging.info("Starting retraining process...")
        
        for epoch in range(EPOCHS):
            epoch_loss = 0
            progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{EPOCHS}")
            
            for batch in progress_bar:
                batch = {k: v.to(DEVICE) for k, v in batch.items()}
                
                # PPO training step with error handling
                try:
                    loss = ppo_train_step(
                        model,
                        batch,
                        reward_model,
                        optimizer,
                        PPO_CLIP,
                        ENTROPY_COEF
                    )
                    epoch_loss += loss
                    progress_bar.set_postfix(loss=loss)
                    scheduler.step()
                except Exception as e:
                    logging.error(f"Error in batch processing: {str(e)}")
                    continue
            
            # Save intermediate checkpoints
            if (epoch + 1) % 2 == 0:  # Save every 2 epochs
                torch.save(reward_model.state_dict(), f"reward_model_epoch_{epoch+1}.pth")
            
            logging.info(f"Epoch {epoch+1} completed. Avg loss: {epoch_loss/len(dataloader)}")
        
        # Save final models
        model.save_pretrained(RETRAINED_MODEL_PATH)
        tokenizer.save_pretrained(RETRAINED_MODEL_PATH)
        torch.save(reward_model.state_dict(), "reward_model.pth")
        
        # Archive feedback data
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        archive_dir = Path("feedback_data/archive")
        archive_dir.mkdir(exist_ok=True)
        os.rename(FEEDBACK_CSV, archive_dir / f"feedback_{timestamp}.csv")
        
        logging.info("Retraining completed successfully!")
        return True
        
    except Exception as e:
        logging.error(f"Retraining failed: {str(e)}", exc_info=True)
        return False

def _init_weights(self, module):
    """Initialize weights for the reward model"""
    if isinstance(module, nn.Linear):
        torch.nn.init.xavier_uniform_(module.weight)
        if module.bias is not None:
            module.bias.data.zero_()

def train_basic_reward_model():
    """Bootstrap ``reward_model.pth`` from the feedback CSV when none exists.

    Each row's "article [SEP] summary" text is encoded with the encoder of the model
    at MODEL_PATH, mean-pooled over the sequence, and used as the input feature of the
    reward model; the target is 1.0 for rows with ``feedback == 1`` and 0.0 otherwise.

    The previous version passed raw integer token ids of shape (1, seq_len) to the
    reward model's first Linear layer, which fails on both dtype and shape, and its
    loss compared a (1, 1) prediction with a (1,) target; every call therefore ended
    in the ``except`` branch. The features are now mean-pooled encoder states, the
    same kind of input ``ppo_train_step`` feeds to the reward model.

    NOTE(review): RewardModel() is built with its default input size; this assumes
    the encoder's hidden size matches it. A mismatch raises, is logged, and returns
    False.

    Returns:
        bool: True when the reward model was trained and saved, False when there are
        fewer than 10 rows or any exception occurred (details are logged).
    """
    try:
        df = pd.read_csv(FEEDBACK_CSV)
        if len(df) < 10:  # Minimum samples needed
            return False

        tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
        encoder = AutoModelForSeq2SeqLM.from_pretrained(MODEL_PATH).get_encoder().to(DEVICE)
        encoder.eval()

        # The encoder is frozen, so features are computed once instead of per epoch.
        features = []
        targets = []
        with torch.no_grad():
            for _, row in df.iterrows():
                inputs = tokenizer(
                    f"{row['article']} [SEP] {row['summary']}",
                    max_length=1024,
                    truncation=True,
                    return_tensors='pt'
                ).to(DEVICE)
                hidden = encoder(
                    input_ids=inputs['input_ids'],
                    attention_mask=inputs['attention_mask']
                ).last_hidden_state
                features.append(hidden.mean(dim=1))  # (1, hidden)
                # Shape (1, 1) matches the reward model output; values stay in [0, 1]
                # as binary cross-entropy requires, whatever encoding dislikes use.
                targets.append(
                    torch.tensor([[float(row['feedback'] == 1)]], dtype=torch.float, device=DEVICE)
                )

        reward_model = RewardModel().to(DEVICE)
        reward_model.train()
        optimizer = AdamW(reward_model.parameters(), lr=1e-4)

        # Simple training loop
        for epoch in range(3):  # Fewer epochs for initial training
            for feature, target in zip(features, targets):
                outputs = reward_model(feature)
                loss = torch.nn.functional.binary_cross_entropy(outputs, target)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        torch.save(reward_model.state_dict(), "reward_model.pth")
        return True
    except Exception as e:
        logging.error(f"Basic reward model training failed: {str(e)}")
        return False

In [None]:
# --------------------------
# 🚀 AUTOMATION ENTRY POINT
# --------------------------
if __name__ == "__main__":
    # Retraining only runs when the feedback log exists and holds enough likes.
    if not os.path.exists(FEEDBACK_CSV):
        print("No feedback data found")
    else:
        df = pd.read_csv(FEEDBACK_CSV)
        positive_feedbacks = int((df['feedback'] == 1).sum())

        if positive_feedbacks < FEEDBACK_THRESHOLD:
            print(f"Not enough feedbacks ({positive_feedbacks}/{FEEDBACK_THRESHOLD})")
        else:
            logging.info(f"Starting retraining with {positive_feedbacks} positive feedbacks")
            success = retrain_model()

            # retrain_model reports details through the log file, not stdout
            print(
                "✅ Retraining completed successfully!"
                if success
                else "❌ Retraining failed. Check logs for details."
            )



❌ Retraining failed. Check logs for details.


: 

In [None]:
import torch
import torch.nn as nn  # This is the missing import
import torch.nn.functional as F
from torch.optim import AdamW
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    get_linear_schedule_with_warmup
)
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import os
from pathlib import Path
import logging
from tqdm import tqdm
from datetime import datetime

# --------------------------
# 🛠️ CONFIGURATION
# --------------------------
# Directory passed to `from_pretrained` for both the tokenizer and the seq2seq model.
MODEL_PATH = "bart_summarizer_with_rl"
# CSV read with pandas; after a successful run it is renamed into feedback_data/archive.
FEEDBACK_CSV = "feedback_data/feedback_log.csv"
# Destination of `save_pretrained` for the retrained model and its tokenizer.
RETRAINED_MODEL_PATH = "bart_summarizer_with_rl_retrained"
# CUDA when available, otherwise CPU; models and batches are moved here.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# DataLoader batch size used by retrain_model.
BATCH_SIZE = 4
# Number of passes over the feedback dataset.
EPOCHS = 3
# Learning rate of the AdamW optimizer that updates the summarizer.
LEARNING_RATE = 5e-5
# PPO_CLIP and ENTROPY_COEF are not referenced by the retrain_model defined in this
# cell, which trains on the supervised loss only.
PPO_CLIP = 0.2
ENTROPY_COEF = 0.01
# Minimum number of rows with feedback == 1 before retraining runs.
FEEDBACK_THRESHOLD = 10

# Setup logging: INFO and above go to retraining.log.
# NOTE(review): logging.basicConfig does nothing when the root logger already has
# handlers, e.g. if an earlier cell configured logging in the same kernel — confirm
# this call is the one that takes effect.
logging.basicConfig(
    filename='retraining.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# --------------------------
# 🧠 REWARD MODEL
# --------------------------
class RewardModel(nn.Module):
    """Small feed-forward scorer: Linear -> ReLU -> Dropout(0.1) -> Linear -> sigmoid.

    Takes inputs whose last dimension is ``input_size`` and returns values in (0, 1)
    with a last dimension of 1.
    """

    def __init__(self, input_size=768, hidden_size=128):
        super().__init__()
        # fc1 / fc2 / dropout are the names stored in saved state dicts.
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=1)
        self.dropout = nn.Dropout(p=0.1)

    def forward(self, x):
        activated = F.relu(self.fc1(x))
        return torch.sigmoid(self.fc2(self.dropout(activated)))

# --------------------------
# 🔄 RETRAINING UTILITIES
# --------------------------
def _init_weights(module):
    """Initialize weights for the reward model"""
    if isinstance(module, nn.Linear):
        nn.init.xavier_uniform_(module.weight)
        if module.bias is not None:
            module.bias.data.zero_()

def train_basic_reward_model():
    """Bootstrap ``reward_model.pth`` from the feedback CSV when none exists.

    Each row's "article [SEP] summary" text is encoded with the encoder of the model
    at MODEL_PATH and mean-pooled over the sequence; the pooled vector is the reward
    model's input and the target is 1.0 for ``feedback == 1`` rows, 0.0 otherwise.

    The previous version fed ``input_ids.float()`` — a (1, seq_len) tensor of token
    ids — to a Linear layer expecting ``input_size`` features, so the call failed on
    shape for almost every row, and the (1, 1) prediction was compared with a (1,)
    target. Every run therefore ended in the ``except`` branch and returned False.

    NOTE(review): RewardModel() is built with its default input size; this assumes
    the encoder's hidden size matches it. A mismatch raises, is logged, and returns
    False.

    Returns:
        bool: True when the reward model was trained and saved, False when there are
        fewer than 10 rows or any exception occurred (details are logged).
    """
    try:
        df = pd.read_csv(FEEDBACK_CSV)
        if len(df) < 10:  # Minimum samples needed
            return False

        tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
        encoder = AutoModelForSeq2SeqLM.from_pretrained(MODEL_PATH).get_encoder().to(DEVICE)
        encoder.eval()

        # The encoder is frozen, so every row is encoded once, not once per epoch.
        examples = []
        with torch.no_grad():
            for _, row in df.iterrows():
                inputs = tokenizer(
                    f"{row['article']} [SEP] {row['summary']}",
                    max_length=1024,
                    truncation=True,
                    return_tensors='pt'
                ).to(DEVICE)
                hidden = encoder(
                    input_ids=inputs['input_ids'],
                    attention_mask=inputs['attention_mask']
                ).last_hidden_state
                pooled = hidden.mean(dim=1)  # (1, hidden)
                # (1, 1) target in [0, 1], matching the reward model's output shape.
                target = torch.tensor(
                    [[float(row['feedback'] == 1)]], dtype=torch.float, device=DEVICE
                )
                examples.append((pooled, target))

        reward_model = RewardModel().to(DEVICE)
        reward_model.train()
        optimizer = AdamW(reward_model.parameters(), lr=1e-4)

        # Simple training loop
        for epoch in range(3):  # Fewer epochs for initial training
            for pooled, target in examples:
                outputs = reward_model(pooled)
                loss = F.binary_cross_entropy(outputs, target)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        torch.save(reward_model.state_dict(), "reward_model.pth")
        return True
    except Exception as e:
        logging.error(f"Basic reward model training failed: {str(e)}")
        return False

# --------------------------
# 🔄 RETRAINING FUNCTION
# --------------------------
def retrain_model():
    """Fine-tune the summarizer on liked feedback with the supervised seq2seq loss.

    Steps: load tokenizer/model from MODEL_PATH, obtain a reward model (loaded from
    ``reward_model.pth``, bootstrapped via ``train_basic_reward_model`` when missing,
    or freshly initialized), train for EPOCHS epochs on rows with ``feedback == 1``,
    save the model/tokenizer to RETRAINED_MODEL_PATH, save the reward model and move
    the feedback CSV into ``feedback_data/archive``. The reward model is carried along
    and checkpointed but does not take part in the loss of this version.

    Fixes over the previous version:
      * a reward model produced by ``train_basic_reward_model`` is actually loaded
        instead of being written to disk and ignored;
      * warmup is capped relative to the total number of steps; a fixed 100 warmup
        steps kept the learning rate near zero when there are only a few batches;
      * if every batch fails, nothing is saved or archived and False is returned;
      * the per-epoch average uses the number of batches that actually trained;
      * the archive directory is created with ``parents=True``.

    Returns:
        bool: True when training ran and all artifacts were written, False when the
        feedback file is missing, there is not enough positive feedback, no batch
        could be trained, or any exception occurred (details are logged).
    """
    try:
        # Load tokenizer and model
        tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
        model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_PATH).to(DEVICE)

        # Obtain the reward model: bootstrap first if no weights exist yet.
        reward_model = RewardModel().to(DEVICE)
        if not os.path.exists("reward_model.pth"):
            logging.warning("No reward_model.pth found. Initializing new reward model.")
            train_basic_reward_model()  # Train initial reward model

        if os.path.exists("reward_model.pth"):
            try:
                reward_model.load_state_dict(torch.load("reward_model.pth", map_location=DEVICE))
                logging.info("Successfully loaded reward model")
            except Exception as e:
                logging.warning(f"Failed to load reward model: {str(e)}. Initializing new one.")
                reward_model.apply(_init_weights)
        else:
            reward_model.apply(_init_weights)

        # Load feedback data
        if not os.path.exists(FEEDBACK_CSV):
            logging.error("Feedback file not found")
            return False

        df = pd.read_csv(FEEDBACK_CSV)
        positive_feedback = df[df['feedback'] == 1]
        if len(positive_feedback) < FEEDBACK_THRESHOLD:
            logging.info(f"Not enough positive feedbacks ({len(positive_feedback)}/{FEEDBACK_THRESHOLD})")
            return False

        # Prepare dataloader
        dataset = FeedbackDataset(tokenizer, positive_feedback)
        dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

        # Setup optimizer; warmup never exceeds 10% of the schedule.
        optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)
        total_steps = len(dataloader) * EPOCHS
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=min(100, total_steps // 10),
            num_training_steps=total_steps
        )

        # Training loop
        model.train()
        reward_model.train()
        logging.info("Starting retraining process...")

        successful_steps = 0
        for epoch in range(EPOCHS):
            epoch_loss = 0.0
            epoch_steps = 0
            progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{EPOCHS}")

            for batch in progress_bar:
                batch = {k: v.to(DEVICE) for k, v in batch.items()}

                # A failing batch is logged and skipped rather than aborting the run.
                try:
                    # PPO training step would go here
                    # For now using basic training
                    outputs = model(
                        input_ids=batch['input_ids'],
                        attention_mask=batch['attention_mask'],
                        labels=batch['labels']
                    )
                    loss = outputs.loss

                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                    scheduler.step()

                    step_loss = loss.item()
                    epoch_loss += step_loss
                    epoch_steps += 1
                    progress_bar.set_postfix(loss=step_loss)
                except Exception as e:
                    logging.error(f"Error in batch processing: {str(e)}")
                    continue

            successful_steps += epoch_steps

            # Save checkpoint
            torch.save(reward_model.state_dict(), f"reward_model_epoch_{epoch+1}.pth")
            avg_loss = epoch_loss / epoch_steps if epoch_steps else float("nan")
            logging.info(f"Epoch {epoch+1} completed. Avg loss: {avg_loss}")

        if successful_steps == 0:
            # Saving here would publish an untrained model and archive unused feedback.
            logging.error("Retraining aborted: no batch was trained successfully.")
            return False

        # Save final models
        model.save_pretrained(RETRAINED_MODEL_PATH)
        tokenizer.save_pretrained(RETRAINED_MODEL_PATH)
        torch.save(reward_model.state_dict(), "reward_model.pth")

        # Archive feedback data so the same rows are not reused by the next run
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        archive_dir = Path("feedback_data/archive")
        archive_dir.mkdir(parents=True, exist_ok=True)
        os.rename(FEEDBACK_CSV, archive_dir / f"feedback_{timestamp}.csv")

        logging.info("Retraining completed successfully!")
        return True

    except Exception as e:
        logging.error(f"Retraining failed: {str(e)}", exc_info=True)
        return False

if __name__ == "__main__":
    # Entry point: retrain only when the feedback log exists and has enough likes.
    if not os.path.exists(FEEDBACK_CSV):
        print("No feedback data found at:", FEEDBACK_CSV)
    else:
        df = pd.read_csv(FEEDBACK_CSV)
        positive_count = len(df.loc[df['feedback'] == 1])

        if positive_count < FEEDBACK_THRESHOLD:
            print(f"Need {FEEDBACK_THRESHOLD} positive feedbacks (have {positive_count})")
        else:
            print(f"Starting retraining with {positive_count} positive feedbacks")
            # retrain_model logs the details; only the outcome is printed here
            print(
                "✅ Retraining successful!"
                if retrain_model()
                else "❌ Retraining failed - check retraining.log"
            )

Starting retraining with 15 positive feedbacks


