In [3]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, clone_model
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from typing import List, Dict
import numpy as np

class UserDevice:
    """A simulated client that owns private text and a local next-word model."""

    def __init__(self, user_id: int, local_data: List[str], tokenizer: Tokenizer,
                 vocab_size: int, max_sequence_length: int):
        """Store the device's data and settings, then build its local model.

        Args:
            user_id: Identifier of this device.
            local_data: Sentences held by this device.
            tokenizer: Tokenizer used to turn text into integer sequences.
            vocab_size: Size of the embedding input and of the softmax output.
            max_sequence_length: Length every n-gram is padded to; the model
                consumes ``max_sequence_length - 1`` tokens and predicts the
                remaining one.
        """
        self.user_id = user_id
        self.local_data = local_data
        self.tokenizer = tokenizer
        self.vocab_size = vocab_size
        self.max_sequence_length = max_sequence_length
        # Built last: _create_model reads vocab_size and max_sequence_length.
        self.model = self._create_model()

    def _create_model(self) -> Sequential:
        """Build and compile the embedding -> LSTM -> LSTM -> dense network."""
        context_length = self.max_sequence_length - 1
        layers = [
            Embedding(self.vocab_size, 100, input_length=context_length),
            LSTM(100, return_sequences=True),
            LSTM(100),
            Dense(100, activation='relu'),
            Dense(self.vocab_size, activation='softmax'),
        ]
        network = Sequential(layers)
        network.compile(
            optimizer='adam',
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'],
        )
        return network

    def prepare_sequences(self) -> tuple:
        """Turn the local sentences into (inputs, targets) training arrays.

        Every prefix of at least two tokens of every tokenized sentence
        becomes one training row. Rows are left-padded to
        ``max_sequence_length``; the last column is the target token and the
        preceding columns are the model input.

        Returns:
            A pair ``(X, y)``: ``X`` holds all columns but the last of the
            padded rows and ``y`` holds the last column.
        """
        encoded = self.tokenizer.texts_to_sequences(self.local_data)

        # Prefix lengths run from 2 (one context token plus the target) up to
        # the full sentence.
        prefixes = [
            tokens[:end]
            for tokens in encoded
            for end in range(2, len(tokens) + 1)
        ]

        padded = pad_sequences(
            prefixes,
            maxlen=self.max_sequence_length,
            padding='pre',
        )
        return padded[:, :-1], padded[:, -1]

    def train_local_model(self, epochs=1, batch_size=32):
        """Fit the local model on this device's data; do nothing if it has none."""
        inputs, targets = self.prepare_sequences()
        if len(inputs) == 0:
            return
        self.model.fit(
            inputs,
            targets,
            epochs=epochs,
            batch_size=batch_size,
            verbose=0,
        )

    def get_model_weights(self) -> List:
        """Return the current weights of the local model."""
        return self.model.get_weights()

    def set_model_weights(self, weights: List):
        """Overwrite the local model's weights with ``weights``."""
        self.model.set_weights(weights)

class FederatedServer:
    """Central coordinator that combines the weights reported by clients."""

    def __init__(self, model_template: "Sequential"):
        """Keep ``model_template`` as the server's global model.

        The annotation is a string so the class can be defined without the
        Keras name being resolved at definition time.
        """
        self.global_model = model_template

    def aggregate_weights(self, local_weights: List[List]) -> List:
        """Average client weights layer by layer (unweighted FedAvg).

        Every client counts equally, regardless of how much data it trained
        on. ``self.global_model`` is not modified; the caller decides what to
        do with the result.

        Args:
            local_weights: One entry per client. Each entry is that client's
                list of weight arrays, in the same order for every client.

        Returns:
            A list with one averaged array per position in the clients'
            lists, or an empty list when no client reported weights.

        Raises:
            ValueError: If clients report different numbers of weight arrays,
                or arrays at the same position have different shapes.
        """
        if not local_weights:
            return []

        # zip() would silently drop trailing arrays of the longer lists, so
        # reject inconsistent clients up front.
        layer_counts = {len(client_weights) for client_weights in local_weights}
        if len(layer_counts) > 1:
            raise ValueError(
                "All clients must report the same number of weight arrays; "
                f"got counts {sorted(layer_counts)}"
            )

        averaged_weights = []
        for per_client_layer in zip(*local_weights):
            # Stack along a new leading "client" axis and average it away.
            # This also handles 0-d (scalar) weights, which cannot be
            # iterated row by row.
            stacked = np.stack([np.asarray(w) for w in per_client_layer])
            averaged_weights.append(np.asarray(np.mean(stacked, axis=0)))
        return averaged_weights

def load_data_from_file(file_path: str, lines_per_user: int = 5,
                        encoding: str = "utf-8") -> List[List[str]]:
    """Load sentences from a text file and split them between simulated users.

    Each non-blank line is treated as one sentence. Sentences are assigned to
    users in file order, ``lines_per_user`` at a time; the last user receives
    whatever remains.

    Args:
        file_path: Path of the text file to read.
        lines_per_user: Number of sentences given to each user.
        encoding: Text encoding of the file. Passed explicitly so the result
            does not depend on the platform's default encoding.

    Returns:
        One list of stripped sentences per user; empty if the file contains
        no non-blank lines.

    Raises:
        ValueError: If ``lines_per_user`` is smaller than 1.
        OSError: If the file cannot be opened.
    """
    if lines_per_user < 1:
        raise ValueError("lines_per_user must be at least 1")

    with open(file_path, 'r', encoding=encoding) as file:
        stripped = [line.strip() for line in file]

    # Blank lines are not sentences; keeping them would waste a user's quota
    # and could create users with no usable text at all.
    sentences = [sentence for sentence in stripped if sentence]

    return [
        sentences[start:start + lines_per_user]
        for start in range(0, len(sentences), lines_per_user)
    ]

def simulate_federated_learning(
    user_data_file_path: str = 'C:\\Users\\Alireza217\\Desktop\\user_data.txt.txt',
    n_rounds: int = 5,
    local_epochs: int = 3,
):
    """Run a federated next-word-prediction simulation and print a sample result.

    Steps: load per-user sentences, fit one tokenizer on all of them, create
    one ``UserDevice`` per user plus a ``FederatedServer``, then for each
    round train every device locally, average the reported weights on the
    server and push the average back to every device. Finally the first
    device predicts the word following "Hello how are".

    Args:
        user_data_file_path: Text file with one sentence per line.
        n_rounds: Number of train/aggregate/broadcast rounds.
        local_epochs: Epochs each device trains for in every round.

    Raises:
        ValueError: If the file yields no text to train on.
    """
    user_data = load_data_from_file(user_data_file_path)

    # The tokenizer is fitted on the union of all users' text so that every
    # device shares the same word-to-index mapping.
    tokenizer = Tokenizer()
    all_texts = [text for user_texts in user_data for text in user_texts]
    if not all_texts:
        # Fail with a clear message instead of the opaque
        # "max() arg is an empty sequence" raised further down.
        raise ValueError(f"No training text found in {user_data_file_path!r}")
    tokenizer.fit_on_texts(all_texts)

    # Parameters
    vocab_size = len(tokenizer.word_index) + 1
    max_sequence_length = max(len(text.split()) for text in all_texts) + 1

    # Template model for the server; same architecture as the device models.
    template_model = Sequential([
        Embedding(vocab_size, 100, input_length=max_sequence_length-1),
        LSTM(100, return_sequences=True),
        LSTM(100),
        Dense(100, activation='relu'),
        Dense(vocab_size, activation='softmax')
    ])
    template_model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
    )

    # Initialize devices and server
    devices = [
        UserDevice(i, data, tokenizer, vocab_size, max_sequence_length)
        for i, data in enumerate(user_data)
    ]
    server = FederatedServer(template_model)
    # NOTE(review): every device builds its own model, so devices start from
    # independent initial weights, and server.global_model is never given the
    # aggregated weights below. Confirm whether the template is meant to seed
    # the devices and track the global state.

    print("Starting Federated Learning Simulation...")

    for round_num in range(n_rounds):
        print(f"\nRound {round_num + 1}")

        # Local training on each device; collect the resulting weights.
        local_weights = []
        for device in devices:
            device.train_local_model(epochs=local_epochs)
            local_weights.append(device.get_model_weights())

        # Server aggregates weights
        global_weights = server.aggregate_weights(local_weights)

        # Devices update their models with global weights
        for device in devices:
            device.set_model_weights(global_weights)

        print(f"Completed round {round_num + 1} of training")

    def predict_next_word(device, text, num_predictions=1):
        """Return up to ``num_predictions`` most likely next words for ``text``."""
        sequence = device.tokenizer.texts_to_sequences([text])[0]
        padded = pad_sequences([sequence], maxlen=device.max_sequence_length-1, padding='pre')
        predictions = device.model.predict(padded, verbose=0)[0]

        # Reverse first, then slice: "[-k:]" would return the whole
        # vocabulary when k is 0.
        top_indices = predictions.argsort()[::-1][:num_predictions]

        # Build the reverse mapping once instead of scanning word_index for
        # every predicted index.
        index_to_word = {
            index: word for word, index in device.tokenizer.word_index.items()
        }
        # Indices without a word_index entry are skipped.
        return [
            index_to_word[int(idx)]
            for idx in top_indices
            if int(idx) in index_to_word
        ]

    # Test the model with a sample prediction
    test_device = devices[0]
    test_text = "Hello how are"
    predictions = predict_next_word(test_device, test_text)
    print(f"\nTest prediction for '{test_text}':")
    print(f"Predicted next words: {predictions}")

# Run the simulation only when this file is executed directly, not on import.
if __name__ == "__main__":
    simulate_federated_learning()

Starting Federated Learning Simulation...

Round 1
Completed round 1 of training

Round 2
Completed round 2 of training

Round 3
Completed round 3 of training

Round 4
Completed round 4 of training

Round 5
Completed round 5 of training

Test prediction for 'Hello how are':
Predicted next words: ['you']
