In [2]:
import os
import git

# Define the repository URL and the local directory to clone into
repo_url = "https://github.com/555-Satyajit/Farm-params.git"
local_dir = r"C:\Users\SATYAJIT\crop yield\Farm-Params"  # The directory where the repo will be cloned

# Clone the repository if it does not already exist
if not os.path.exists(local_dir):
    try:
        print("Cloning the repository...")
        git.Repo.clone_from(repo_url, local_dir)
        print("Repository cloned successfully!")
    except git.exc.GitCommandError as e:
        print(f"Error during cloning: {e}")
else:
    print("Repository already exists, skipping clone.")


Repository already exists, skipping clone.


In [4]:
import numpy as np
import os
from pymongo import MongoClient

# Path to the cloned repository
repo_path = "Farm-params"  # Adjust if your repo path is different

# Initialize MongoDB client
client = MongoClient('mongodb://localhost:27017/')  # Adjust if necessary
db = client['fed_avg']  # Name of the database
collection = db['model_params']  # Collection for storing model parameters

# Loop over client numbers (1 to 4) to load parameters and store them in MongoDB
for client_num in range(1, 5):
    # Load the coefficients and intercepts for each client
    coef = np.load(os.path.join(repo_path, f'coef_client_{client_num}.npy'))
    intercept = np.load(os.path.join(repo_path, f'intercept_client_{client_num}.npy'))
    
    # Convert numpy arrays to lists for MongoDB compatibility
    params = {
        'coef': coef.tolist(),
        'intercept': intercept.tolist()
    }
    
    # Insert the data into MongoDB
    collection.insert_one({f"client_{client_num}_params": params})
    
    print(f"Model parameters for Client {client_num} saved to MongoDB!")

# Wait for any ongoing processes to release the folder before deletion
time.sleep(2)

# Try to delete the repository folder after processing
try:
    shutil.rmtree(repo_path)
    print(f"Successfully deleted the repository at {repo_path}.")
except PermissionError as e:
    print(f"PermissionError: {e}. Try closing any processes that may be using the folder.")
    # Optionally, you could try deleting the folder manually if needed.
except Exception as e:
    print(f"Error during deletion: {e}")



Model parameters for Client 1 saved to MongoDB!
Model parameters for Client 2 saved to MongoDB!
Model parameters for Client 3 saved to MongoDB!
Model parameters for Client 4 saved to MongoDB!


NameError: name 'time' is not defined

In [6]:
import os
import git
import numpy as np
import shutil
import time
import pickle
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
import numpy as np
from typing import Dict, List
import random

class FederatedServer:
    def __init__(self, num_rounds: int = 5, client_fraction: float = 0.8):
        self.global_model = None
        self.num_rounds = num_rounds
        self.client_fraction = client_fraction
        self.clients = []
        
    def initialize_global_model(self):
        """Initialize global model parameters"""
        self.global_model = {
            'coefficients': None,
            'intercept': None
        }
    
    def select_clients(self) -> List[int]:
        """Randomly select a fraction of clients for each round"""
        num_clients = len(self.clients)
        num_selected = max(1, int(self.client_fraction * num_clients))
        return random.sample(range(num_clients), num_selected)
    
    def aggregate_models(self, client_models: List[Dict]) -> Dict:
        """Aggregate client models using FedAvg"""
        coef_array = []
        intercept_array = []
        
        # Get the maximum coefficient size
        max_coef_size = max(coef.shape[0] for coef in 
                          [model['coefficients'] for model in client_models])
        
        for model in client_models:
            coef = model['coefficients']
            # Pad smaller arrays with zeros
            if coef.shape[0] < max_coef_size:
                coef = np.pad(coef, (0, max_coef_size - coef.shape[0]), 
                            mode='constant', constant_values=0)
            coef_array.append(coef)
            intercept_array.append(model['intercept'])
        
        return {
            'coefficients': np.mean(coef_array, axis=0),
            'intercept': np.mean(intercept_array, axis=0)
        }

class FederatedClient:
    def __init__(self, client_id: int, data_path: str):
        self.client_id = client_id
        self.data_path = data_path
        self.model = LinearRegression()
        self.scaler = StandardScaler()
        
    def load_data(self):
        """Load client's local data"""
        # In your case, loading pre-trained coefficients and intercepts
        self.coef = np.load(os.path.join(self.data_path, f'coef_client_{self.client_id}.npy'))
        self.intercept = np.load(os.path.join(self.data_path, 
                                            f'intercept_client_{self.client_id}.npy'))
    
    def train_local_model(self, global_model=None):
        """Train local model using client's data"""
        if global_model is not None and global_model['coefficients'] is not None:
            # Initialize local model with global parameters
            self.coef = global_model['coefficients']
            self.intercept = global_model['intercept']
        
        # In a real implementation, you would train the model here
        # For now, we're just using the pre-trained parameters
        
        return {
            'coefficients': self.coef,
            'intercept': self.intercept
        }

def main():
    # Define paths
    repo_url = "https://github.com/555-Satyajit/Farm-params.git"
    local_dir = r"C:\Users\SATYAJIT\crop yield\Farm-Params"
    repo_path = "Farm-params"

    # Clone repository
    if not os.path.exists(local_dir):
        try:
            print("Cloning the repository...")
            git.Repo.clone_from(repo_url, local_dir)
            print("Repository cloned successfully!")
        except git.exc.GitCommandError as e:
            print(f"Error during cloning: {e}")
            return
    else:
        print("Repository already exists, skipping clone.")

    # Initialize server
    server = FederatedServer(num_rounds=5, client_fraction=0.8)
    server.initialize_global_model()

    # Initialize clients
    for client_id in range(1, 5):
        client = FederatedClient(client_id, repo_path)
        try:
            client.load_data()
            server.clients.append(client)
        except Exception as e:
            print(f"Error initializing client {client_id}: {e}")

    # Federated Learning rounds
    for round_num in range(server.num_rounds):
        print(f"\nFederated Learning Round {round_num + 1}")
        
        # Select clients for this round
        selected_clients = server.select_clients()
        print(f"Selected clients: {[i+1 for i in selected_clients]}")
        
        # Collect client updates
        client_models = []
        for client_idx in selected_clients:
            client = server.clients[client_idx]
            # Train local model (in this case, just using pre-trained parameters)
            client_model = client.train_local_model(server.global_model)
            client_models.append(client_model)
            print(f"Client {client.client_id} completed local training")
        
        # Aggregate models
        server.global_model = server.aggregate_models(client_models)
        print(f"Round {round_num + 1} aggregation completed")

    # Save final global model
    try:
        with open('globalmodel1.pkl', 'wb') as f:
            pickle.dump(server.global_model, f)
        print("\nFinal global model saved as globalmodel1.pkl!")
    except Exception as e:
        print(f"Error saving global model: {e}")

    # Cleanup
    time.sleep(2)
    try:
        shutil.rmtree(repo_path)
        print(f"Successfully deleted the repository at {repo_path}.")
    except Exception as e:
        print(f"Error during cleanup: {e}")

if __name__ == "__main__":
    main()

Repository already exists, skipping clone.

Federated Learning Round 1
Selected clients: [4, 1, 3]
Client 4 completed local training
Client 1 completed local training
Client 3 completed local training
Round 1 aggregation completed

Federated Learning Round 2
Selected clients: [3, 2, 4]
Client 3 completed local training
Client 2 completed local training
Client 4 completed local training
Round 2 aggregation completed

Federated Learning Round 3
Selected clients: [1, 2, 3]
Client 1 completed local training
Client 2 completed local training
Client 3 completed local training
Round 3 aggregation completed

Federated Learning Round 4
Selected clients: [1, 3, 4]
Client 1 completed local training
Client 3 completed local training
Client 4 completed local training
Round 4 aggregation completed

Federated Learning Round 5
Selected clients: [1, 2, 4]
Client 1 completed local training
Client 2 completed local training
Client 4 completed local training
Round 5 aggregation completed

Final global mod

In [21]:
import pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
import matplotlib.pyplot as plt

def analyze_data(df):
    """
    Analyze the dataset to understand the available values
    """
    print("\nData Analysis:")
    for column in df.columns:
        if df[column].dtype == 'object':
            print(f"\n{column} unique values:")
            print(df[column].unique())
    
    print("\nNumeric columns statistics:")
    print(df.describe())
    return df.select_dtypes(include=['object']).columns

def engineer_features(df):
    """
    Transform original features into expanded feature set
    """
    # Create a copy to avoid modifying original data
    X = df.copy()
    
    # Encode categorical variables
    le_dict = {}
    categorical_cols = ['Region', 'Soil_Type', 'Crop', 'Weather_Condition']
    for col in categorical_cols:
        le_dict[col] = LabelEncoder()
        X[col] = le_dict[col].fit_transform(X[col])
    
    # Create interaction features
    X['Rainfall_Temp_Interaction'] = X['Rainfall_mm'] * X['Temperature_Celsius']
    X['Growing_Conditions'] = X['Rainfall_mm'] / (X['Temperature_Celsius'] + 1)
    
    # Create agricultural specific features
    X['Water_Availability'] = X['Rainfall_mm'] + (X['Irrigation_Used'] * 500)
    X['Growth_Index'] = (X['Temperature_Celsius'] * X['Rainfall_mm'] * X['Fertilizer_Used']) / 1000
    
    # Create squared terms for numerical features
    numeric_features = ['Rainfall_mm', 'Temperature_Celsius', 'Days_to_Harvest']
    for feature in numeric_features:
        X[f'{feature}_Squared'] = X[feature] ** 2
    
    # Interaction between categorical and numerical features
    X['Region_Rainfall'] = X['Region'] * X['Rainfall_mm']
    X['Soil_Temperature'] = X['Soil_Type'] * X['Temperature_Celsius']
    
    # Binary features
    X['High_Rainfall'] = (X['Rainfall_mm'] > X['Rainfall_mm'].mean()).astype(int)
    X['High_Temperature'] = (X['Temperature_Celsius'] > X['Temperature_Celsius'].mean()).astype(int)
    
    # Normalize numeric features
    numeric_cols = X.select_dtypes(include=['float64', 'int64']).columns
    scaler = StandardScaler()
    X[numeric_cols] = scaler.fit_transform(X[numeric_cols])
    
    print("\nFeatures after engineering:")
    print(f"Number of features: {X.shape[1]}")
    print("Feature names:", list(X.columns))
    
    return X

def evaluate_global_model(model_path, X_test, y_test):
    """
    Evaluate the global model using various metrics
    """
    # Load the global model
    with open(model_path, 'rb') as f:
        global_model = pickle.load(f)
    
    print(f"\nInput shape: {X_test.shape}")
    print(f"Coefficient shape: {global_model['coefficients'].shape}")
    
    # Select only the first 17 features if we have more
    if X_test.shape[1] > 17:
        X_test = X_test.iloc[:, :17]
        print(f"Using first 17 features. New input shape: {X_test.shape}")
    
    # Make predictions
    y_pred = np.dot(X_test, global_model['coefficients']) + global_model['intercept']
    
    # Calculate metrics
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)
    
    # Print metrics
    print("\nModel Evaluation Metrics:")
    print(f"R² Score: {r2:.4f}")
    print(f"MSE: {mse:.4f}")
    print(f"RMSE: {rmse:.4f}")
    
    # Create visualizations
    create_evaluation_plots(y_test, y_pred)
    
    return {
        'r2_score': r2,
        'mse': mse,
        'rmse': rmse,
        'predictions': y_pred,
        'errors': y_test - y_pred
    }

def create_evaluation_plots(y_test, y_pred):
    """Create and save evaluation plots"""
    # Scatter plot
    plt.figure(figsize=(10, 6))
    plt.scatter(y_test, y_pred, alpha=0.5)
    plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2)
    plt.xlabel('Actual Yield (tons/hectare)')
    plt.ylabel('Predicted Yield (tons/hectare)')
    plt.title('Actual vs Predicted Crop Yield')
    plt.tight_layout()
    plt.savefig('yield_predictions.png')
    plt.close()
    
    # Error distribution
    errors = y_test - y_pred
    plt.figure(figsize=(10, 6))
    plt.hist(errors, bins=30, edgecolor='black')
    plt.xlabel('Prediction Error (tons/hectare)')
    plt.ylabel('Frequency')
    plt.title('Distribution of Prediction Errors')
    plt.tight_layout()
    plt.savefig('yield_errors.png')
    plt.close()

def main():
    # File paths
    data_path = "crop_yield.csv"
    model_path = "globalmodel1.pkl"
    
    try:
        # Load data
        print("Loading data...")
        df = pd.read_csv(data_path)
        
        # Analyze data first
        print("\nAnalyzing data structure...")
        categorical_columns = analyze_data(df)
        
        # Separate features and target
        X = df.drop(['Yield_tons_per_hectare'], axis=1)
        y = df['Yield_tons_per_hectare']
        
        # Perform feature engineering
        print("\nPerforming feature engineering...")
        X_engineered = engineer_features(X)
        
        # Split the data
        print("\nSplitting data into train and test sets...")
        X_train, X_test, y_train, y_test = train_test_split(
            X_engineered, y, test_size=0.2, random_state=42
        )
        
        # Evaluate model
        print("\nEvaluating model...")
        results = evaluate_global_model(model_path, X_test, y_test)
        
    except Exception as e:
        print(f"An error occurred: {str(e)}")
        import traceback
        print("\nFull error traceback:")
        print(traceback.format_exc())

if __name__ == "__main__":
    main()

Loading data...

Analyzing data structure...

Data Analysis:

Region unique values:
['West' 'South' 'North' 'East']

Soil_Type unique values:
['Sandy' 'Clay' 'Loam' 'Silt' 'Peaty' 'Chalky']

Crop unique values:
['Cotton' 'Rice' 'Barley' 'Soybean' 'Wheat' 'Maize']

Weather_Condition unique values:
['Cloudy' 'Rainy' 'Sunny']

Numeric columns statistics:
          Rainfall_mm  Temperature_Celsius  Days_to_Harvest  \
count  1000000.000000       1000000.000000   1000000.000000   
mean       549.981901            27.504965       104.495025   
std        259.851320             7.220608        25.953412   
min        100.000896            15.000034        60.000000   
25%        324.891090            21.254502        82.000000   
50%        550.124061            27.507365       104.000000   
75%        774.738520            33.753267       127.000000   
max        999.998098            39.999997       149.000000   

       Yield_tons_per_hectare  
count          1000000.000000  
mean          

In [19]:
import os
import git
import numpy as np
import shutil
import time
import pickle
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from typing import Dict, List, Tuple
import random

class FederatedServer:
    def __init__(self, num_rounds: int = 10, client_fraction: float = 1.0, learning_rate: float = 0.1):
        self.global_model = None
        self.num_rounds = num_rounds
        self.client_fraction = client_fraction
        self.clients = []
        self.feature_dim = None
        self.learning_rate = learning_rate
        self.best_model = None
        self.best_score = float('-inf')
        
    def initialize_global_model(self, feature_dim: int):
        """Initialize global model parameters with random values"""
        self.feature_dim = feature_dim
        self.global_model = {
            'coefficients': np.random.randn(feature_dim) * 0.01,  # Small random initialization
            'intercept': 0.0
        }
        self.best_model = self.global_model.copy()
    
    def select_clients(self) -> List[int]:
        """Randomly select a fraction of clients for each round"""
        num_clients = len(self.clients)
        num_selected = max(1, int(self.client_fraction * num_clients))
        return random.sample(range(num_clients), num_selected)
    
    def aggregate_models(self, client_models: List[Dict], client_weights: List[float], 
                        client_scores: List[float]) -> Dict:
        """Aggregate client models using weighted FedAvg with performance-based weighting"""
        if not client_models:
            return self.global_model
            
        # Convert negative R² scores to small positive weights
        scores = np.array(client_scores)
        scores = np.exp(scores) / np.sum(np.exp(scores))  # Softmax normalization
        
        # Combine performance scores with data size weights
        weights = np.array(client_weights) * scores
        weights = weights / np.sum(weights)
        
        # Weighted average with momentum
        momentum = 0.9
        weighted_coef = np.average(
            [model['coefficients'] for model in client_models],
            weights=weights,
            axis=0
        )
        weighted_intercept = np.average(
            [model['intercept'] for model in client_models],
            weights=weights
        )
        
        # Apply momentum update
        new_model = {
            'coefficients': momentum * self.global_model['coefficients'] + 
                          (1 - momentum) * weighted_coef,
            'intercept': momentum * self.global_model['intercept'] + 
                        (1 - momentum) * weighted_intercept
        }
        
        return new_model

class FederatedClient:
    def __init__(self, client_id: int, data_path: str, expected_dim: int = None):
        self.client_id = client_id
        self.data_path = data_path
        self.model = LinearRegression()
        self.scaler = StandardScaler()
        self.expected_dim = expected_dim
        self.X_train = None
        self.X_val = None
        self.y_train = None
        self.y_val = None
        
    def load_data(self) -> Tuple[np.ndarray, float]:
        """Load and preprocess client's local data"""
        try:
            coef = np.load(os.path.join(self.data_path, f'coef_client_{self.client_id}.npy'))
            intercept = np.load(os.path.join(self.data_path, 
                                           f'intercept_client_{self.client_id}.npy'))
            
            # Generate synthetic data based on coefficients
            num_samples = 1000  # Increase for more training data
            X = np.random.randn(num_samples, len(coef))
            y = np.dot(X, coef) + intercept + np.random.randn(num_samples) * 0.1
            
            # Split data into train and validation sets
            self.X_train, self.X_val, self.y_train, self.y_val = train_test_split(
                X, y, test_size=0.2, random_state=42
            )
            
            # Scale features
            self.X_train = self.scaler.fit_transform(self.X_train)
            self.X_val = self.scaler.transform(self.X_val)
            
            # Handle dimension mismatch
            if self.expected_dim is not None:
                if coef.shape[0] < self.expected_dim:
                    coef = np.pad(coef, (0, self.expected_dim - coef.shape[0]))
                elif coef.shape[0] > self.expected_dim:
                    coef = coef[:self.expected_dim]
                    
            self.coef = coef
            self.intercept = intercept
            return len(self.X_train)
            
        except Exception as e:
            print(f"Error loading data for client {self.client_id}: {e}")
            raise
    
    def train_local_model(self, global_model: Dict) -> Tuple[Dict, float]:
        """Train local model using client's data and evaluate performance"""
        if global_model is not None:
            # Initialize local model with global parameters
            self.model.coef_ = global_model['coefficients'].copy()
            self.model.intercept_ = global_model['intercept']
            
            # Fine-tune on local data
            self.model.fit(self.X_train, self.y_train)
            
            # Evaluate on validation set
            val_score = r2_score(self.y_val, self.model.predict(self.X_val))
            
            return {
                'coefficients': self.model.coef_,
                'intercept': self.model.intercept_
            }, val_score
        
        return {
            'coefficients': self.coef,
            'intercept': self.intercept
        }, 0.0

def main():
    repo_url = "https://github.com/555-Satyajit/Farm-params.git"
    local_dir = r"C:\Users\SATYAJIT\crop yield\Farm-Params"
    repo_path = "Farm-params"

    # Clone repository
    if not os.path.exists(local_dir):
        try:
            print("Cloning the repository...")
            git.Repo.clone_from(repo_url, local_dir)
            print("Repository cloned successfully!")
        except git.exc.GitCommandError as e:
            print(f"Error during cloning: {e}")
            return
    else:
        print("Repository already exists, skipping clone.")

    # First pass: determine maximum feature dimension
    max_feature_dim = 0
    for client_id in range(1, 5):
        try:
            coef = np.load(os.path.join(repo_path, f'coef_client_{client_id}.npy'))
            max_feature_dim = max(max_feature_dim, len(coef))
            print(f"Client {client_id} feature dimension: {len(coef)}")
        except Exception as e:
            print(f"Error checking dimensions for client {client_id}: {e}")

    print(f"Maximum feature dimension across clients: {max_feature_dim}")
    
    if max_feature_dim == 0:
        print("Error: Could not determine feature dimension from any client")
        return

    # ... (keep existing repository setup code)

    # Initialize server with modified parameters
    server = FederatedServer(num_rounds=10, client_fraction=1.0, learning_rate=0.1)
    server.initialize_global_model(max_feature_dim)

    # Initialize clients
    client_data_sizes = []
    for client_id in range(1, 5):
        client = FederatedClient(client_id, repo_path, expected_dim=max_feature_dim)
        try:
            data_size = client.load_data()
            client_data_sizes.append(data_size)
            server.clients.append(client)
        except Exception as e:
            print(f"Error initializing client {client_id}: {e}")

    # Federated Learning rounds
    best_round_score = float('-inf')
    for round_num in range(server.num_rounds):
        print(f"\nFederated Learning Round {round_num + 1}")
        
        selected_clients = server.select_clients()
        print(f"Selected clients: {[i+1 for i in selected_clients]}")
        
        client_models = []
        selected_weights = []
        client_scores = []
        
        for idx, client_idx in enumerate(selected_clients):
            client = server.clients[client_idx]
            try:
                client_model, val_score = client.train_local_model(server.global_model)
                client_models.append(client_model)
                selected_weights.append(client_data_sizes[client_idx])
                client_scores.append(val_score)
                print(f"Client {client.client_id} completed local training (R² score: {val_score:.3f})")
            except Exception as e:
                print(f"Error training client {client.client_id}: {e}")
                continue
        
        try:
            server.global_model = server.aggregate_models(
                client_models, selected_weights, client_scores
            )
            
            # Track best model
            round_score = np.mean(client_scores)
            if round_score > best_round_score:
                best_round_score = round_score
                server.best_model = server.global_model.copy()
            
            print(f"Round {round_num + 1} aggregation completed (Avg R² score: {round_score:.3f})")
        except Exception as e:
            print(f"Error during model aggregation in round {round_num + 1}: {e}")
            continue

    # Save best global model
    try:
        with open('globalmodel1.pkl', 'wb') as f:
            pickle.dump(server.best_model, f)
        print(f"\nFinal global model saved as globalmodel1.pkl! Best R² score: {best_round_score:.3f}")
    except Exception as e:
        print(f"Error saving global model: {e}")

    # Cleanup
    time.sleep(2)
    try:
        shutil.rmtree(repo_path)
        print(f"Successfully deleted the repository at {repo_path}.")
    except Exception as e:
        print(f"Error during cleanup: {e}")

if __name__ == "__main__":
    main()

Repository already exists, skipping clone.
Client 1 feature dimension: 17
Client 2 feature dimension: 17
Client 3 feature dimension: 8
Client 4 feature dimension: 8
Maximum feature dimension across clients: 17

Federated Learning Round 1
Selected clients: [3, 2, 1, 4]
Client 3 completed local training (R² score: 0.988)
Client 2 completed local training (R² score: 0.996)
Client 1 completed local training (R² score: 0.995)
Client 4 completed local training (R² score: 0.990)
Error during model aggregation in round 1: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (4,) + inhomogeneous part.

Federated Learning Round 2
Selected clients: [4, 1, 3, 2]
Client 4 completed local training (R² score: 0.990)
Client 1 completed local training (R² score: 0.995)
Client 3 completed local training (R² score: 0.988)
Client 2 completed local training (R² score: 0.996)
Error during model aggregation in round 2: setting an 

In [28]:
import os
import git
import numpy as np
import shutil
import time
import pickle
from sklearn.linear_model import Ridge  # Changed to Ridge regression
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from typing import Dict, List, Tuple
import random

class FederatedServer:
    def __init__(self, num_rounds: int = 15, client_fraction: float = 1.0, learning_rate: float = 0.01):
        self.global_model = None
        self.num_rounds = num_rounds
        self.client_fraction = client_fraction
        self.clients = []
        self.feature_dim = None
        self.learning_rate = learning_rate
        self.best_model = None
        self.best_score = float('-inf')
        
    def initialize_global_model(self, feature_dim: int):
        """Initialize global model parameters"""
        self.feature_dim = feature_dim
        self.global_model = {
            'coefficients': np.zeros(feature_dim),
            'intercept': 0.0
        }
        self.best_model = self.global_model.copy()
    
    def select_clients(self) -> List[int]:
        """Randomly select a fraction of clients for each round"""
        num_clients = len(self.clients)
        num_selected = max(1, int(self.client_fraction * num_clients))
        return random.sample(range(num_clients), num_selected)
    
    def aggregate_models(self, client_models: List[Dict], client_weights: List[float], 
                        client_scores: List[float]) -> Dict:
        """Aggregate client models using weighted averaging"""
        if not client_models:
            return self.global_model
        
        # Use simple averaging with equal weights
        weighted_coef = np.mean([model['coefficients'] for model in client_models], axis=0)
        weighted_intercept = np.mean([model['intercept'] for model in client_models])
        
        # Gradual update with small learning rate
        new_model = {
            'coefficients': (1 - self.learning_rate) * self.global_model['coefficients'] + 
                          self.learning_rate * weighted_coef,
            'intercept': (1 - self.learning_rate) * self.global_model['intercept'] + 
                        self.learning_rate * weighted_intercept
        }
        
        return new_model
class FederatedClient:
    def __init__(self, client_id: int, data_path: str, expected_dim: int = None):
        self.client_id = client_id
        self.data_path = data_path
        # Use Ridge regression with regularization
        self.model = Ridge(alpha=1.0, random_state=42)
        self.scaler_X = StandardScaler()
        self.scaler_y = StandardScaler()
        self.expected_dim = expected_dim
        self.X_train = None
        self.X_val = None
        self.y_train = None
        self.y_val = None
        
    def load_data(self) -> Tuple[np.ndarray, float]:
        """Load and preprocess client's local data"""
        try:
            coef = np.load(os.path.join(self.data_path, f'coef_client_{self.client_id}.npy'))
            intercept = np.load(os.path.join(self.data_path, 
                                           f'intercept_client_{self.client_id}.npy'))
            
            # Generate synthetic data with more structure
            num_samples = 2000  # Increased sample size
            np.random.seed(42 + self.client_id)  # Different seed for each client
            
            # Generate features with some correlation
            X = np.random.randn(num_samples, len(coef))
            # Add some non-linear features
            X = np.column_stack([X, X[:, 0]**2, X[:, 0]*X[:, 1]])
            
            # Generate target with some noise
            noise = np.random.normal(0, 0.1, num_samples)
            y = np.dot(X[:, :len(coef)], coef) + intercept + noise
            
            # Split data
            self.X_train, self.X_val, self.y_train, self.y_val = train_test_split(
                X[:, :len(coef)], y, test_size=0.2, random_state=42
            )
            
            # Scale features and target
            self.X_train = self.scaler_X.fit_transform(self.X_train)
            self.X_val = self.scaler_X.transform(self.X_val)
            self.y_train = self.scaler_y.fit_transform(self.y_train.reshape(-1, 1)).ravel()
            self.y_val = self.scaler_y.transform(self.y_val.reshape(-1, 1)).ravel()
            
            # Handle dimension mismatch
            if self.expected_dim is not None:
                if coef.shape[0] < self.expected_dim:
                    coef = np.pad(coef, (0, self.expected_dim - coef.shape[0]))
                elif coef.shape[0] > self.expected_dim:
                    coef = coef[:self.expected_dim]
                    
            self.coef = coef
            self.intercept = intercept
            return len(self.X_train)
            
        except Exception as e:
            print(f"Error loading data for client {self.client_id}: {e}")
            raise
    
    def train_local_model(self, global_model: Dict) -> Tuple[Dict, float]:
        """Train local model using client's data"""
        try:
            # Fit model on local data
            self.model.fit(self.X_train, self.y_train)
            
            # Evaluate on validation set
            y_pred = self.model.predict(self.X_val)
            val_score = r2_score(self.y_val, y_pred)
            
            # Blend with global model
            if global_model is not None:
                blend_ratio = 0.7  # Favor local model more
                coef = blend_ratio * self.model.coef_ + (1 - blend_ratio) * global_model['coefficients']
                intercept = blend_ratio * self.model.intercept_ + (1 - blend_ratio) * global_model['intercept']
            else:
                coef = self.model.coef_
                intercept = self.model.intercept_
            
            return {
                'coefficients': coef,
                'intercept': intercept
            }, val_score
            
        except Exception as e:
            print(f"Error in local training for client {self.client_id}: {e}")
            return global_model, -np.inf
            

def main():
    repo_url = "https://github.com/555-Satyajit/Farm-params.git"
    local_dir = r"C:\Users\SATYAJIT\crop yield\Farm-Params"
    repo_path = "Farm-params"

    # Clone repository
    if not os.path.exists(local_dir):
        try:
            print("Cloning the repository...")
            git.Repo.clone_from(repo_url, local_dir)
            print("Repository cloned successfully!")
        except git.exc.GitCommandError as e:
            print(f"Error during cloning: {e}")
            return
    else:
        print("Repository already exists, skipping clone.")

    # First pass: determine maximum feature dimension
    max_feature_dim = 0
    for client_id in range(1, 5):
        try:
            coef = np.load(os.path.join(repo_path, f'coef_client_{client_id}.npy'))
            max_feature_dim = max(max_feature_dim, len(coef))
            print(f"Client {client_id} feature dimension: {len(coef)}")
        except Exception as e:
            print(f"Error checking dimensions for client {client_id}: {e}")

    print(f"Maximum feature dimension across clients: {max_feature_dim}")
    
    if max_feature_dim == 0:
        print("Error: Could not determine feature dimension from any client")
        return

    # ... (keep existing repository setup code)

    # Initialize server with modified parameters
    server = FederatedServer(num_rounds=10, client_fraction=1.0, learning_rate=0.1)
    server.initialize_global_model(max_feature_dim)

    # Initialize clients
    client_data_sizes = []
    for client_id in range(1, 5):
        client = FederatedClient(client_id, repo_path, expected_dim=max_feature_dim)
        try:
            data_size = client.load_data()
            client_data_sizes.append(data_size)
            server.clients.append(client)
        except Exception as e:
            print(f"Error initializing client {client_id}: {e}")

    # Federated Learning rounds
    best_round_score = float('-inf')
    for round_num in range(server.num_rounds):
        print(f"\nFederated Learning Round {round_num + 1}")
        
        selected_clients = server.select_clients()
        print(f"Selected clients: {[i+1 for i in selected_clients]}")
        
        client_models = []
        selected_weights = []
        client_scores = []
        
        for idx, client_idx in enumerate(selected_clients):
            client = server.clients[client_idx]
            try:
                client_model, val_score = client.train_local_model(server.global_model)
                client_models.append(client_model)
                selected_weights.append(client_data_sizes[client_idx])
                client_scores.append(val_score)
                print(f"Client {client.client_id} completed local training (R² score: {val_score:.3f})")
            except Exception as e:
                print(f"Error training client {client.client_id}: {e}")
                continue
        
        try:
            server.global_model = server.aggregate_models(
                client_models, selected_weights, client_scores
            )
            
            # Track best model
            round_score = np.mean(client_scores)
            if round_score > best_round_score:
                best_round_score = round_score
                server.best_model = server.global_model.copy()
            
            print(f"Round {round_num + 1} aggregation completed (Avg R² score: {round_score:.3f})")
        except Exception as e:
            print(f"Error during model aggregation in round {round_num + 1}: {e}")
            continue

    # Save best global model
    try:
        with open('globalmodel1.pkl', 'wb') as f:
            pickle.dump(server.best_model, f)
        print(f"\nFinal global model saved as globalmodel1.pkl! Best R² score: {best_round_score:.3f}")
    except Exception as e:
        print(f"Error saving global model: {e}")

    # Cleanup
    time.sleep(2)
    try:
        shutil.rmtree(repo_path)
        print(f"Successfully deleted the repository at {repo_path}.")
    except Exception as e:
        print(f"Error during cleanup: {e}")

if __name__ == "__main__":
    main()

Repository already exists, skipping clone.
Client 1 feature dimension: 17
Client 2 feature dimension: 17
Client 3 feature dimension: 8
Client 4 feature dimension: 8
Maximum feature dimension across clients: 17

Federated Learning Round 1
Selected clients: [4, 2, 3, 1]
Error in local training for client 4: operands could not be broadcast together with shapes (8,) (17,) 
Client 4 completed local training (R² score: -inf)
Client 2 completed local training (R² score: 0.997)
Error in local training for client 3: operands could not be broadcast together with shapes (8,) (17,) 
Client 3 completed local training (R² score: -inf)
Client 1 completed local training (R² score: 0.996)
Round 1 aggregation completed (Avg R² score: -inf)

Federated Learning Round 2
Selected clients: [4, 3, 2, 1]
Error in local training for client 4: operands could not be broadcast together with shapes (8,) (17,) 
Client 4 completed local training (R² score: -inf)
Error in local training for client 3: operands could no