In [2]:
import joblib

# Load the models
model_client_1 = joblib.load('model_client_1.pkl')
model_client_2 = joblib.load('model_client_2.pkl')
model_client_3 = joblib.load('model_client_3.pkl')
model_client_4 = joblib.load('model_client_4.pkl')

# Extract weights (coefficients)
coef_1 = model_client_1.coef_
coef_2 = model_client_2.coef_
coef_3 = model_client_3.coef_
coef_4 = model_client_4.coef_

# Extract biases (intercepts)
intercept_1 = model_client_1.intercept_
intercept_2 = model_client_2.intercept_
intercept_3 = model_client_3.intercept_
intercept_4 = model_client_4.intercept_

# Display results
print("Client 1 Coefficients:", coef_1)
print("Client 1 Intercept:", intercept_1)

print("Client 2 Coefficients:", coef_2)
print("Client 2 Intercept:", intercept_2)

print("Client 3 Coefficients:", coef_3)
print("Client 3 Intercept:", intercept_3)

print("Client 4 Coefficients:", coef_4)
print("Client 4 Intercept:", intercept_4)


Client 1 Coefficients: [ 1.54702547e-04 -1.22302337e-03  7.64345295e-01  8.48115199e-02
  4.41670814e-01  3.53623120e-01  5.37683894e-04  5.90079238e-05]
Client 1 Intercept: 5.774058109752646e-05
Client 2 Coefficients: [ 1.54702547e-04 -1.22302337e-03  7.64345295e-01  8.48115199e-02
  4.41670814e-01  3.53623120e-01  5.37683894e-04  5.90079238e-05]
Client 2 Intercept: 5.774058109752646e-05
Client 3 Coefficients: [ 1.54702547e-04 -1.22302337e-03  7.64345295e-01  8.48115199e-02
  4.41670814e-01  3.53623120e-01  5.37683894e-04  5.90079238e-05]
Client 3 Intercept: 5.774058109752646e-05
Client 4 Coefficients: [ 3.53241478e-04 -8.53621509e-04  7.66118385e-01  8.47888754e-02
  4.41010276e-01  3.52858297e-01 -4.85743316e-04  1.15542825e-03]
Client 4 Intercept: 0.0005938679868076994


In [4]:
import numpy as np

# Aggregate coefficients (average)
global_coef = np.mean([coef_1, coef_2, coef_3, coef_4], axis=0)

# Aggregate intercepts (average)
global_intercept = np.mean([intercept_1, intercept_2, intercept_3, intercept_4])

print("Global Coefficients:", global_coef)
print("Global Intercept:", global_intercept)


Global Coefficients: [ 2.04337280e-04 -1.13067290e-03  7.64788567e-01  8.48058588e-02
  4.41505680e-01  3.53431914e-01  2.81827092e-04  3.33113006e-04]
Global Intercept: 0.0001917724325250697


In [6]:
joblib.dump((global_coef, global_intercept), 'global_model.pkl')


['global_model.pkl']

In [18]:
import joblib
import pandas as pd
import numpy as np

# Load the trained model coefficients and intercept
global_coef, global_intercept = joblib.load('global_model.pkl')

# Correct mappings
soil_type_mapping = {
    'Sandy': 4,
    'Clay': 1,
    'Loam': 2,
    'Silt': 5,
    'Peaty': 3,
    'Chalky': 0
}

crop_mapping = {
    'Cotton': 1,
    'Rice': 3,
    'Barley': 0,
    'Soybean': 4,
    'Wheat': 5,
    'Maize': 2
}

weather_condition_mapping = {
    'Sunny': 2,
    'Rainy': 1,
    'Cloudy': 0
}

# Function to process new input data and make predictions
def predict_crop_yield(input_data):
    # Convert input data to DataFrame
    input_df = pd.DataFrame([input_data])
    
    # Apply the same preprocessing steps as before
    input_df['Soil_Type'] = input_df['Soil_Type'].map(soil_type_mapping)
    input_df['Crop'] = input_df['Crop'].map(crop_mapping)
    input_df['Weather_Condition'] = input_df['Weather_Condition'].map(weather_condition_mapping)
    input_df['Fertilizer_Used'] = input_df['Fertilizer_Used'].map({'Yes': 1, 'No': 0})
    input_df['Irrigation_Used'] = input_df['Irrigation_Used'].map({'Yes': 1, 'No': 0})
    
    # Convert the DataFrame to a numpy array
    input_data_array = input_df.to_numpy().flatten()

    # Ensure the number of features matches the model's coefficients
    if len(input_data_array) != len(global_coef):
        raise ValueError(f"Feature count mismatch: Expected {len(global_coef)} features, but got {len(input_data_array)}.")

    # Get the predicted crop yield using the model's coefficients and intercept
    prediction = np.dot(input_data_array, global_coef) + global_intercept

    return prediction

# Function to get user input for each feature
def get_user_input():
    soil_type = input("Enter Soil Type (Sandy, Clay, Loam, Silt, Peaty, Chalky): ")
    crop = input("Enter Crop (Cotton, Rice, Barley, Soybean, Wheat, Maize): ")
    rainfall_mm = float(input("Enter Rainfall in mm: "))
    temperature_celsius = float(input("Enter Temperature in Celsius: "))
    fertilizer_used = input("Fertilizer Used (Yes, No): ")
    irrigation_used = input("Irrigation Used (Yes, No): ")
    weather_condition = input("Enter Weather Condition (Sunny, Rainy, Cloudy): ")
    days_to_harvest = int(input("Enter Days to Harvest: "))
    
    input_data = {
        'Soil_Type': soil_type,
        'Crop': crop,
        'Rainfall_mm': rainfall_mm,
        'Temperature_Celsius': temperature_celsius,
        'Fertilizer_Used': fertilizer_used,
        'Irrigation_Used': irrigation_used,
        'Weather_Condition': weather_condition,
        'Days_to_Harvest': days_to_harvest
    }
    
    return input_data

# Get user input
input_data = get_user_input()

# Get the predicted crop yield
predicted_yield = predict_crop_yield(input_data)
print("Prediction for new data:", predicted_yield)


Enter Soil Type (Sandy, Clay, Loam, Silt, Peaty, Chalky):  Sandy
Enter Crop (Cotton, Rice, Barley, Soybean, Wheat, Maize):  Rice
Enter Rainfall in mm:  576
Enter Temperature in Celsius:  45
Fertilizer Used (Yes, No):  Yes
Irrigation Used (Yes, No):  Yes
Enter Weather Condition (Sunny, Rainy, Cloudy):  Sunny
Enter Days to Harvest:  85


Prediction for new data: 445.15591147521786


In [None]:
! streamlit run app.py

In [23]:
import joblib
import numpy as np
from sklearn.metrics import mean_squared_error, accuracy_score, classification_report, roc_auc_score
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()


df = pd.read_csv('crop_yield.csv')
df = df.drop(columns=['Region'])
df['Soil_Type'] = label_encoder.fit_transform(df['Soil_Type'])
df['Crop'] = label_encoder.fit_transform(df['Crop'])
df['Weather_Condition'] = label_encoder.fit_transform(df['Weather_Condition'])
df['Irrigation_Used'] = label_encoder.fit_transform(df['Irrigation_Used'])
df['Fertilizer_Used'] = label_encoder.fit_transform(df['Fertilizer_Used'])
df1=df.copy()
df2=df.copy()
from sklearn.preprocessing import StandardScaler

# Initialize the StandardScaler
scaler = StandardScaler()

# Fit and transform the data
df3 = scaler.fit_transform(df1)

# Convert the result back to a DataFrame with column names from df2
df3 = pd.DataFrame(df3, columns=df1.columns)

# Display the standardized DataFrame
print(df3)
X = df3.drop(['Yield_tons_per_hectare'],axis=1)
y = df3['Yield_tons_per_hectare']

# Split into train/test if you haven't already
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Load the models
client_models = {
    1: joblib.load('model_client_1.pkl'),
    2: joblib.load('model_client_2.pkl'),
    3: joblib.load('model_client_3.pkl'),
    4: joblib.load('model_client_4.pkl')
}

# Load the global model
global_coef, global_intercept = joblib.load('global_model.pkl')

def evaluate_models(X_test, y_test, client_models, global_coef, global_intercept):
    results = {}
    
    # Evaluate each client model
    for client_id, model in client_models.items():
        # Get predictions
        y_pred = model.predict(X_test)
        y_pred_proba = model.predict_proba(X_test)[:, 1]
        
        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        auc = roc_auc_score(y_test, y_pred_proba)
        report = classification_report(y_test, y_pred)
        
        results[f'Client_{client_id}'] = {
            'accuracy': accuracy,
            'auc': auc,
            'classification_report': report
        }
    
    # Evaluate global model
    # For global model, we need to manually calculate predictions
    global_pred = np.dot(X_test, global_coef) + global_intercept
    global_pred_binary = (global_pred > 0.5).astype(int)
    
    global_accuracy = accuracy_score(y_test, global_pred_binary)
    global_auc = roc_auc_score(y_test, global_pred)
    global_report = classification_report(y_test, global_pred_binary)
    
    results['Global'] = {
        'accuracy': global_accuracy,
        'auc': global_auc,
        'classification_report': global_report
    }
    
    return results

# Run evaluation
results = evaluate_models(X_test, y_test, client_models, global_coef, global_intercept)

# Print results in a formatted way
print("\nModel Evaluation Results:")
print("-" * 50)

for model_name, metrics in results.items():
    print(f"\n{model_name}:")
    print(f"Accuracy: {metrics['accuracy']:.4f}")
    print(f"AUC-ROC: {metrics['auc']:.4f}")
    print("\nClassification Report:")
    print(metrics['classification_report'])
    print("-" * 50)

# Compare models
accuracies = {model: metrics['accuracy'] for model, metrics in results.items()}
best_model = max(accuracies, key=accuracies.get)
print(f"\nBest performing model: {best_model} with accuracy: {accuracies[best_model]:.4f}")

        Soil_Type      Crop  Rainfall_mm  Temperature_Celsius  \
0        0.877795 -0.877956     1.335747             0.023821   
1       -0.878529  0.293144     1.703634            -1.312747   
2       -0.293088 -1.463506    -1.546977             0.317020   
3        0.877795  0.878694     1.681287            -1.504137   
4        1.463237  1.464244     0.694233             0.569997   
...           ...       ...          ...                  ...   
999995   1.463237  0.293144    -0.951223             0.066817   
999996  -1.463971 -1.463506     1.473957             1.683526   
999997   0.292354 -0.877956     1.221392            -0.434164   
999998   1.463237  1.464244    -0.220007             0.767324   
999999   0.877795 -0.292406    -1.420219            -0.028546   

        Fertilizer_Used  Irrigation_Used  Weather_Condition  Days_to_Harvest  \
0              -0.99988         1.001019          -1.226353         0.674477   
1               1.00012         1.001019          -0.001398

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


ValueError: X has 8 features, but LinearRegression is expecting 17 features as input.

In [25]:
# Print feature information
print("Number of features in test data:", X_test.shape[1])
print("Features in test data:", list(X_test.columns))  # if using pandas

# Load one of the client models to check expected features
model = joblib.load('model_client_1.pkl')
if hasattr(model, 'feature_names_in_'):
    print("Features expected by model:", list(model.feature_names_in_))

Number of features in test data: 8
Features in test data: ['Soil_Type', 'Crop', 'Rainfall_mm', 'Temperature_Celsius', 'Fertilizer_Used', 'Irrigation_Used', 'Weather_Condition', 'Days_to_Harvest']


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [27]:
# Load one client model to check expected features
model = joblib.load('model_client_1.pkl')
if hasattr(model, 'feature_names_in_'):
    print("Features expected by model:", list(model.feature_names_in_))
else:
    # If feature names aren't stored, we at least know it expects 17 features
    print("Model expects 17 features")

Model expects 17 features


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [29]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

def preprocess_data(df):
    """
    Preprocess the data to match the model's expected format
    """
    # Make a copy to avoid modifying original data
    df = df.copy()
    
    # 1. Handle categorical variables
    categorical_features = ['Soil_Type', 'Crop', 'Weather_Condition']
    numerical_features = ['Rainfall_mm', 'Temperature_Celsius', 'Fertilizer_Used', 
                         'Irrigation_Used', 'Days_to_Harvest']
    
    # Create column transformer for categorical variables
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', 'passthrough', numerical_features),
            ('cat', OneHotEncoder(drop='first', sparse=False), categorical_features)
        ])
    
    # Fit and transform the data
    X_processed = preprocessor.fit_transform(df)
    
    # Get feature names after encoding
    cat_feature_names = []
    for i, feature in enumerate(categorical_features):
        categories = preprocessor.named_transformers_['cat'].categories_[i][1:]  # drop first category
        cat_feature_names.extend([f"{feature}_{cat}" for cat in categories])
    
    feature_names = numerical_features + cat_feature_names
    
    # Convert to DataFrame with proper column names
    X_processed_df = pd.DataFrame(X_processed, columns=feature_names)
    
    print("Features after preprocessing:", list(X_processed_df.columns))
    print("Number of features after preprocessing:", len(X_processed_df.columns))
    
    return X_processed_df, preprocessor

# Preprocess your test data
X_processed, preprocessor = preprocess_data(X_test)

# Now check if the number of features matches
print("\nOriginal test data shape:", X_test.shape)
print("Processed test data shape:", X_processed.shape)

# If the number of features still doesn't match 17, we'll need to:
# 1. Check what features the model was trained on
# 2. Add any missing features with default values
# 3. Ensure feature order matches the training data

# Load and evaluate models with processed data
def evaluate_models_with_processed_data(X_processed, y_test, client_models, global_coef, global_intercept):
    results = {}
    
    # Evaluate each client model
    for client_id, model in client_models.items():
        try:
            # Get predictions
            y_pred = model.predict(X_processed)
            
            # Calculate metrics (assuming regression task)
            mse = mean_squared_error(y_test, y_pred)
            rmse = np.sqrt(mse)
            
            results[f'Client_{client_id}'] = {
                'rmse': rmse,
                'mse': mse
            }
        except Exception as e:
            print(f"Error evaluating Client_{client_id}: {str(e)}")
    
    # Evaluate global model
    try:
        global_pred = np.dot(X_processed, global_coef) + global_intercept
        global_mse = mean_squared_error(y_test, global_pred)
        global_rmse = np.sqrt(global_mse)
        
        results['Global'] = {
            'rmse': global_rmse,
            'mse': global_mse
        }
    except Exception as e:
        print(f"Error evaluating Global model: {str(e)}")
    
    return results

# Load models and evaluate
client_models = {
    1: joblib.load('model_client_1.pkl'),
    2: joblib.load('model_client_2.pkl'),
    3: joblib.load('model_client_3.pkl'),
    4: joblib.load('model_client_4.pkl')
}

global_coef, global_intercept = joblib.load('global_model.pkl')

# Evaluate with processed data
results = evaluate_models_with_processed_data(X_processed, y_test, client_models, global_coef, global_intercept)

# Print results
print("\nModel Evaluation Results:")
print("-" * 50)
for model_name, metrics in results.items():
    print(f"\n{model_name}:")
    print(f"RMSE: {metrics['rmse']:.4f}")
    print(f"MSE: {metrics['mse']:.4f}")

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Features after preprocessing: ['Rainfall_mm', 'Temperature_Celsius', 'Fertilizer_Used', 'Irrigation_Used', 'Days_to_Harvest', 'Soil_Type_-0.8785294771289613', 'Soil_Type_-0.2930878736332484', 'Soil_Type_0.29235372986246455', 'Soil_Type_0.8777953333581775', 'Soil_Type_1.4632369368538904', 'Crop_-0.8779560852889114', 'Crop_-0.29240609743473833', 'Crop_0.2931438904194347', 'Crop_0.8786938782736078', 'Crop_1.4642438661277808', 'Weather_Condition_-0.0013976739363985857', 'Weather_Condition_1.2235575726880659']
Number of features after preprocessing: 17

Original test data shape: (200000, 8)
Processed test data shape: (200000, 17)
Error evaluating Client_3: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- Crop_-0.29240609743473833
- Crop_-0.8779560852889114
- Crop_0.2931438904194347
- Crop_0.8786938782736078
- Crop_1.4642438661277808
- ...
Feature names seen at fit time, yet now missing:
- Crop
- Soil_Type
- Weather_Condition

Error evalua



In [31]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error

def preprocess_data_simple(df):
    """
    Preprocess the data keeping original feature names without one-hot encoding
    """
    # Make a copy to avoid modifying original data
    df = df.copy()
    
    # Initialize label encoders for categorical variables
    categorical_features = ['Soil_Type', 'Crop', 'Weather_Condition']
    label_encoders = {}
    
    # Encode categorical variables
    for feature in categorical_features:
        label_encoders[feature] = LabelEncoder()
        df[feature] = label_encoders[feature].fit_transform(df[feature])
    
    # Ensure all features are numeric
    for col in df.columns:
        df[col] = pd.to_numeric(df[col])
    
    # Get the feature order to match the original training data
    feature_order = [
        'Soil_Type',
        'Crop',
        'Rainfall_mm',
        'Temperature_Celsius',
        'Fertilizer_Used',
        'Irrigation_Used',
        'Weather_Condition',
        'Days_to_Harvest'
    ]
    
    return df[feature_order]

# Preprocess test data
X_processed = preprocess_data_simple(X_test)

print("Features after preprocessing:", list(X_processed.columns))
print("Number of features after preprocessing:", len(X_processed.columns))
print("Processed test data shape:", X_processed.shape)

# Load and evaluate models
def evaluate_models(X_processed, y_test, client_models, global_coef, global_intercept):
    results = {}
    
    # Evaluate each client model
    for client_id, model in client_models.items():
        try:
            # Get predictions
            y_pred = model.predict(X_processed)
            
            # Calculate metrics
            mse = mean_squared_error(y_test, y_pred)
            rmse = np.sqrt(mse)
            
            # Calculate R-squared
            ss_tot = np.sum((y_test - np.mean(y_test)) ** 2)
            ss_res = np.sum((y_test - y_pred) ** 2)
            r2 = 1 - (ss_res / ss_tot)
            
            results[f'Client_{client_id}'] = {
                'rmse': rmse,
                'mse': mse,
                'r2': r2
            }
        except Exception as e:
            print(f"Error evaluating Client_{client_id}: {str(e)}")
    
    # Evaluate global model
    try:
        global_pred = np.dot(X_processed, global_coef) + global_intercept
        global_mse = mean_squared_error(y_test, global_pred)
        global_rmse = np.sqrt(global_mse)
        
        # Calculate R-squared for global model
        ss_tot = np.sum((y_test - np.mean(y_test)) ** 2)
        ss_res = np.sum((y_test - global_pred) ** 2)
        global_r2 = 1 - (ss_res / ss_tot)
        
        results['Global'] = {
            'rmse': global_rmse,
            'mse': global_mse,
            'r2': global_r2
        }
    except Exception as e:
        print(f"Error evaluating Global model: {str(e)}")
    
    return results

# Load models
client_models = {
    1: joblib.load('model_client_1.pkl'),
    2: joblib.load('model_client_2.pkl'),
    3: joblib.load('model_client_3.pkl'),
    4: joblib.load('model_client_4.pkl')
}

global_coef, global_intercept = joblib.load('global_model.pkl')

# Evaluate models
results = evaluate_models(X_processed, y_test, client_models, global_coef, global_intercept)

# Print results with additional details
print("\nModel Evaluation Results:")
print("-" * 50)
for model_name, metrics in results.items():
    print(f"\n{model_name}:")
    print(f"RMSE: {metrics['rmse']:.4f}")
    print(f"MSE: {metrics['mse']:.4f}")
    print(f"R²: {metrics['r2']:.4f}")

# Find best performing model
best_model = min(results.items(), key=lambda x: x[1]['rmse'])
print(f"\nBest performing model: {best_model[0]}")
print(f"Best RMSE: {best_model[1]['rmse']:.4f}")

Features after preprocessing: ['Soil_Type', 'Crop', 'Rainfall_mm', 'Temperature_Celsius', 'Fertilizer_Used', 'Irrigation_Used', 'Weather_Condition', 'Days_to_Harvest']
Number of features after preprocessing: 8
Processed test data shape: (200000, 8)
Error evaluating Client_1: X has 8 features, but LinearRegression is expecting 17 features as input.
Error evaluating Client_2: X has 8 features, but LinearRegression is expecting 17 features as input.

Model Evaluation Results:
--------------------------------------------------

Client_3:
RMSE: 0.2952
MSE: 0.0871
R²: 0.9130

Client_4:
RMSE: 0.2952
MSE: 0.0871
R²: 0.9130

Global:
RMSE: 0.2952
MSE: 0.0871
R²: 0.9130

Best performing model: Client_4
Best RMSE: 0.2952


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [33]:
import joblib
import numpy as np
from datetime import datetime
import os
import json
import hashlib
from pathlib import Path

class ModelDistributor:
    def __init__(self, global_model_path='global_model.pkl', version=None):
        self.global_model_path = global_model_path
        self.version = version or datetime.now().strftime("%Y%m%d_%H%M%S")
        self.distribution_dir = Path('distributed_models')
        self.distribution_dir.mkdir(exist_ok=True)
        
    def calculate_model_hash(self, model_data):
        """Calculate SHA-256 hash of model data for integrity verification"""
        return hashlib.sha256(str(model_data).encode()).hexdigest()
    
    def create_model_metadata(self, client_id, model_hash):
        """Create metadata for the distributed model"""
        return {
            'client_id': client_id,
            'distribution_date': datetime.now().isoformat(),
            'model_version': self.version,
            'model_hash': model_hash,
            'original_model': self.global_model_path
        }
    
    def distribute_to_client(self, client_id):
        """Distribute global model to a specific client"""
        try:
            # Load global model
            global_coef, global_intercept = joblib.load(self.global_model_path)
            
            # Create client-specific model directory
            client_dir = self.distribution_dir / f'client_{client_id}'
            client_dir.mkdir(exist_ok=True)
            
            # Create model file name with version
            model_filename = f'global_model_v{self.version}.pkl'
            model_path = client_dir / model_filename
            
            # Save model for client
            model_data = (global_coef, global_intercept)
            joblib.dump(model_data, model_path)
            
            # Calculate model hash
            model_hash = self.calculate_model_hash(model_data)
            
            # Create and save metadata
            metadata = self.create_model_metadata(client_id, model_hash)
            metadata_path = client_dir / f'metadata_v{self.version}.json'
            
            with open(metadata_path, 'w') as f:
                json.dump(metadata, f, indent=4)
            
            return {
                'status': 'success',
                'client_id': client_id,
                'model_path': str(model_path),
                'metadata_path': str(metadata_path)
            }
            
        except Exception as e:
            return {
                'status': 'error',
                'client_id': client_id,
                'error': str(e)
            }
    
    def distribute_to_all_clients(self, client_ids):
        """Distribute global model to all clients"""
        results = []
        for client_id in client_ids:
            result = self.distribute_to_client(client_id)
            results.append(result)
            
            # Print status message
            status = "✓" if result['status'] == 'success' else "✗"
            print(f"Client {client_id}: {status}")
            
            if result['status'] == 'error':
                print(f"  Error: {result['error']}")
            else:
                print(f"  Model: {result['model_path']}")
                print(f"  Metadata: {result['metadata_path']}")
                
        return results
    
    def verify_distribution(self, client_id):
        """Verify the integrity of a distributed model"""
        try:
            client_dir = self.distribution_dir / f'client_{client_id}'
            model_path = client_dir / f'global_model_v{self.version}.pkl'
            metadata_path = client_dir / f'metadata_v{self.version}.json'
            
            # Load distributed model and calculate hash
            distributed_model = joblib.load(model_path)
            current_hash = self.calculate_model_hash(distributed_model)
            
            # Load metadata and get stored hash
            with open(metadata_path, 'r') as f:
                metadata = json.load(f)
            stored_hash = metadata['model_hash']
            
            # Compare hashes
            return {
                'client_id': client_id,
                'verified': current_hash == stored_hash,
                'model_version': metadata['model_version'],
                'distribution_date': metadata['distribution_date']
            }
            
        except Exception as e:
            return {
                'client_id': client_id,
                'verified': False,
                'error': str(e)
            }

# Usage example
if __name__ == "__main__":
    # Initialize distributor
    distributor = ModelDistributor(global_model_path='global_model.pkl')
    
    # List of client IDs
    client_ids = [1, 2, 3, 4]
    
    print("Distributing global model to clients...")
    print("-" * 50)
    
    # Distribute models
    results = distributor.distribute_to_all_clients(client_ids)
    
    print("\nVerifying distributions...")
    print("-" * 50)
    
    # Verify distributions
    for client_id in client_ids:
        verification = distributor.verify_distribution(client_id)
        status = "✓" if verification.get('verified') else "✗"
        print(f"Client {client_id}: {status}")
        if not verification.get('verified'):
            print(f"  Error: {verification.get('error', 'Verification failed')}")

Distributing global model to clients...
--------------------------------------------------
Client 1: ✓
  Model: distributed_models\client_1\global_model_v20241222_163858.pkl
  Metadata: distributed_models\client_1\metadata_v20241222_163858.json
Client 2: ✓
  Model: distributed_models\client_2\global_model_v20241222_163858.pkl
  Metadata: distributed_models\client_2\metadata_v20241222_163858.json
Client 3: ✓
  Model: distributed_models\client_3\global_model_v20241222_163858.pkl
  Metadata: distributed_models\client_3\metadata_v20241222_163858.json
Client 4: ✓
  Model: distributed_models\client_4\global_model_v20241222_163858.pkl
  Metadata: distributed_models\client_4\metadata_v20241222_163858.json

Verifying distributions...
--------------------------------------------------
Client 1: ✓
Client 2: ✓
Client 3: ✓
Client 4: ✓


In [2]:
import joblib
import numpy as np

# Load the models
model_client_1 = joblib.load('model_client_1.pkl')
model_client_2 = joblib.load('model_client_2.pkl')
model_client_3 = joblib.load('model_client_3.pkl')
model_client_4 = joblib.load('model_client_4.pkl')

# Extract weights (coefficients)
coef_1 = model_client_1.coef_
coef_2 = model_client_2.coef_
coef_3 = model_client_3.coef_
coef_4 = model_client_4.coef_

# Extract biases (intercepts)
intercept_1 = model_client_1.intercept_
intercept_2 = model_client_2.intercept_
intercept_3 = model_client_3.intercept_
intercept_4 = model_client_4.intercept_

# Display results
print("Client 1 Coefficients:", coef_1)
print("Client 1 Intercept:", intercept_1)

print("Client 2 Coefficients:", coef_2)
print("Client 2 Intercept:", intercept_2)

print("Client 3 Coefficients:", coef_3)
print("Client 3 Intercept:", intercept_3)

print("Client 4 Coefficients:", coef_4)
print("Client 4 Intercept:", intercept_4)

# Save coefficients and intercepts as .npy files
np.save('coef_client_1.npy', coef_1)
np.save('coef_client_2.npy', coef_2)
np.save('coef_client_3.npy', coef_3)
np.save('coef_client_4.npy', coef_4)

np.save('intercept_client_1.npy', intercept_1)
np.save('intercept_client_2.npy', intercept_2)
np.save('intercept_client_3.npy', intercept_3)
np.save('intercept_client_4.npy', intercept_4)

print("Model parameters saved successfully.")


Client 1 Coefficients: [ 1.29646234e+00  1.43479168e-01  7.49492860e-01  5.99692963e-01
  1.20276851e-03  2.19722231e-03 -7.63652704e-04 -1.11523257e-03
  1.09437005e-03  1.98662205e-03 -2.13774932e-03 -4.83673753e-03
  2.75819816e-03 -6.03955228e-03 -4.41903616e-03 -3.82805169e-04
  1.72747181e-03]
Client 1 Intercept: 4.649473449370341
Client 2 Coefficients: [ 1.29855473e+00  1.43170765e-01  7.51797574e-01  6.00038871e-01
  9.00648904e-04  2.16979625e-03 -4.38318816e-03 -7.64832536e-05
  4.68796958e-04 -6.50306727e-03  4.78925832e-03  3.99148694e-03
  2.06905597e-03  3.06729233e-03  5.09248100e-03 -2.31231230e-04
  5.65013950e-04]
Client 2 Intercept: 4.642552360196861
Client 3 Coefficients: [ 1.54702547e-04 -1.22302337e-03  7.64345295e-01  8.48115199e-02
  4.41670814e-01  3.53623120e-01  5.37683894e-04  5.90079238e-05]
Client 3 Intercept: 5.774058109752646e-05
Client 4 Coefficients: [ 3.53241478e-04 -8.53621509e-04  7.66118385e-01  8.47888754e-02
  4.41010276e-01  3.52858297e-01 -4.85

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [4]:
import git
import os

# Initialize the repo and add remote
repo_dir = r"C:\Users\SATYAJIT\crop yield"  # Replace with the actual path
repo = git.Repo.init(repo_dir)

# Add remote (GitHub repository URL)
origin = repo.create_remote('origin', 'https://github.com/555-Satyajit/Farm-params')  # Replace with your repo URL

# Add files
files_to_add = ['coef_client_1.npy', 'coef_client_2.npy', 'coef_client_3.npy', 'coef_client_4.npy',
                'intercept_client_1.npy', 'intercept_client_2.npy', 'intercept_client_3.npy', 'intercept_client_4.npy']
repo.index.add(files_to_add)

# Commit the changes
repo.index.commit("Upload model parameters")

# Push to GitHub
origin.push('master')


ModuleNotFoundError: No module named 'git'

In [6]:
! pip install git

ERROR: Could not find a version that satisfies the requirement git (from versions: none)
ERROR: No matching distribution found for git
