In [1]:
from trusted_authority import TrustedAuthority
from company import Company
from central_analyzer import CentralAnalyzer
from utils import save_model, predict_salary

In [2]:
# Step 1: Initialize Trusted Authority
# The Trusted Authority is created first: the Central Analyzer and every
# Company below are constructed from the public context it exposes.
RIDGE_LAMBDA = 10.0  # forwarded as `lambda_ridge`; presumably the ridge penalty strength — TODO confirm in trusted_authority.py

step1_banner = "\n=== Step 1: Initializing Trusted Authority and Generating Encryption Context ==="
print(step1_banner)

trusted_authority = TrustedAuthority(lambda_ridge=RIDGE_LAMBDA)
public_context = trusted_authority.get_public_context()

print("Trusted Authority has generated encryption keys and shared the public context with all companies.")


=== Step 1: Initializing Trusted Authority and Generating Encryption Context ===
Trusted Authority has generated encryption keys and shared the public context with all companies.


In [3]:
# Step 2: Initialize Central Analyzer
# The analyzer is built from the public context obtained in Step 1.
print("\n=== Step 2: Initializing Central Analyzer ===")
central_analyzer = CentralAnalyzer(public_context)
print("Central Analyzer is ready to receive encrypted data.")

# Step 3: Companies Load Data
# One CSV per participating company; every Company receives the same public context.
print("\n=== Step 3: Companies Load Data ===")
company_csv_paths = {
    'Accenture': 'data/accenture_employee_salary_data.csv',
    'GovTech': 'data/govtech_employee_salary_data.csv',
    'Tiktok': 'data/tiktok_employee_salary_data.csv',
}
# Dicts preserve insertion order, so companies are constructed in the order listed above.
companies = {
    name: Company(name, csv_path, public_context)
    for name, csv_path in company_csv_paths.items()
}
print("All companies have loaded their data.")


=== Step 2: Initializing Central Analyzer ===
Central Analyzer is ready to receive encrypted data.

=== Step 3: Companies Load Data ===

Loading data for Accenture...
Data loaded successfully for Accenture
Number of records: 180
Converting salary data for Accenture...

Loading data for GovTech...
Data loaded successfully for GovTech
Number of records: 180
Converting salary data for GovTech...

Loading data for Tiktok...
Data loaded successfully for Tiktok
Number of records: 180
Converting salary data for Tiktok...
All companies have loaded their data.


In [4]:
# Step 4: Companies Compute Data Summaries for Global Scaling
# Collects one summary per company, keyed by company name. The resulting dict
# is what Step 5 passes to trusted_authority.compute_global_scaling().
print("\n=== Step 4: Companies Compute Data Summaries for Global Scaling ===")
company_summaries = {}
for name, participant in companies.items():
    print(f"{name} is computing data summaries for global scaling without sharing raw data.")
    company_summaries[name] = participant.compute_data_summary()


=== Step 4: Companies Compute Data Summaries for Global Scaling ===
Accenture is computing data summaries for global scaling without sharing raw data.
GovTech is computing data summaries for global scaling without sharing raw data.
Tiktok is computing data summaries for global scaling without sharing raw data.


In [5]:
# Step 5: Trusted Authority Computes Global Scaling Parameters
# Hands the per-company summaries from Step 4 to the Trusted Authority.
# Any return value of compute_global_scaling() is ignored here.
# NOTE(review): the closing message says "distributed", but the call to
# distribute_scaling_parameters() only appears in the Step 6 loop — confirm wording.
step5_banner = "\n=== Step 5: Trusted Authority Computes Global Scaling Parameters ==="
step5_done = "Global scaling parameters have been computed and distributed to companies."

print(step5_banner)
trusted_authority.compute_global_scaling(company_summaries)
print(step5_done)


=== Step 5: Trusted Authority Computes Global Scaling Parameters ===

=== Computing Global Scaling Parameters with Improved Bounds ===
Role: SOFTWARE ENGINEER, Feature: Age, Min: 21.0000, Max: 29.0000
Role: SOFTWARE ENGINEER, Feature: Years_of_Experience, Min: 0.0000, Max: 4.0000
Role: SOFTWARE ENGINEER, Feature: Years_of_Tenure, Min: 0.0000, Max: 2.0000
Role: SOFTWARE ENGINEER, Feature: Salary, Min: 3647.9700, Max: 12600.0000
Role: DATA SCIENTIST, Feature: Age, Min: 21.0000, Max: 29.0000
Role: DATA SCIENTIST, Feature: Years_of_Experience, Min: 0.0000, Max: 4.0000
Role: DATA SCIENTIST, Feature: Years_of_Tenure, Min: 0.0000, Max: 2.0000
Role: DATA SCIENTIST, Feature: Salary, Min: 4512.5000, Max: 11550.0000
Role: PRODUCT MANAGER, Feature: Age, Min: 21.0000, Max: 29.0000
Role: PRODUCT MANAGER, Feature: Years_of_Experience, Min: 0.0000, Max: 4.0000
Role: PRODUCT MANAGER, Feature: Years_of_Tenure, Min: 0.0000, Max: 2.0000
Role: PRODUCT MANAGER, Feature: Salary, Min: 6654.1100, Max: 12600.0000

In [6]:
# Step 6: Companies Encrypt Data and Send to Central Analyzer
# For each company: install the global scaling parameters, then encrypt and
# hand over one payload per role to the Central Analyzer.
print("\n=== Step 6: Companies Encrypt Data and Send to Central Analyzer ===")
for name, participant in companies.items():
    # Parameters are requested from the Trusted Authority once per company.
    scaling_params = trusted_authority.distribute_scaling_parameters()
    participant.set_global_scaling(scaling_params)
    print(f"{name} encrypts data for all roles and sends to Central Analyzer.")
    for job_role in participant.roles:
        central_analyzer.receive_company_data(
            name, job_role, participant.encrypt_data_by_role(job_role)
        )
print("All companies have sent encrypted data to Central Analyzer.")



=== Step 6: Companies Encrypt Data and Send to Central Analyzer ===
Accenture received global scaling parameters.
Accenture encrypts data for all roles and sends to Central Analyzer.
Received data from Accenture for role SOFTWARE ENGINEER
Received data from Accenture for role DATA SCIENTIST
Received data from Accenture for role PRODUCT MANAGER
GovTech received global scaling parameters.
GovTech encrypts data for all roles and sends to Central Analyzer.
Received data from GovTech for role SOFTWARE ENGINEER
Received data from GovTech for role DATA SCIENTIST
Received data from GovTech for role PRODUCT MANAGER
Tiktok received global scaling parameters.
Tiktok encrypts data for all roles and sends to Central Analyzer.
Received data from Tiktok for role SOFTWARE ENGINEER
Received data from Tiktok for role DATA SCIENTIST
Received data from Tiktok for role PRODUCT MANAGER
All companies have sent encrypted data to Central Analyzer.


In [7]:
# Step 7: Central Analyzer Aggregates Encrypted Data Without Decryption
# NOTE(review): this cell only prints a description of the step. The call that
# performs the aggregation, aggregate_encrypted_data_by_role(), is made per
# role inside the Step 8 loop.
print(
    "\n=== Step 7: Central Analyzer Aggregates Encrypted Data Without Decryption ===",
    "Central Analyzer aggregates the encrypted data for each role without accessing raw data, preserving data privacy through FHE.",
    sep="\n",
)


=== Step 7: Central Analyzer Aggregates Encrypted Data Without Decryption ===
Central Analyzer aggregates the encrypted data for each role without accessing raw data, preserving data privacy through FHE.


In [8]:
# Step 8: Trusted Authority Decrypts Data and Trains the Model
# Per role: aggregate at the Central Analyzer, pass the encrypted result to the
# Trusted Authority for decryption/training, print the coefficients, then save
# the model together with the scaler objects returned by the aggregation.
print("\n=== Step 8: Trusted Authority Decrypts Data and Trains the Model ===")
for role in central_analyzer.role_data.keys():
    print(f"\nProcessing role: {role}")

    aggregated = central_analyzer.aggregate_encrypted_data_by_role(role)
    encrypted_result, scaler_info, salary_scaler = aggregated

    trained = trusted_authority.decrypt_and_train(encrypted_result)
    coefficients, feature_names = trained
    print(f"Model coefficients for role {role} have been computed using encrypted data.")

    # Display coefficients, one "<feature>: <value>" line each, 4 decimal places.
    print(f"\n=== Model Coefficients for Role: {role} ===")
    for feature_name, weight in zip(feature_names, coefficients):
        print(f"{feature_name}: {weight:.4f}")

    save_model(role, coefficients, feature_names, scaler_info, salary_scaler)


=== Step 8: Trusted Authority Decrypts Data and Trains the Model ===

Processing role: SOFTWARE ENGINEER

Aggregating data for role: SOFTWARE ENGINEER
Number of companies contributing: 3
Model coefficients for role SOFTWARE ENGINEER have been computed using encrypted data.

=== Model Coefficients for Role: SOFTWARE ENGINEER ===
Age: 0.1080
Years_of_Experience: 0.1019
Years_of_Tenure: 0.0329
experience_ratio: 0.5366
Gender_Male: 0.0549
Education_Level_Bachelor's: 0.1159
Education_Level_Master's: 0.1007
Model saved for role: SOFTWARE ENGINEER

Processing role: DATA SCIENTIST

Aggregating data for role: DATA SCIENTIST
Number of companies contributing: 3
Model coefficients for role DATA SCIENTIST have been computed using encrypted data.

=== Model Coefficients for Role: DATA SCIENTIST ===
Age: 0.1007
Years_of_Experience: 0.1463
Years_of_Tenure: 0.0459
experience_ratio: 0.4173
Gender_Male: 0.1394
Education_Level_Bachelor's: 0.3159
Education_Level_Master's: 0.3250
Model saved for role: DATA SCIENTIST

In [9]:
# Step 9: Make Predictions
print("\n=== Step 9: Making Predictions with the Trained Models ===")

# Single sample profile that is scored against every role's model.
# NOTE(review): 'experience_ratio' is listed among the trained features in the
# Step 8 output but is not supplied here — presumably predict_salary derives
# it; TODO confirm in utils.py.
test_input = {
    "Age": 24,
    "Years_of_Experience": 0,
    "Years_of_Tenure": 0,
    "Gender_Male": 0,
    "Education_Level_Bachelor's": 0,
    "Education_Level_Master's": 0,
}

for role in central_analyzer.role_data.keys():
    # Each call gets its own copy of the profile, so the shared dict is never
    # modified by predict_salary.
    predicted_salary = predict_salary(role, test_input.copy())
    if predicted_salary is None:
        # NOTE(review): a None result is skipped without any message from this cell.
        continue
    print(f"Predicted salary for role {role}: ${predicted_salary:,.2f}")

print("\nProcess completed successfully. All models have been trained using encrypted data, preserving the privacy of each company's information.")


=== Step 9: Making Predictions with the Trained Models ===
Predicted salary for role SOFTWARE ENGINEER: $4,010.43
Predicted salary for role DATA SCIENTIST: $4,778.16
Predicted salary for role PRODUCT MANAGER: $6,901.18

Process completed successfully. All models have been trained using encrypted data, preserving the privacy of each company's information.
