In [3]:
import warnings
import pandas as pd
import numpy as np
import tenseal as ts
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, accuracy_score 
warnings.filterwarnings('ignore')
def sigmoid_approximation(z):
    return 0.5 + 0.125 * z  # Approximation for encrypted computations

def initialize_weights(n_features):
    weights = np.zeros(n_features)
    bias = 0
    return weights, bias

def train_logistic_regression(X, y, learning_rate=0.01, iterations=1000):
    weights, bias = initialize_weights(X.shape[1])
    
    for i in range(iterations):
        linear_model = np.dot(X, weights) + bias
        y_pred = sigmoid_approximation(linear_model)
        
        dw = (1/len(y)) * np.dot(X.T, (y_pred - y))
        db = (1/len(y)) * np.sum(y_pred - y)
        
        weights -= learning_rate * dw
        bias -= learning_rate * db
        
        if i % 100 == 0:
            cost = (-1/len(y)) * np.sum(y * np.log(y_pred + 1e-9) + (1 - y) * np.log(1 - y_pred + 1e-9))
            print(f"Iteration {i}: Cost {cost}")
    
    return weights, bias

def predict_encrypted(X_enc, weights, bias, context):
    predictions = []
    for x_enc in X_enc:
        linear_model_enc = x_enc.dot(weights) + bias
        y_pred_enc = sigmoid_approximation(linear_model_enc.decrypt())  # Decrypt to approximate sigmoid
        predictions.append(1 if y_pred_enc > 0.5 else 0)
    return predictions

def load_and_preprocess_data(file_path):
    df = pd.read_csv(file_path)
    features = [
        'Gender', 'Customer Type', 'Age', 'Type of Travel', 'Class', 
        'Flight Distance', 'Inflight wifi service', 'Departure/Arrival time convenient', 
        'Ease of Online booking', 'Gate location', 'Food and drink', 
        'Online boarding', 'Seat comfort', 'Inflight entertainment', 
        'On-board service', 'Leg room service', 'Baggage handling', 
        'Checkin service', 'Inflight service', 'Cleanliness'
    ]
    X = df[features]
    y = df['satisfaction'].apply(lambda x: 1 if x == 'satisfied' else 0)
    
    categorical_cols = ['Gender', 'Customer Type', 'Type of Travel', 'Class']
    label_encoders = {}
    
    for col in categorical_cols:
        le = LabelEncoder()
        X[col] = le.fit_transform(X[col])
        label_encoders[col] = le
    
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    return X_scaled, y.values, scaler, label_encoders

def encrypt_data(context, data):
    return [ts.ckks_vector(context, row.tolist()) for row in data]

def main():
    X_train, y_train, scaler, label_encoders = load_and_preprocess_data('C:/Users/priya/OneDrive/Documents/sem 8/DPSA LAB/CAT 1/data/train.csv')
    X_test, y_test, _, _ = load_and_preprocess_data('C:/Users/priya/OneDrive/Documents/sem 8/DPSA LAB/CAT 1/data/test.csv')
    
    weights, bias = train_logistic_regression(X_train, y_train)
    
    context = ts.context(ts.SCHEME_TYPE.CKKS, poly_modulus_degree=8192, coeff_mod_bit_sizes=[60, 40, 40, 60])
    context.global_scale = 2**40
    context.generate_galois_keys()
    context.generate_relin_keys()
    
    X_test_encrypted = encrypt_data(context, X_test)
    
    print("First 5 Encrypted Data Points:")
    for i in range(5):
        print(X_test_encrypted[i])
    
    y_pred_encrypted = predict_encrypted(X_test_encrypted, weights, bias, context)
    
    print("Accuracy:", accuracy_score(y_test, y_pred_encrypted))
    print("\nClassification Report:\n", classification_report(y_test, y_pred_encrypted))

if __name__ == "__main__":
    main()


Iteration 0: Cost 0.6931471785599453


KeyboardInterrupt: 

In [None]:
import warnings
import pandas as pd
import numpy as np
import tenseal as ts
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, accuracy_score

warnings.filterwarnings('ignore')

# Approximate sigmoid function for encrypted computations
def sigmoid_approximation(z):
    return [0.5 + 0.125 * val for val in z]  # Apply element-wise

def initialize_weights(n_features):
    weights = np.zeros(n_features)
    bias = 0
    return weights, bias

def train_logistic_regression(X, y, learning_rate=0.01, iterations=1000):
    weights, bias = initialize_weights(X.shape[1])
    
    for i in range(iterations):
        linear_model = np.dot(X, weights) + bias
        y_pred = sigmoid_approximation(linear_model)
        
        dw = (1/len(y)) * np.dot(X.T, (y_pred - y))
        db = (1/len(y)) * np.sum(y_pred - y)
        
        weights -= learning_rate * dw
        bias -= learning_rate * db
        
        if i % 100 == 0:
            cost = (-1/len(y)) * np.sum(y * np.log(y_pred + 1e-9) + (1 - y) * np.log(1 - y_pred + 1e-9))
            print(f"Iteration {i}: Cost {cost}")
    
    return weights, bias

def predict_encrypted(X_enc, weights, bias, context):
    predictions = []
    for x_enc in X_enc:
        linear_model_enc = x_enc.dot(weights) + bias
        decrypted_values = linear_model_enc.decrypt()  # Decrypt to get plaintext values
        y_pred_enc = sigmoid_approximation(decrypted_values)  # Apply sigmoid approximation
        
        predictions.append(1 if y_pred_enc[0] > 0.5 else 0)  # Take the first value
    return predictions

def load_and_preprocess_data(file_path):
    df = pd.read_csv(file_path)
    features = [
        'Gender', 'Customer Type', 'Age', 'Type of Travel', 'Class', 
        'Flight Distance', 'Inflight wifi service', 'Departure/Arrival time convenient', 
        'Ease of Online booking', 'Gate location', 'Food and drink', 
        'Online boarding', 'Seat comfort', 'Inflight entertainment', 
        'On-board service', 'Leg room service', 'Baggage handling', 
        'Checkin service', 'Inflight service', 'Cleanliness'
    ]
    X = df[features]
    y = df['satisfaction'].apply(lambda x: 1 if x == 'satisfied' else 0)
    
    categorical_cols = ['Gender', 'Customer Type', 'Type of Travel', 'Class']
    label_encoders = {}
    
    for col in categorical_cols:
        le = LabelEncoder()
        X[col] = le.fit_transform(X[col])
        label_encoders[col] = le
    
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    return X_scaled, y.values, scaler, label_encoders

def encrypt_data(context, data):
    return [ts.ckks_vector(context, row.tolist()) for row in data]

def main():
    # Load and preprocess training data
    X_train, y_train, scaler, label_encoders = load_and_preprocess_data('C:/Users/priya/OneDrive/Documents/sem 8/DPSA LAB/CAT 1/data/train.csv')
    X_test, y_test, _, _ = load_and_preprocess_data('C:/Users/priya/OneDrive/Documents/sem 8/DPSA LAB/CAT 1/data/test.csv')
    
    # Train logistic regression model
    weights, bias = train_logistic_regression(X_train, y_train)
    
    # Set up homomorphic encryption context
    context = ts.context(ts.SCHEME_TYPE.CKKS, poly_modulus_degree=8192, coeff_mod_bit_sizes=[60, 40, 40, 60])
    context.global_scale = 2**40
    context.generate_galois_keys()
    context.generate_relin_keys()
    
    # Encrypt test data
    X_test_encrypted = encrypt_data(context, X_test)
    
    print("First 5 Encrypted Data Points:")
    for i in range(5):
        print(X_test_encrypted[i])
    
    # Predict using encrypted test data
    y_pred_encrypted = predict_encrypted(X_test_encrypted, weights, bias, context)
    
    # Print results
    print("Accuracy:", accuracy_score(y_test, y_pred_encrypted))
    print("\nClassification Report:\n", classification_report(y_test, y_pred_encrypted))

if __name__ == "__main__":
    main()


TypeError: can only concatenate list (not "float") to list

In [5]:
import warnings
import pandas as pd
import numpy as np
import tenseal as ts
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, accuracy_score

warnings.filterwarnings('ignore')

# Approximate sigmoid function for encrypted computations
def sigmoid_approximation(z):
    return np.array([0.5 + 0.125 * val for val in z])  # Convert to NumPy array

def initialize_weights(n_features):
    weights = np.zeros(n_features)
    bias = 0
    return weights, bias

def train_logistic_regression(X, y, learning_rate=0.01, iterations=10):
    weights, bias = initialize_weights(X.shape[1])
    
    for i in range(iterations):
        linear_model = np.dot(X, weights) + bias
        y_pred = sigmoid_approximation(linear_model)
        
        dw = (1/len(y)) * np.dot(X.T, (y_pred - y))
        db = (1/len(y)) * np.sum(y_pred - y)
        
        weights -= learning_rate * dw
        bias -= learning_rate * db
        
        cost = (-1/len(y)) * np.sum(y * np.log(y_pred + 1e-9) + (1 - y) * np.log(1 - y_pred + 1e-9))
        print(f"Iteration {i}: Cost {cost}")
    
    return weights, bias

def predict_encrypted(X_enc, weights, bias):
    predictions = []
    for x_enc in X_enc:
        linear_model_enc = x_enc.dot(weights) + bias
        decrypted_values = linear_model_enc.decrypt()  # Decrypt to get plaintext values
        y_pred_enc = sigmoid_approximation(decrypted_values)  # Apply sigmoid approximation
        
        predictions.append(1 if y_pred_enc[0] > 0.5 else 0)  # Take the first value
    return predictions

def load_and_preprocess_data(file_path):
    df = pd.read_csv(file_path)
    features = [
        'Gender', 'Customer Type', 'Age', 'Type of Travel', 'Class', 
        'Flight Distance', 'Inflight wifi service', 'Departure/Arrival time convenient', 
        'Ease of Online booking', 'Gate location', 'Food and drink', 
        'Online boarding', 'Seat comfort', 'Inflight entertainment', 
        'On-board service', 'Leg room service', 'Baggage handling', 
        'Checkin service', 'Inflight service', 'Cleanliness'
    ]
    X = df[features]
    y = df['satisfaction'].apply(lambda x: 1 if x == 'satisfied' else 0)
    
    categorical_cols = ['Gender', 'Customer Type', 'Type of Travel', 'Class']
    label_encoders = {}
    
    for col in categorical_cols:
        le = LabelEncoder()
        X[col] = le.fit_transform(X[col])
        label_encoders[col] = le
    
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    return X_scaled, y.values, scaler, label_encoders

def encrypt_data(context, data):
    return [ts.ckks_vector(context, row.tolist()) for row in data]

def main():
    # Load and preprocess training data
    X_train, y_train, scaler, label_encoders = load_and_preprocess_data('C:/Users/priya/OneDrive/Documents/sem 8/DPSA LAB/CAT 1/data/train.csv')
    X_test, y_test, _, _ = load_and_preprocess_data('C:/Users/priya/OneDrive/Documents/sem 8/DPSA LAB/CAT 1/data/test.csv')
    print("Preprocessed")

    # Train logistic regression model
    weights, bias = train_logistic_regression(X_train, y_train)
    print(weights, bias)

    # Set up homomorphic encryption context
    context = ts.context(ts.SCHEME_TYPE.CKKS, poly_modulus_degree=8192, coeff_mod_bit_sizes=[60, 40, 40, 60])
    context.global_scale = 2**40
    context.generate_galois_keys()
    context.generate_relin_keys()
    print("context set")
    
    # Encrypt test data
    X_test_encrypted = encrypt_data(context, X_test)
    print("x test encrypted")

    print("First 5 Encrypted Data Points:")
    for i in range(5):
        print(X_test_encrypted[i])
    
    # Predict using encrypted test data
    y_pred_encrypted = predict_encrypted(X_test_encrypted, weights, bias)
    
    # Print results
    print("Accuracy:", accuracy_score(y_test, y_pred_encrypted))
    print("\nClassification Report:\n", classification_report(y_test, y_pred_encrypted))

if __name__ == "__main__":
    main()


Preprocessed
Iteration 0: Cost 0.6931471785599453
Iteration 1: Cost 0.6910300227812597
Iteration 2: Cost 0.6889323492453945
Iteration 3: Cost 0.6868538961452733
Iteration 4: Cost 0.6847944068236171
Iteration 5: Cost 0.6827536296347108
Iteration 6: Cost 0.680731317810977
Iteration 7: Cost 0.6787272293341454
Iteration 8: Cost 0.6767411268108289
Iteration 9: Cost 0.6747727773523167
[ 5.95167403e-04 -9.17941031e-03  6.65105233e-03 -2.19498106e-02
 -2.18682744e-02  1.45135195e-02  1.38007188e-02 -2.59204105e-03
  8.29034133e-03 -7.66735514e-06  1.00784543e-02  2.44993157e-02
  1.68835798e-02  1.92218184e-02  1.56166033e-02  1.51948615e-02
  1.19668116e-02  1.14625563e-02  1.18176558e-02  1.47215872e-02] -0.006629355195719602
context set
x test encrypted
First 5 Encrypted Data Points:
<tenseal.tensors.ckksvector.CKKSVector object at 0x0000019BE08BBCD0>
<tenseal.tensors.ckksvector.CKKSVector object at 0x0000019BE08BB9A0>
<tenseal.tensors.ckksvector.CKKSVector object at 0x0000019BE08BBCA0>
<te

In [None]:
import warnings
import pandas as pd
import numpy as np
import tenseal as ts
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, accuracy_score

warnings.filterwarnings('ignore')

# Approximate sigmoid function for encrypted computations
def sigmoid_approximation(z):
    return np.array([0.5 + 0.125 * val for val in z])  # Convert to NumPy array

def initialize_weights(n_features):
    weights = np.zeros(n_features)
    bias = 0
    return weights, bias

def train_logistic_regression(X, y, learning_rate=0.01, iterations=10):
    weights, bias = initialize_weights(X.shape[1])
    
    for i in range(iterations):
        linear_model = np.dot(X, weights) + bias
        y_pred = sigmoid_approximation(linear_model)
        
        dw = (1/len(y)) * np.dot(X.T, (y_pred - y))
        db = (1/len(y)) * np.sum(y_pred - y)
        
        weights -= learning_rate * dw
        bias -= learning_rate * db
        
        cost = (-1/len(y)) * np.sum(y * np.log(y_pred + 1e-9) + (1 - y) * np.log(1 - y_pred + 1e-9))
        print(f"Iteration {i}: Cost {cost}")
    
    return weights, bias

def predict_encrypted(X_enc, weights, bias):
    predictions = []
    for x_enc in X_enc:
        linear_model_enc = x_enc.dot(weights) + bias
        decrypted_values = linear_model_enc.decrypt()  # Decrypt to get plaintext values
        y_pred_enc = sigmoid_approximation(decrypted_values)  # Apply sigmoid approximation
        
        predictions.append(1 if y_pred_enc[0] > 0.5 else 0)  # Take the first value
    return predictions

def load_and_preprocess_data(file_path):
    df = pd.read_csv(file_path).head(2000)  # Select only first 2000 rows
    features = [
        'Gender', 'Customer Type', 'Age', 'Type of Travel', 'Class', 
        'Flight Distance', 'Inflight wifi service', 'Departure/Arrival time convenient', 
        'Ease of Online booking', 'Gate location', 'Food and drink', 
        'Online boarding', 'Seat comfort', 'Inflight entertainment', 
        'On-board service', 'Leg room service', 'Baggage handling', 
        'Checkin service', 'Inflight service', 'Cleanliness'
    ]
    X = df[features]
    y = df['satisfaction'].apply(lambda x: 1 if x == 'satisfied' else 0)
    
    categorical_cols = ['Gender', 'Customer Type', 'Type of Travel', 'Class']
    label_encoders = {}
    
    for col in categorical_cols:
        le = LabelEncoder()
        X[col] = le.fit_transform(X[col])
        label_encoders[col] = le
    
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    return X_scaled, y.values, scaler, label_encoders

def encrypt_data(context, data):
    return [ts.ckks_vector(context, row.tolist()) for row in data]

def main():
    # Load and preprocess training data
    X_train, y_train, scaler, label_encoders = load_and_preprocess_data('C:/Users/priya/OneDrive/Documents/sem 8/DPSA LAB/CAT 1/data/train.csv')
    X_test, y_test, _, _ = load_and_preprocess_data('C:/Users/priya/OneDrive/Documents/sem 8/DPSA LAB/CAT 1/data/test.csv')
    print("Preprocessed")

    # Train logistic regression model
    weights, bias = train_logistic_regression(X_train, y_train)
    print(weights, bias)

    # Set up homomorphic encryption context
    context = ts.context(ts.SCHEME_TYPE.CKKS, poly_modulus_degree=8192, coeff_mod_bit_sizes=[60, 40, 40, 60])
    context.global_scale = 2**40
    context.generate_galois_keys()
    context.generate_relin_keys()
    print("Context set")
    
    # Encrypt test data
    X_test_encrypted = encrypt_data(context, X_test)
    print("X test encrypted")

    print("First 5 Encrypted Data Points:")
    for i in range(5):
        print(X_test_encrypted[i])
    
    # Predict using encrypted test data
    y_pred_encrypted = predict_encrypted(X_test_encrypted, weights, bias)
    
    # Print results
    print("Accuracy:", accuracy_score(y_test, y_pred_encrypted))
    print("\nClassification Report:\n", classification_report(y_test, y_pred_encrypted))

if __name__ == "__main__":
    main()


Preprocessed
Iteration 0: Cost 0.6931471785599453
Iteration 1: Cost 0.6910639992414881
Iteration 2: Cost 0.6889998299695634
Iteration 3: Cost 0.6869544170613257
Iteration 4: Cost 0.6849275117689373
Iteration 5: Cost 0.6829188701486345
Iteration 6: Cost 0.6809282529342786
Iteration 7: Cost 0.6789554254152045
Iteration 8: Cost 0.6770001573181874
Iteration 9: Cost 0.6750622226933587
[ 0.0014124  -0.00888699  0.00463941 -0.02242098 -0.02114419  0.01474986
  0.01268907 -0.00301717  0.00753872  0.00026946  0.01062693  0.02246112
  0.01638698  0.02019326  0.01626775  0.01543968  0.01317612  0.01109645
  0.01294941  0.01426983] -0.006761877221511674
Context set
X test encrypted
First 5 Encrypted Data Points:
<tenseal.tensors.ckksvector.CKKSVector object at 0x00000200DD423AC0>
<tenseal.tensors.ckksvector.CKKSVector object at 0x00000200881A1F40>
<tenseal.tensors.ckksvector.CKKSVector object at 0x00000200881A1F70>
<tenseal.tensors.ckksvector.CKKSVector object at 0x00000200DB834A60>
<tenseal.tenso