PRACTICE LAB-1

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [2]:
# Fetch the California Housing dataset and separate the feature matrix from the targets
data = fetch_california_housing()
X = data.data
y = data.target
# Inference: This dataset contains housing data for California, including various features and target house prices.


In [3]:
# Hold out a fixed test set; random_state pins the shuffle so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Inference: 80% of the data is used for training, and 20% is used for testing to evaluate model performance.

In [4]:
# Function to introduce random normally distributed noise
def add_normal_noise(X, proportion=0.1, scale=0.1, random_state=None):
    """Return a copy of ``X`` with zero-mean Gaussian noise added to a random subset of rows.

    Parameters
    ----------
    X : array-like of shape (n_samples, ...)
        Feature matrix. It is never modified; a copy is perturbed and returned.
    proportion : float, default=0.1
        Fraction of rows to perturb. ``int(proportion * n_samples)`` distinct
        rows are drawn without replacement.
    scale : float, default=0.1
        Standard deviation of the noise, in the raw units of ``X`` (the same
        value is applied to every feature).
    random_state : None, int, numpy.random.RandomState or numpy.random.Generator
        Source of randomness. ``None`` (the default) uses NumPy's global
        state, so ``np.random.seed`` still controls the result exactly as
        before. An int seeds a private ``RandomState``; an existing
        generator object is used as-is.

    Returns
    -------
    numpy.ndarray
        Noisy copy of ``X``. Non-floating input is converted to float so the
        noise can be added.
    """
    if random_state is None:
        rng = np.random  # legacy global state keeps the old default behaviour
    elif isinstance(random_state, (np.random.RandomState, np.random.Generator)):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    # np.array always copies; integer/bool input must become float or the
    # in-place addition of float noise below would raise a casting error.
    X_noisy = np.array(X)
    if not np.issubdtype(X_noisy.dtype, np.floating):
        X_noisy = X_noisy.astype(float)

    num_samples = int(proportion * len(X_noisy))
    indices = rng.choice(len(X_noisy), num_samples, replace=False)
    noise = rng.normal(loc=0, scale=scale, size=(num_samples,) + X_noisy.shape[1:])
    X_noisy[indices] += noise
    return X_noisy
# Inference: This function introduces Gaussian noise to a subset of training data, simulating random errors.

In [5]:
# Function to introduce random uniformly distributed noise
def add_uniform_noise(X, proportion=0.1, scale=0.1, random_state=None):
    """Return a copy of ``X`` with uniform noise added to a random subset of rows.

    Parameters
    ----------
    X : array-like of shape (n_samples, ...)
        Feature matrix. It is never modified; a copy is perturbed and returned.
    proportion : float, default=0.1
        Fraction of rows to perturb. ``int(proportion * n_samples)`` distinct
        rows are drawn without replacement.
    scale : float, default=0.1
        Half-width of the noise interval: each perturbed entry receives a
        value drawn from ``[-scale, scale)``, in the raw units of ``X``.
    random_state : None, int, numpy.random.RandomState or numpy.random.Generator
        Source of randomness. ``None`` (the default) uses NumPy's global
        state, so ``np.random.seed`` still controls the result exactly as
        before. An int seeds a private ``RandomState``; an existing
        generator object is used as-is.

    Returns
    -------
    numpy.ndarray
        Noisy copy of ``X``. Non-floating input is converted to float so the
        noise can be added.
    """
    if random_state is None:
        rng = np.random  # legacy global state keeps the old default behaviour
    elif isinstance(random_state, (np.random.RandomState, np.random.Generator)):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    # np.array always copies; integer/bool input must become float or the
    # in-place addition of float noise below would raise a casting error.
    X_noisy = np.array(X)
    if not np.issubdtype(X_noisy.dtype, np.floating):
        X_noisy = X_noisy.astype(float)

    num_samples = int(proportion * len(X_noisy))
    indices = rng.choice(len(X_noisy), num_samples, replace=False)
    noise = rng.uniform(low=-scale, high=scale, size=(num_samples,) + X_noisy.shape[1:])
    X_noisy[indices] += noise
    return X_noisy
# Inference: This function adds uniform noise, distributing errors evenly across the selected records.


In [6]:
# Function to introduce data poisoning attack
def add_data_poisoning(X, y, proportion=0.05, bias_value=1.0, random_state=None):
    """Return poisoned copies of ``X`` and ``y`` with a constant bias injected into some rows.

    For a random subset of rows, ``bias_value`` is added both to the first
    feature column and to the corresponding target. The two returned arrays
    belong together: training on the poisoned features with the original
    targets applies only half of the attack.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix. Not modified.
    y : array-like of shape (n_samples,)
        Targets. Not modified.
    proportion : float, default=0.05
        Fraction of rows to poison. ``int(proportion * n_samples)`` distinct
        rows are drawn without replacement.
    bias_value : float, default=1.0
        Amount added to column 0 of ``X`` and to ``y`` for the selected rows.
    random_state : None, int, numpy.random.RandomState or numpy.random.Generator
        Source of randomness. ``None`` (the default) uses NumPy's global
        state, so ``np.random.seed`` still controls the result exactly as
        before. An int seeds a private ``RandomState``; an existing
        generator object is used as-is.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_poisoned, y_poisoned)``. Non-floating inputs are converted to
        float so a fractional bias can be added.
    """
    if random_state is None:
        rng = np.random  # legacy global state keeps the old default behaviour
    elif isinstance(random_state, (np.random.RandomState, np.random.Generator)):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    # np.array always copies; integer/bool input must become float or the
    # in-place addition of a float bias below would raise a casting error.
    X_poisoned = np.array(X)
    if not np.issubdtype(X_poisoned.dtype, np.floating):
        X_poisoned = X_poisoned.astype(float)
    y_poisoned = np.array(y)
    if not np.issubdtype(y_poisoned.dtype, np.floating):
        y_poisoned = y_poisoned.astype(float)

    num_samples = int(proportion * len(X_poisoned))
    indices = rng.choice(len(X_poisoned), num_samples, replace=False)
    X_poisoned[indices, 0] += bias_value  # Modify the first feature (MedInc) to introduce bias
    y_poisoned[indices] += bias_value  # Modify the target to introduce bias
    return X_poisoned, y_poisoned
# Inference: This attack deliberately modifies feature values and target values to create biased learning patterns.

In [7]:
# Function to train and evaluate a model
def evaluate_model(X_train, y_train, X_test, y_test):
    """Fit a fresh LinearRegression on the training data and score it on the test data.

    Returns a ``(mae, mse)`` tuple: the mean absolute error and mean squared
    error of the predictions for ``X_test`` measured against ``y_test``.
    """
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    predictions = regressor.predict(X_test)
    return (
        mean_absolute_error(y_test, predictions),
        mean_squared_error(y_test, predictions),
    )
# Inference: This function trains a Linear Regression model and evaluates its performance using MAE and MSE.

In [8]:
# Reference run: train and score on the unmodified training data
baseline_mae, baseline_mse = evaluate_model(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)
print(f"Baseline MAE: {baseline_mae:.4f}, Baseline MSE: {baseline_mse:.4f}")
# Inference: This baseline performance serves as a benchmark to compare against the adversarially attacked models.

Baseline MAE: 0.5332, Baseline MSE: 0.5559


In [9]:
# Apply attacks and evaluate performance
attacks = {
    "Gaussian Noise Attack": add_normal_noise(X_train),
    "Uniform Noise Attack": add_uniform_noise(X_train),
    "Data Poisoning Attack": add_data_poisoning(X_train, y_train)[0]
}

In [10]:
for attack_name, X_attacked in attacks.items():
    mae, mse = evaluate_model(X_attacked, y_train, X_test, y_test)
    print(f"{attack_name} - MAE: {mae:.4f}, MSE: {mse:.4f}")
    # Inference: Each attack degrades model performance differently, illustrating vulnerabilities to adversarial manipulations.

Gaussian Noise Attack - MAE: 0.5333, MSE: 0.5545
Uniform Noise Attack - MAE: 0.5332, MSE: 0.5555
Data Poisoning Attack - MAE: 0.5307, MSE: 0.5534


In [11]:

# Final Inference:
# This experiment demonstrates how adversarial attacks can impact a Linear Regression model.
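# Gaussian and Uniform noise introduce random errors; in the recorded run they change MAE and MSE by less than 0.002 relative to the baseline.
# The data poisoning attack manipulates a specific feature and the targets, but in the recorded run it did not cause significant degradation either: its MAE (0.5307) and MSE (0.5534) are marginally below the baseline (0.5332 / 0.5559), so a larger poisoning proportion or bias value would be needed to demonstrate a clear effect.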
# These findings highlight the importance of robust data preprocessing and validation techniques to mitigate adversarial vulnerabilities.