In [31]:
import pandas as pd
import numpy as np

def perceptron_train(X, y, learning_rate=0.1, max_iteration=1000, gamma=0.1):
    """Train a binary perceptron with a hard-threshold activation.

    Samples are visited in order, one at a time; whenever the thresholded
    prediction disagrees with the label, the weights and bias are moved by
    ``learning_rate * (label - prediction)`` times the sample (resp. 1).

    Parameters
    ----------
    X : np.ndarray, shape (n_samples, n_features)
        Training samples, one per row.
    y : sequence of {0, 1}, length n_samples
        Class labels.
    learning_rate : float
        Step size applied on every misclassification.
    max_iteration : int
        Maximum number of full passes (epochs) over the data.
    gamma : float
        Training stops early once the Euclidean norm of the net change of
        ``w`` over one epoch drops below this value.

    Returns
    -------
    w : np.ndarray, shape (n_features,)
        Learned weights.
    b : float
        Learned bias.
    epochs_run : int
        Number of epochs actually performed (1-based count, so it matches
        the "Stopped after ..." message and never exceeds ``max_iteration``).
    """
    # Initialize weights and bias
    w = np.zeros(X.shape[1])
    b = 0
    epochs_run = 0  # stays 0 (and defined) even if max_iteration <= 0

    # Exactly max_iteration passes at most (the former range(max_iteration+1)
    # performed one epoch more than requested).
    for _ in range(max_iteration):
        previous_w = np.copy(w)  # Snapshot to measure this epoch's weight change

        for i in range(len(y)):
            # Linear output (w · X[i] + b)
            pred = np.dot(X[i], w) + b

            # Threshold activation
            y_pred = 1 if pred > 0 else 0

            # Update weights and bias only on a misclassification
            if y_pred != y[i]:
                error = y[i] - y_pred
                w += learning_rate * error * X[i]
                b += learning_rate * error

        epochs_run += 1

        # Net change of w over the epoch (bias not included). Updates that
        # cancel each other within the epoch therefore count as no change.
        total_weight_change = np.linalg.norm(w - previous_w)

        # Stop if weight change is less than gamma
        if total_weight_change < gamma:
            print(f"Stopped after {epochs_run} iterations")
            break

    return w, b, epochs_run

# Perceptron prediction
def perceptron_predict(X, w, b):
    """Classify samples with a trained perceptron.

    Computes ``X · w + b`` and returns 1 where that score is strictly
    positive and 0 everywhere else.
    """
    scores = np.dot(X, w)
    scores = scores + b
    is_positive = np.greater(scores, 0)
    return np.where(is_positive, 1, 0)

# K-Fold Cross-Validation Function
def kfolds_cross_validation(X, y, k=5, random_state=None):
    """Evaluate the perceptron with shuffled k-fold cross-validation.

    The sample order is permuted once, the permuted positions are cut into
    ``k`` folds, and each fold serves once as the test set while the
    remaining folds form the training set.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Feature matrix.
    y : array-like, length n_samples
        Class labels.
    k : int
        Number of folds; must satisfy ``2 <= k <= n_samples``.
    random_state : int or None
        Seed for the shuffle. ``None`` (default) uses NumPy's global random
        state, as before; an integer makes the split reproducible.

    Returns
    -------
    accuracy_scores : list of float
        Test accuracy of each fold, in percent.
    iteration_per_fold : list of int
        Iteration count reported by ``perceptron_train`` for each fold.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` differ in length or ``k`` is out of range.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    n_samples = len(X)

    if len(y) != n_samples:
        raise ValueError(
            f"X and y must have the same length, got {n_samples} and {len(y)}"
        )
    if not 2 <= k <= n_samples:
        raise ValueError(
            f"k must be between 2 and the number of samples ({n_samples}), got {k}"
        )

    # Shuffle once at the beginning
    rng = np.random if random_state is None else np.random.default_rng(random_state)
    order = rng.permutation(n_samples)

    # Cut the permutation itself into folds and index the original arrays
    # with it (no second indexing of already-shuffled data). array_split
    # spreads any remainder over the first folds, so every sample is used
    # for testing exactly once even when n_samples is not a multiple of k.
    folds = np.array_split(order, k)

    accuracy_scores = []
    iteration_per_fold = []

    for fold, test_indices in enumerate(folds):
        # Everything that is not in the current test fold is training data
        train_indices = np.concatenate(folds[:fold] + folds[fold + 1:])

        # Train and test splits
        X_train, y_train = X[train_indices], y[train_indices]
        X_test, y_test = X[test_indices], y[test_indices]

        w, b, iteration = perceptron_train(X_train, y_train)

        # Predict on the test set
        y_pred = perceptron_predict(X_test, w, b)

        # Accuracy Calculation
        accuracy = np.mean(y_pred == y_test) * 100
        accuracy_scores.append(accuracy)
        print(f"Fold {fold + 1} Accuracy: {accuracy:.2f}%")

        # Iteration Calculation
        iteration_per_fold.append(iteration)
        print(f"Fold {fold + 1} Iterations: {iteration}")

    return accuracy_scores, iteration_per_fold

# Load dataset (whitespace-separated values, no header row).
# The separator is a regex, so it is written as a raw string: '\s' in a plain
# string literal is an invalid escape sequence.
df = pd.read_csv('Asgmnt1_data.txt', delimiter=r'\s+', header=None)

# Class layout of the file: first 8000 rows are class 0, next 8000 rows are class 1
n_per_class = 8000
if len(df) != 2 * n_per_class:
    raise ValueError(
        f"Expected {2 * n_per_class} rows in Asgmnt1_data.txt, found {len(df)}"
    )
labels = [0] * n_per_class + [1] * n_per_class

# Constant-1 column for w0 (bias column), one entry per loaded row
wo = [1] * len(df)

df.insert(0, 'weight(w0)', wo)

# Add class labels as a new column in the dataframe
df['class_label'] = labels

# Shuffle the data
shuffled_df = df.sample(frac=1).reset_index(drop=True)

# Extract features and labels
X = shuffled_df.drop('class_label', axis=1).values  # Features
y = shuffled_df['class_label'].values  # Labels

# Perform the 5-fold cross-validation
accuracy_per_fold, iteration_per_fold = kfolds_cross_validation(X, y, k=5)

# Output per folds
print(f"Cross-Validation Accuracies: {accuracy_per_fold}")
print(f"Number of Iterations per Fold: {iteration_per_fold}")


Fold 1 Accuracy: 50.56%
Fold 1 Iterations: 1000
Fold 2 Accuracy: 48.88%
Fold 2 Iterations: 1000
Fold 3 Accuracy: 49.84%
Fold 3 Iterations: 1000
Fold 4 Accuracy: 49.44%
Fold 4 Iterations: 1000
Fold 5 Accuracy: 49.41%
Fold 5 Iterations: 1000
Cross-Validation Accuracies: [50.5625, 48.875, 49.84375, 49.4375, 49.40625]
Number of Iterations per Fold: [1000, 1000, 1000, 1000, 1000]


In [29]:
import numpy as np

def haar_matrix(n):
    """Build the unnormalized n x n Haar matrix.

    Starting from the 1x1 matrix ``[[1]]``, the matrix is doubled in size
    repeatedly: the top half is ``kron(H, [1, 1])`` and the bottom half is
    ``kron(I, [1, -1])``. Entries are -1, 0 or 1; rows are not scaled to
    unit length.

    Parameters
    ----------
    n : int
        Matrix size; must be a positive power of two.

    Returns
    -------
    np.ndarray, shape (n, n)

    Raises
    ------
    ValueError
        If ``n`` is not a positive power of two. Previously such values
        either recursed forever (n <= 0) or silently produced a matrix of
        a different size (e.g. n = 3 gave a 2x2 matrix).
    """
    size = int(n)
    if size != n or size < 1 or size & (size - 1):
        raise ValueError(f"n must be a positive power of two, got {n!r}")

    h = np.array([[1]])  # Base case: 1x1 matrix

    # Each pass doubles the matrix size until it reaches n
    while h.shape[0] < size:
        half = h.shape[0]
        top = np.kron(h, [1, 1])                  # Top part of the Haar matrix
        bottom = np.kron(np.eye(half), [1, -1])   # Bottom part of the Haar matrix

        # Stack the two parts vertically
        h = np.vstack((top, bottom))

        # Snap near-zero entries to exactly 0 (this also replaces the -0.0
        # values that arise from multiplying the identity's zeros by -1)
        h = np.where(np.abs(h) < 1e-10, 0, h)

    return h

H_128 = haar_matrix(128)

# Whitespace-separated values, no header row. The separator is a regex, so it
# is written as a raw string: '\s' in a plain string literal is an invalid
# escape sequence.
df = pd.read_csv('Asgmnt1_data.txt', delimiter=r'\s+', header=None)
df_array = df.values

# Project every row onto the rows of the Haar matrix
wavelet_transformed_data = np.dot(df_array, H_128.T)

# Keep only the first four coefficients of each row as features
first_4_coeffs = wavelet_transformed_data[:, :4]

# Class layout of the file: first 8000 rows are class 0, next 8000 rows are class 1
n_per_class = 8000
n_rows = len(df_array)
if n_rows != 2 * n_per_class:
    raise ValueError(
        f"Expected {2 * n_per_class} rows in Asgmnt1_data.txt, found {n_rows}"
    )

wo = np.ones((n_rows, 1))  # Constant-1 bias column
labels = np.array([0] * n_per_class + [1] * n_per_class).reshape(-1, 1)

# Combine the coefficients, wo, and labels into a DataFrame
first_4_df = pd.DataFrame(first_4_coeffs, columns=[f'coeff_{i}' for i in range(1, 5)])
first_4_df['wo'] = wo
first_4_df['class_label'] = labels

# Put the bias column first, then the coefficients, then the label
first_4_df = first_4_df[['wo', 'coeff_1', 'coeff_2', 'coeff_3', 'coeff_4', 'class_label']]

X = first_4_df.drop('class_label', axis=1).values
y = first_4_df['class_label'].values

accuracy_per_fold, iteration_per_fold = kfolds_cross_validation(X, y, k=5)

print(f"Cross-Validation Accuracies: {accuracy_per_fold}")
print(f"Number of Iterations per Fold: {iteration_per_fold}")


Fold 1 Accuracy: 49.56%
Fold 1 Iterations: 1000
Fold 2 Accuracy: 48.72%
Fold 2 Iterations: 1000
Fold 3 Accuracy: 49.50%
Fold 3 Iterations: 1000
Fold 4 Accuracy: 50.22%
Fold 4 Iterations: 1000
Fold 5 Accuracy: 49.75%
Fold 5 Iterations: 1000
Cross-Validation Accuracies: [49.5625, 48.71875, 49.5, 50.21875, 49.75]
Number of Iterations per Fold: [1000, 1000, 1000, 1000, 1000]
