In [79]:
import numpy as np
import pandas as pd
import time
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from itertools import combinations

In [80]:
# Load data from file
def load(file):
    """Read a text file of digit rows and split it into features and labels.

    Every non-blank line is one sample: all whitespace in the line is
    discarded and each remaining character is converted with ``int``.  The
    last digit of a row is returned as the label, the digits before it as the
    features.

    Parameters
    ----------
    file : str or path-like
        Path of the text file to read.

    Returns
    -------
    (pandas.DataFrame, pandas.Series)
        All columns except the last one, and the last column.

    Raises
    ------
    FileNotFoundError
        Propagated from ``open`` when ``file`` does not exist.
    ValueError
        If a line contains a non-whitespace character ``int`` cannot parse.
    """
    rows = []
    with open(file, "r") as f:
        for line in f:
            # str.split() without arguments splits on any Unicode whitespace,
            # so spaces, tabs and "\xa0" are all removed in one pass.
            digits = "".join(line.split())
            if not digits:
                # A blank line (typically a trailing newline at end of file)
                # would otherwise become an all-NaN row and silently turn
                # every column into floats.
                continue
            rows.append([int(ch) for ch in digits])
    data = pd.DataFrame(rows)
    return data.iloc[:, :-1], data.iloc[:, -1]

In [89]:
# Function to map the raw input data into the required features
def my_map(x_raw):
    """Expand 8 binary inputs per sample into a 121-column feature matrix.

    Column layout of the result:

    * ``0``        -- constant 1 (bias)
    * ``1..8``     -- ``x_i = 1 - 2 * bit_i`` (bit 0 -> +1, bit 1 -> -1)
    * ``9..15``    -- running products ``x0*x1, x0*x1*x2, ..., x0*x1*...*x7``
    * ``16..120``  -- the product of every unordered pair taken from the 15
                      columns above (8 linear + 7 running products), in
                      ``itertools.combinations`` order (15 choose 2 = 105)

    Parameters
    ----------
    x_raw : pandas.DataFrame or array-like of shape (n_samples, 8)
        Raw input bits.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_samples, 121)`` with dtype ``int8``.

    Raises
    ------
    ValueError
        If the input is not two-dimensional or does not have 8 columns.
    """
    # asarray accepts DataFrames, ndarrays and nested lists alike.
    x_raw = np.asarray(x_raw)
    if x_raw.ndim != 2:
        # Checked separately so 1-D input fails with ValueError rather than
        # an IndexError from shape[1].
        raise ValueError("Input must be a 2-D array of shape (n_samples, 8).")
    if x_raw.shape[1] != 8:
        raise ValueError("Input must have 8 features.")

    x_mapped = 1 - 2 * x_raw  # Mapping 0->1, 1->-1
    n_samples = x_mapped.shape[0]

    # bias + 8 linear + 7 cumulative + 105 pairwise = 121 columns
    psi = np.ones((n_samples, 121), dtype=np.int8)

    # Linear features
    psi[:, 1:9] = x_mapped

    # Running products along each row; the first one is just x0, which is
    # already a linear feature, so it is dropped.
    cum_features = np.cumprod(x_mapped, axis=1)[:, 1:].astype(np.int8)
    psi[:, 9:16] = cum_features

    # Pairwise products over linear + cumulative columns
    combined = np.concatenate([x_mapped, cum_features], axis=1)  # (n_samples, 15)
    for idx, (i, j) in enumerate(combinations(range(15), 2), start=16):
        psi[:, idx] = combined[:, i] * combined[:, j]

    return psi

In [105]:
def my_fit(X_train, y_train, X_test, y_test):
    """Scale the features, fit a logistic-regression classifier and report test metrics.

    A ``StandardScaler`` is fitted on ``X_train`` only and then applied to both
    feature sets.  A ``LogisticRegression`` (liblinear solver, ``C=0.01``,
    ``max_iter=100``, ``random_state=50``) is trained on the scaled training
    data, and accuracy / precision / recall / F1 plus a confusion matrix and
    classification report for the test set are printed.

    Labels are expected to be 0/1: the printed table and report hard-code
    those two classes, so the confusion matrix and report are pinned to
    ``labels=[0, 1]``.

    Parameters
    ----------
    X_train, X_test : array-like of shape (n_samples, n_features)
    y_train, y_test : array-like of shape (n_samples,)

    Returns
    -------
    (model, metrics)
        The fitted ``LogisticRegression`` and a dict with the keys
        ``'accuracy'``, ``'precision'``, ``'recall'``, ``'f1'`` and
        ``'training_time'`` (seconds spent in ``model.fit``).

    NOTE(review): the fitted scaler is local and is not returned, while the
    model is trained on scaled features -- callers cannot reproduce the
    scaling for new data from the return value alone.
    """
    # Scaling the features (statistics come from the training set only)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    model = LogisticRegression(max_iter=100, C=0.01, solver='liblinear', random_state=50)
    # perf_counter is the monotonic, high-resolution clock meant for
    # measuring short intervals.
    start = time.perf_counter()
    model.fit(X_train, y_train)
    duration = time.perf_counter() - start

    # Make predictions on the test data
    y_pred = model.predict(X_test)

    # Calculate metrics
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, zero_division=0)
    rec = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    # Pin the label set so the matrix is always 2x2; without it, a test split
    # in which only one class occurs yields a 1x1 matrix and cm[1, ...] fails.
    class_labels = [0, 1]
    cm = confusion_matrix(y_test, y_pred, labels=class_labels)

    # Print out evaluation metrics
    print(f"Training Time: {duration:.4f} sec")
    print(f"Accuracy:  {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall:    {rec:.4f}")
    print(f"F1-Score:  {f1:.4f}")
    print("\nConfusion Matrix:")
    print("          Pred 0 | Pred 1")
    print(f"Actual 0 | {cm[0, 0]:^7} | {cm[0, 1]:^7}")
    print(f"Actual 1 | {cm[1, 0]:^7} | {cm[1, 1]:^7}")
    print("\nClassification Report:")
    # Same reason for labels= here: two target_names need two labels.
    print(classification_report(
        y_test,
        y_pred,
        labels=class_labels,
        target_names=['Class 0', 'Class 1'],
        zero_division=0,
    ))

    # Return the model and metrics
    metrics = {
        'accuracy': acc,
        'precision': prec,
        'recall': rec,
        'f1': f1,
        'training_time': duration,
    }
    return model, metrics


In [106]:
# Function to decode the model's weights into the original features
def my_decode(w):
    """Split a weight vector into the tuple ``(p, q, r, s)``.

    ``p`` is the first 64 entries of ``w`` and ``q`` is everything after them.
    ``r`` and ``s`` are not derived from ``w``: they are placeholders, each a
    fresh 64-element array of zeros, to be replaced as necessary.

    NOTE(review): the earlier comment on this function described decoding a
    65-dimensional vector into four 64-dimensional vectors; with a 65-entry
    ``w`` the code yields a single-entry ``q``.  Confirm the intended
    decoding before relying on ``q``, ``r`` or ``s``.
    """
    head_len = 64
    p, q = w[:head_len], w[head_len:]
    # Placeholders -- independent zero arrays, not views of each other.
    r, s = np.zeros(head_len), np.zeros(head_len)
    return p, q, r, s

In [107]:
# Main script
# Loads the public train/test splits (paths are relative to the current
# working directory), maps them to the 121-column feature space and fits /
# evaluates the classifier.
print("Loading data...")
try:
    x_train_raw, y_train = load("public_trn.txt")
    x_test_raw, y_test = load("public_tst.txt")
except FileNotFoundError as err:
    print("Missing data files.")
    # exit() is a helper intended for the interactive shell, not for stopping
    # a program; raising SystemExit halts execution right here and also
    # reports which file could not be opened.
    raise SystemExit(f"Cannot continue: {err.filename!r} not found") from err

print("Mapping features...")
x_train = my_map(x_train_raw)
x_test = my_map(x_test_raw)

print(f"Train shape: {x_train.shape}, Test shape: {x_test.shape}")
model, metrics = my_fit(x_train, y_train, x_test, y_test)

print(f"\nFinal Test Accuracy: {metrics['accuracy']:.4f}")

Loading data...
Mapping features...
Train shape: (6400, 121), Test shape: (1600, 121)
Training Time: 0.0839 sec
Accuracy:  0.9263
Precision: 0.8829
Recall:    0.8829
F1-Score:  0.8829

Confusion Matrix:
          Pred 0 | Pred 1
Actual 0 |  1037   |   59   
Actual 1 |   59    |   445  

Classification Report:
              precision    recall  f1-score   support

     Class 0       0.95      0.95      0.95      1096
     Class 1       0.88      0.88      0.88       504

    accuracy                           0.93      1600
   macro avg       0.91      0.91      0.91      1600
weighted avg       0.93      0.93      0.93      1600


Final Test Accuracy: 0.9263
