# Partie 1

In [30]:
import pandas as pd
import re

def extract_data(input_path, pattern):
    with open(input_path, 'r') as file:
        content = file.read()
    matches = re.findall(pattern, content, re.MULTILINE)
    data = []
    for match in matches:
        numbers = match[1].replace('\n', ' ').split()
        numbers_float = [float(number) for number in numbers]
        data.append(numbers_float)
    return pd.DataFrame(data)

rocks_path = 'data/sonar.rocks'
mines_path = 'data/sonar.mines'


train_pattern = r'\*(CR|CM)\d+:\n\{([\d\s\.\n]+)\}'
test_pattern = r'^(?!\*)(CR|CM)\d+:\n\{([\d\s\.\n]+)\}'


rocks_train_df = extract_data(rocks_path, train_pattern)
mines_train_df = extract_data(mines_path, train_pattern)
rocks_train_df['Label'] = 'R'  
mines_train_df['Label'] = 'M' 
train_df = pd.concat([rocks_train_df, mines_train_df], ignore_index=True).sample(frac=1).reset_index(drop=True)

rocks_test_df = extract_data(rocks_path, test_pattern)
mines_test_df = extract_data(mines_path, test_pattern)
rocks_test_df['Label'] = 'R'  
mines_test_df['Label'] = 'M' 
test_df = pd.concat([rocks_test_df, mines_test_df], ignore_index=True).sample(frac=1).reset_index(drop=True)

## Algorithme d'entraînement

In [31]:
import numpy as np

def update_weights_minimerror(X, y, weights, T, learning_rate):
    gradients = np.zeros_like(weights)
    
    for mu in range(X.shape[0]):
        xi_mu = X[mu]
        tau_mu = y[mu]
        gamma_mu = (np.dot(xi_mu, weights)) * tau_mu / np.linalg.norm(weights)
        tanh_term = np.tanh(gamma_mu / (2 * T))
        derivative = (1 - tanh_term**2) / (2 * T)
        gradients += (xi_mu * tau_mu) * derivative
    
    # Apply the weight update outside the loop for efficiency
    weights -= learning_rate * gradients / len(X)
    
    return weights


# Assuming train_df and test_df are pandas DataFrames with the last column 'Label' 
# and all other columns as features.

# Map labels 'R' and 'M' to -1 and 1 respectively
train_df['Label'] = train_df['Label'].map({'R': -1, 'M': 1})
test_df['Label'] = test_df['Label'].map({'R': -1, 'M': 1})

# Extract features and labels
X_train = train_df.drop('Label', axis=1).values
y_train = train_df['Label'].values
X_test = test_df.drop('Label', axis=1).values
y_test = test_df['Label'].values

# Add a bias term (column of ones) to the features
X_train = np.hstack((np.ones((X_train.shape[0], 1)), X_train))
X_test = np.hstack((np.ones((X_test.shape[0], 1)), X_test))

# Initialize weights including bias
weights = np.random.normal(0, 1, X_train.shape[1])

# Set hyperparameters
learning_rate = 0.01
epochs = 25000
initial_T = 1
final_T = 0.01
T_decrease_factor = (final_T / initial_T) ** (1 / epochs)

# Train with the Minimerror rule
for epoch in range(epochs):
    T = initial_T * (T_decrease_factor ** epoch)
    weights = update_weights_minimerror(X_train, y_train, weights, T, learning_rate)


Ea et Eg

In [32]:
def predict_perceptron(inputs, weights):
    activation = np.dot(inputs, weights[1:]) + weights[0]
    return 1 if activation >= 0 else -1


def calculate_accuracy(features, labels, weights):
    predictions = [predict_perceptron(x, weights) for x in features]
    correct_predictions = sum(pred == label for pred, label in zip(predictions, labels))
    accuracy = correct_predictions / len(labels)
    return accuracy


test_features = test_df.iloc[:, :-1].values
test_labels = test_df.iloc[:, -1].values


training_accuracy = calculate_accuracy(train_features, train_labels, weights)
testing_accuracy = calculate_accuracy(test_features, test_labels, weights)


Ea = 1 - training_accuracy
Eg = 1 - testing_accuracy

print(f"Erreur d'apprentissage (Ea) : {Ea}")
print(f"Erreur de généralisation (Eg) : {Eg}")


Erreur d'apprentissage (Ea) : 0.7884615384615384
Erreur de généralisation (Eg) : 0.6826923076923077
