# Partie 1

In [61]:
import pandas as pd
import re

def extract_data(input_path, pattern):
    """Parse numeric records out of a text file into a DataFrame.

    The whole file is read and searched with ``re.findall`` under
    ``re.MULTILINE``. For every match, the text captured by the second
    group is split on whitespace (newlines count as separators) and each
    token is converted to ``float``.

    Args:
        input_path: Path of the text file to read.
        pattern: Regular expression defining at least two capture groups;
            the second group must capture the whitespace-separated numbers.

    Returns:
        A ``pandas.DataFrame`` with one row per match and one column per
        number; it is empty when the pattern matches nothing.
    """
    with open(input_path, 'r') as handle:
        text = handle.read()

    rows = [
        # Group index 1 holds the numeric payload of the record.
        [float(token) for token in groups[1].replace('\n', ' ').split()]
        for groups in re.findall(pattern, text, re.MULTILINE)
    ]
    return pd.DataFrame(rows)

rocks_path = 'data/sonar.rocks'
mines_path = 'data/sonar.mines'

# Fixed seed for the row shuffles below, so both frames come out in the same
# order on every Restart & Run All.
SHUFFLE_SEED = 42

# A record is a header ("CR<digits>:" or "CM<digits>:") followed on the next
# line by a brace-delimited block of numbers. train_pattern selects headers
# prefixed with '*'; test_pattern selects headers that start a line without '*'.
train_pattern = r'\*(CR|CM)\d+:\n\{([\d\s\.\n]+)\}'
test_pattern = r'^(?!\*)(CR|CM)\d+:\n\{([\d\s\.\n]+)\}'

# Training frame: starred records from both files, tagged 'R' / 'M', shuffled.
rocks_train_df = extract_data(rocks_path, train_pattern)
mines_train_df = extract_data(mines_path, train_pattern)
rocks_train_df['Label'] = 'R'
mines_train_df['Label'] = 'M'
train_df = (
    pd.concat([rocks_train_df, mines_train_df], ignore_index=True)
    .sample(frac=1, random_state=SHUFFLE_SEED)
    .reset_index(drop=True)
)

# Test frame: unstarred records from both files, tagged 'R' / 'M', shuffled.
rocks_test_df = extract_data(rocks_path, test_pattern)
mines_test_df = extract_data(mines_path, test_pattern)
rocks_test_df['Label'] = 'R'
mines_test_df['Label'] = 'M'
test_df = (
    pd.concat([rocks_test_df, mines_test_df], ignore_index=True)
    .sample(frac=1, random_state=SHUFFLE_SEED)
    .reset_index(drop=True)
)

## Algorithme d'entraînement

In [62]:
import numpy as np

def minimerror_perceptron(training_features, training_labels, epochs, learning_rate, T_initial, T_decrease_factor):
    """Train a linear unit with a temperature-scaled tanh output (batch updates).

    Each epoch computes, for every sample, ``tanh(activation / T)`` with
    ``activation = inputs . weights[1:] + weights[0]``, accumulates
    ``learning_rate * (label - output)`` times the inputs (and times 1 for
    the bias) over all samples, applies the summed update once, and then
    multiplies the temperature ``T`` by ``T_decrease_factor``.

    NOTE(review): this is a batch delta-style rule on ``tanh(activation / T)``;
    confirm it matches the intended Minimerror formulation.

    Args:
        training_features: 2-D array-like of shape (n_samples, n_features).
        training_labels: 1-D array-like with one numeric label per sample
            (the notebook passes +1 / -1).
        epochs: Number of passes over the training data.
        learning_rate: Factor applied to every weight update.
        T_initial: Starting temperature; activations are divided by it, so it
            should be non-zero.
        T_decrease_factor: Multiplier applied to the temperature after each
            epoch.

    Returns:
        ``numpy.ndarray`` of length ``n_features + 1``: index 0 is the bias,
        the remaining entries are the per-feature weights.

    Raises:
        ValueError: If the number of labels differs from the number of samples.
    """
    features = np.asarray(training_features, dtype=float)
    labels = np.asarray(training_labels, dtype=float).ravel()
    if features.shape[0] != labels.shape[0]:
        raise ValueError(
            f"training_features has {features.shape[0]} samples but "
            f"training_labels has {labels.shape[0]} labels"
        )

    weights = np.zeros(features.shape[1] + 1)
    temperature = T_initial

    for _ in range(epochs):
        # Forward pass for the whole batch using the epoch's starting weights.
        activations = features @ weights[1:] + weights[0]
        errors = labels - np.tanh(activations / temperature)

        # Summed update over all samples, applied once per epoch.
        weights[1:] += learning_rate * (errors @ features)
        weights[0] += learning_rate * errors.sum()

        # Lower the "temperature" after every epoch to sharpen the tanh response.
        temperature *= T_decrease_factor

    return weights

def encode_label(value):
    """Return 1 for a mine ('M', or an already-encoded 1) and -1 otherwise."""
    return 1 if value in ('M', 1) else -1


# encode_label maps 1 -> 1 and -1 -> -1, so re-running this cell leaves
# already-encoded labels untouched instead of collapsing them all to -1.
train_df['Label'] = train_df['Label'].apply(encode_label)
test_df['Label'] = test_df['Label'].apply(encode_label)

# Every column except the last is an input feature; the last column is the label.
train_features = train_df.iloc[:, :-1].values
train_labels = train_df.iloc[:, -1].values

weights = minimerror_perceptron(
    train_features,
    train_labels,
    epochs=600,
    learning_rate=0.001,
    T_initial=80,
    T_decrease_factor=0.99,
)
print(weights)


[-0.55982903  0.14124534  0.26391835  0.17257769  0.31821517  0.3782745
  0.17489325 -0.00966093 -0.33835602  0.16340255  0.27284463  0.45938301
  0.28839587  0.27704649  0.16459953 -0.00836317 -0.51009526 -0.15197134
  0.15895109  0.13645908 -0.03610791  0.0636274   0.17540315 -0.01643639
  0.18885217  0.08105086  0.00442228  0.00322948  0.04192206  0.19879167
  0.2621234  -0.56009298  0.14753189  0.01225274 -0.11287808 -0.05811205
 -0.29884217 -0.23448259  0.25761901  0.10734494 -0.34803285  0.02709948
 -0.0006769   0.07577536  0.31056787  0.56550536  0.4165845   0.32675041
  0.44052976  0.37876606 -0.00854166  0.06084007  0.05678369  0.01934325
  0.04428952 -0.01531017 -0.00360777 -0.02238041  0.03098726  0.01907104
  0.00633834]


## Erreur d'apprentissage (Ea) et erreur de généralisation (Eg)

In [63]:
def predict_perceptron(inputs, weights):
    """Classify one sample with a linear threshold unit.

    Args:
        inputs: 1-D feature vector for a single sample.
        weights: Weight vector whose first entry is the bias and whose
            remaining entries are the per-feature weights.

    Returns:
        1 when ``inputs . weights[1:] + weights[0]`` is non-negative,
        otherwise -1.
    """
    bias, coefficients = weights[0], weights[1:]
    if np.dot(inputs, coefficients) + bias >= 0:
        return 1
    return -1


def calculate_accuracy(features, labels, weights):
    """Return the fraction of samples whose predicted label equals the true one.

    Args:
        features: Iterable of per-sample feature vectors.
        labels: Sized iterable of expected labels, aligned with ``features``.
        weights: Weight vector passed through to ``predict_perceptron``.

    Returns:
        Number of correct predictions divided by ``len(labels)``.

    Raises:
        ZeroDivisionError: If ``labels`` is empty.
    """
    hits = (
        predict_perceptron(sample, weights) == expected
        for sample, expected in zip(features, labels)
    )
    return sum(hits) / len(labels)


# Held-out set: the last column is the label, everything before it is a feature.
test_labels = test_df.iloc[:, -1].values
test_features = test_df.iloc[:, :-1].values

# Training error (Ea) is the misclassification rate on the training set.
training_accuracy = calculate_accuracy(train_features, train_labels, weights)
Ea = 1 - training_accuracy

# Generalization error (Eg) is the misclassification rate on the test set.
testing_accuracy = calculate_accuracy(test_features, test_labels, weights)
Eg = 1 - testing_accuracy

print(f"Erreur d'apprentissage (Ea) : {Ea}")
print(f"Erreur de généralisation (Eg) : {Eg}")


Erreur d'apprentissage (Ea) : 0.29807692307692313
Erreur de généralisation (Eg) : 0.1826923076923077
