# Partie 1

In [2]:
import pandas as pd
import re

def extract_data(input_path, pattern):
    """Parse numeric records out of a text file into a DataFrame.

    The whole file is read and scanned with
    ``re.findall(pattern, text, re.MULTILINE)``. For each hit, the text
    captured by the pattern's second group is split on whitespace and every
    token is converted to ``float``; the resulting list becomes one row.

    Args:
        input_path: Path of the text file to read.
        pattern: Regular expression with at least two capturing groups; the
            second group must capture the whitespace-separated numbers.

    Returns:
        pandas.DataFrame with one row per match (empty when nothing matches).

    Raises:
        ValueError: If a captured token cannot be converted to ``float``.
    """
    with open(input_path, 'r') as handle:
        text = handle.read()

    # str.split() without arguments already treats newlines as separators,
    # so records spanning several lines are flattened into one token list.
    rows = [
        [float(token) for token in groups[1].split()]
        for groups in re.findall(pattern, text, re.MULTILINE)
    ]
    return pd.DataFrame(rows)

# Input files: one holds the rock records, the other the mine records.
rocks_path = 'data/sonar.rocks'
mines_path = 'data/sonar.mines'

# Fixed seed for the row shuffles below, so that a Restart & Run All produces
# the same train/test ordering every time.
SHUFFLE_SEED = 42

# Records whose identifier is prefixed with '*' go to the training set;
# records whose identifier starts the line without '*' go to the test set.
train_pattern = r'\*(CR|CM)\d+:\n\{([\d\s\.\n]+)\}'
test_pattern = r'^(?!\*)(CR|CM)\d+:\n\{([\d\s\.\n]+)\}'

# Training set: label each class, stack both frames, then shuffle the rows.
rocks_train_df = extract_data(rocks_path, train_pattern)
mines_train_df = extract_data(mines_path, train_pattern)
rocks_train_df['Label'] = 'R'
mines_train_df['Label'] = 'M'
train_df = (
    pd.concat([rocks_train_df, mines_train_df], ignore_index=True)
    .sample(frac=1, random_state=SHUFFLE_SEED)
    .reset_index(drop=True)
)

# Test set: same procedure with the un-starred records.
rocks_test_df = extract_data(rocks_path, test_pattern)
mines_test_df = extract_data(mines_path, test_pattern)
rocks_test_df['Label'] = 'R'
mines_test_df['Label'] = 'M'
test_df = (
    pd.concat([rocks_test_df, mines_test_df], ignore_index=True)
    .sample(frac=1, random_state=SHUFFLE_SEED)
    .reset_index(drop=True)
)

## Algorithme d'entraînement

In [3]:
import numpy as np

def minimerror_perceptron(training_features, training_labels, epochs, learning_rate, T_initial, T_decrease_factor):
    """Fit a linear unit with batch updates on a temperature-scaled tanh output.

    During each epoch, every sample contributes
    ``learning_rate * (label - tanh(activation / T))`` to the bias correction
    and that same factor times its feature vector to the weight correction.
    The summed corrections are applied once at the end of the epoch, after
    which the temperature ``T`` is multiplied by ``T_decrease_factor``.

    Args:
        training_features: 2-D array with one sample per row.
        training_labels: Targets aligned with the rows of ``training_features``.
        epochs: Number of passes over the training set.
        learning_rate: Scale applied to every per-sample correction.
        T_initial: Starting temperature; activations are divided by it.
        T_decrease_factor: Multiplier applied to the temperature after each epoch.

    Returns:
        1-D numpy array of length ``n_features + 1``: the bias at index 0
        followed by one weight per feature.
    """
    n_features = training_features.shape[1]
    weights = np.zeros(n_features + 1)
    temperature = T_initial

    for _ in range(epochs):
        # Corrections are accumulated over the whole epoch (batch update).
        bias_step = 0.0
        weight_step = np.zeros(n_features)

        for sample, target in zip(training_features, training_labels):
            activation = np.dot(sample, weights[1:]) + weights[0]
            output = np.tanh(activation / temperature)

            # Update rule: the hyperbolic tangent, scaled by the
            # "temperature", is used as the unit's soft output.
            scaled_error = learning_rate * (target - output)
            weight_step += scaled_error * sample
            bias_step += scaled_error

        weights[1:] += weight_step
        weights[0] += bias_step

        # Lower the "temperature" after each epoch to sharpen the learning.
        temperature *= T_decrease_factor

    return weights

# Hyperparameters of the training run, named so they can be tuned in one place.
EPOCHS = 1000
LEARNING_RATE = 0.001
T_INITIAL = 100
T_DECREASE_FACTOR = 0.99


def _encode_label(value):
    """Return 1 for 'M' (or an already-encoded 1) and -1 for anything else."""
    return 1 if value in ('M', 1) else -1


# The 'Label' columns are encoded in place. Accepting 1 as well as 'M' keeps
# this cell safe to re-run: a second execution leaves the +1/-1 labels
# unchanged instead of turning every label into -1.
train_df['Label'] = train_df['Label'].apply(_encode_label)
test_df['Label'] = test_df['Label'].apply(_encode_label)


# Every column except the last is a feature; the last column is the label.
train_features = train_df.iloc[:, :-1].values
train_labels = train_df.iloc[:, -1].values
weights = minimerror_perceptron(
    train_features,
    train_labels,
    EPOCHS,
    LEARNING_RATE,
    T_initial=T_INITIAL,
    T_decrease_factor=T_DECREASE_FACTOR,
)
print(weights)


[-0.80830915  0.25625703  0.46563226  0.35877301  0.6234483   0.64277698
  0.17295119 -0.03330377 -0.66628792  0.20795886  0.29111373  0.59652312
  0.56565151  0.55749394  0.15636686 -0.15626178 -0.80248842 -0.15853216
  0.08335994  0.28419475  0.05990517 -0.06480158  0.09086023  0.05480104
  0.45706909 -0.01807372 -0.35836622 -0.13828506 -0.06612278  0.34833331
  0.4308203  -1.04470212  0.28246886  0.27517293 -0.08391642 -0.41048659
 -0.39812677 -0.20137764  0.29993622  0.0407947  -0.83818622 -0.14416697
  0.12943592  0.0935307   0.48129602  1.0589036   0.98771646  0.67105011
  0.78775013  0.65987466  0.00705074  0.12850767  0.12051211  0.03272064
  0.08298007 -0.01897161 -0.00463763 -0.03162413  0.0516127   0.05094997
  0.02553518]


## Erreur d'apprentissage (Ea) et erreur de généralisation (Eg)

In [None]:
def predict_perceptron(inputs, weights):
    """Classify one sample with a linear threshold unit.

    Args:
        inputs: Feature vector of a single sample.
        weights: Parameter vector; ``weights[0]`` is the bias and
            ``weights[1:]`` holds one coefficient per feature.

    Returns:
        1 when ``dot(inputs, weights[1:]) + weights[0]`` is >= 0, otherwise -1.
    """
    bias, coefficients = weights[0], weights[1:]
    score = np.dot(inputs, coefficients) + bias
    if score >= 0:
        return 1
    return -1


def calculate_accuracy(features, labels, weights):
    """Return the fraction of samples that ``predict_perceptron`` labels correctly.

    Args:
        features: Iterable of feature vectors, one per sample.
        labels: Expected labels, aligned with ``features``.
        weights: Parameter vector forwarded to ``predict_perceptron``.

    Returns:
        Number of matching predictions divided by ``len(labels)``.

    Raises:
        ZeroDivisionError: If ``labels`` is empty.
    """
    hits = sum(
        predict_perceptron(sample, weights) == expected
        for sample, expected in zip(features, labels)
    )
    return hits / len(labels)





# Split the test frame: every column but the last is a feature, the last is the label.
test_features, test_labels = test_df.iloc[:, :-1].values, test_df.iloc[:, -1].values

# Score the trained weights on the training set first, then on the test set.
training_accuracy, testing_accuracy = (
    calculate_accuracy(features, labels, weights)
    for features, labels in (
        (train_features, train_labels),
        (test_features, test_labels),
    )
)

# Error rates are the complements of the accuracies:
# Ea is the training error, Eg the generalisation (test) error.
Ea, Eg = 1 - training_accuracy, 1 - testing_accuracy

print(f"Erreur d'apprentissage (Ea) : {Ea}")
print(f"Erreur de généralisation (Eg) : {Eg}")

