In [11]:
import requests
from csv import reader
from random import seed
from random import randrange
from math import sqrt
from math import exp
from math import pi

# Fonction pour télécharger le fichier CSV à partir de l'URL
def download_csv(url):
    response = requests.get(url)
    content = response.text
    return content.split('\n')

# Charger le fichier CSV
def load_csv(filename):
    dataset = list(reader(filename))
    return dataset

# Fonction pour convertir une colonne de chaînes en nombres à virgule flottante
def str_column_to_float(dataset, column):
    for row in dataset:
        row[column] = float(row[column].strip())

# Fonction pour convertir une colonne de chaînes en entiers
def str_column_to_int(dataset, column):
    class_values = [row[column] for row in dataset]
    unique = set(class_values)
    lookup = {value: index for index, value in enumerate(unique)}
    for row in dataset:
        row[column] = lookup[row[column]]
    return lookup

# Fractionner le jeu de données en k plis
def cross_validation_split(dataset, n_folds):
    dataset_split = list()
    dataset_copy = list(dataset)
    fold_size = int(len(dataset) / n_folds)
    for _ in range(n_folds):
        fold = list()
        while len(fold) < fold_size:
            index = randrange(len(dataset_copy))
            fold.append(dataset_copy.pop(index))
        dataset_split.append(fold)
    return dataset_split

# Calculer le pourcentage de précision
def accuracy_metric(actual, predicted):
    correct = sum(1 for i in range(len(actual)) if actual[i] == predicted[i])
    return correct / float(len(actual)) * 100.0

# Évaluer un algorithme en utilisant une division de validation croisée
def evaluate_algorithm(dataset, algorithm, n_folds, *args):
    folds = cross_validation_split(dataset, n_folds)
    scores = list()
    for fold in folds:
        train_set = [row for row in dataset if row not in fold]
        test_set = list(fold)
        test_set_copy = [list(row) for row in test_set]
        for row in test_set_copy:
            row[-1] = None
        predicted = algorithm(train_set, test_set_copy, *args)
        actual = [row[-1] for row in fold]
        accuracy = accuracy_metric(actual, predicted)
        scores.append(accuracy)
    return scores

# Séparer le jeu de données par valeurs de classe, retourne un dictionnaire
def separate_by_class(dataset):
    separated = dict()
    for row in dataset:
        class_value = row[-1]
        if class_value not in separated:
            separated[class_value] = list()
        separated[class_value].append(row)
    return separated

# Calculer la moyenne d'une liste de nombres
def mean(numbers):
    return sum(numbers) / float(len(numbers))

# Calculer l'écart type d'une liste de nombres
def stdev(numbers):
    avg = mean(numbers)
    variance = sum((x - avg) ** 2 for x in numbers) / float(len(numbers) - 1)
    return sqrt(variance)

# Calculer la moyenne, l'écart type et le nombre pour chaque colonne dans un jeu de données
def summarize_dataset(dataset):
    summaries = [(mean(column), stdev(column), len(column)) for column in zip(*dataset)]
    del summaries[-1]
    return summaries

# Diviser le jeu de données par classe puis calculer les statistiques pour chaque ligne
def summarize_by_class(dataset):
    separated = separate_by_class(dataset)
    summaries = dict()
    for class_value, rows in separated.items():
        summaries[class_value] = summarize_dataset(rows)
    return summaries

# Calculer la fonction de distribution de probabilité gaussienne pour x
def calculate_probability(x, mean, stdev):
    exponent = exp(-((x - mean) ** 2) / (2 * stdev ** 2))
    return (1 / (sqrt(2 * pi) * stdev)) * exponent

# Calculer les probabilités de prédire chaque classe pour une ligne donnée
def calculate_class_probabilities(summaries, row):
    total_rows = sum(summaries[label][0][2] for label in summaries)
    probabilities = dict()
    for class_value, class_summaries in summaries.items():
        probabilities[class_value] = summaries[class_value][0][2] / float(total_rows)
        for i in range(len(class_summaries)):
            mean, stdev, _ = class_summaries[i]
            probabilities[class_value] *= calculate_probability(row[i], mean, stdev)
    return probabilities

# Prédire la classe pour une ligne donnée
def predict(summaries, row):
    probabilities = calculate_class_probabilities(summaries, row)
    best_label, best_prob = None, -1
    for class_value, probability in probabilities.items():
        if best_label is None or probability > best_prob:
            best_prob = probability
            best_label = class_value
    return best_label

# Algorithme du Naive Bayes
def naive_bayes(train, test):
    summarize = summarize_by_class(train)
    predictions = [predict(summarize, row) for row in test]
    return predictions

# Télécharger le fichier CSV iris à partir de l'URL
iris_url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/iris.csv'
iris_content = download_csv(iris_url)

# Charger le fichier CSV iris
iris_dataset = load_csv(iris_content)

# Convertir les colonnes de chaînes en nombres à virgule flottante
for i in range(len(iris_dataset[0]) - 1):
    str_column_to_float(iris_dataset, i)

# Convertir la colonne de classe en entiers
str_column_to_int(iris_dataset, len(iris_dataset[0]) - 1)

# Évaluer l'algorithme Naive Bayes
seed(1)
n_folds = 5
scores = evaluate_algorithm(iris_dataset, naive_bayes, n_folds)
print('Scores: %s' % scores)
print('Mean Accuracy: %.3f%%' % (sum(scores) / float(len(scores))))


Scores: [93.33333333333333, 96.66666666666667, 100.0, 93.33333333333333, 93.33333333333333]
Mean Accuracy: 95.333%


In [23]:
import requests
from csv import reader
from math import sqrt
from math import exp
from math import pi

# Fonction pour télécharger le fichier CSV à partir de l'URL
def download_csv(url):
    response = requests.get(url)
    content = response.text
    return content.split('\n')

# Charger le fichier CSV
def load_csv_from_url(url):
    content = download_csv(url)
    dataset = list(reader(content))
    return dataset

# Convert string column to float
def str_column_to_float(dataset, column):
    for row in dataset:
        row[column] = float(row[column].strip())

# Convert string column to integer
def str_column_to_int(dataset, column):
    class_values = [row[column] for row in dataset]
    unique = set(class_values)
    lookup = {value: index for index, value in enumerate(unique)}
    for row in dataset:
        row[column] = lookup[row[column]]
    return lookup

# Split the dataset by class values, returns a dictionary
def separate_by_class(dataset):
    separated = dict()
    for i in range(len(dataset)):
        vector = dataset[i]
        class_value = vector[-1]
        if class_value not in separated:
            separated[class_value] = list()
        separated[class_value].append(vector)
    return separated

# Calculate the mean of a list of numbers
def mean(numbers):
    return sum(numbers)/float(len(numbers))

# Calculate the standard deviation of a list of numbers
def stdev(numbers):
    avg = mean(numbers)
    variance = sum([(x-avg)**2 for x in numbers]) / float(len(numbers)-1)
    return sqrt(variance)

# Calculate the mean, stdev and count for each column in a dataset
def summarize_dataset(dataset):
    summaries = [(mean(column), stdev(column), len(column)) for column in zip(*dataset)]
    del(summaries[-1])
    return summaries

# Split dataset by class then calculate statistics for each row
def summarize_by_class(dataset):
    separated = separate_by_class(dataset)
    summaries = dict()
    for class_value, rows in separated.items():
        summaries[class_value] = summarize_dataset(rows)
    return summaries

# Calculate the Gaussian probability distribution function for x
def calculate_probability(x, mean, stdev):
    exponent = exp(-((x-mean)**2 / (2 * stdev**2 )))
    return (1 / (sqrt(2 * pi) * stdev)) * exponent

# Calculate the probabilities of predicting each class for a given row
def calculate_class_probabilities(summaries, row):
    total_rows = sum([summaries[label][0][2] for label in summaries])
    probabilities = dict()
    for class_value, class_summaries in summaries.items():
        probabilities[class_value] = summaries[class_value][0][2]/float(total_rows)
        for i in range(len(class_summaries)):
            mean, stdev, _ = class_summaries[i]
            probabilities[class_value] *= calculate_probability(row[i], mean, stdev)
    return probabilities

# Predict the class for a given row
def predict(summaries, row):
    probabilities = calculate_class_probabilities(summaries, row)
    best_label, best_prob = None, -1
    for class_value, probability in probabilities.items():
        if best_label is None or probability > best_prob:
            best_prob = probability
            best_label = class_value
    return best_label

# Make a prediction with Naive Bayes on Iris Dataset
iris_url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/iris.csv'
dataset = load_csv_from_url(iris_url)
for i in range(len(dataset[0])-1):
    str_column_to_float(dataset, i)
# convert class column to integers
str_column_to_int(dataset, len(dataset[0])-1)
# fit model
model = summarize_by_class(dataset)
# define a new record
row = [5.7, 2.9, 4.2, 1.3]
# predict the label
label = predict(model, row)
print('Data=%s, Predicted: %s' % (row, label))


Data=[5.7, 2.9, 4.2, 1.3], Predicted: 0
