In [3]:
!pip install -r ../requirements.txt



# Load the data: read the semicolon-separated CSV and keep only the three
# numeric columns used as clustering features, dropping incomplete rows.
DATA_PATH = "../data/ADEME-CarLabelling.csv"
colonnes_utiles = ['Poids à vide', 'Puissance fiscale', 'Prix véhicule']
df = pd.read_csv(DATA_PATH, sep=';', encoding='utf-8')
df = df[colonnes_utiles].dropna()

# Keep only rows strictly above each per-column threshold.
df = df.loc[
    (df['Poids à vide'] > 400)
    & (df['Puissance fiscale'] > 1)
    & (df['Prix véhicule'] > 1000)
]

# Replicate the filtered rows ten times (presumably to give the GPU a larger
# workload -- the duplicates add no new information to the clustering).
df = pd.concat([df] * 10, ignore_index=True)

# Standardise every feature to zero mean / unit variance, in float32 so the
# array matches the dtype the CUDA kernels operate on.
X = df.to_numpy().astype(np.float32)
X_mean = X.mean(axis=0)
X_std = X.std(axis=0)
X_scaled = (X - X_mean) / X_std
n_samples, n_features = X_scaled.shape

# Initial centroids: k distinct row indices drawn with a fixed seed so that
# runs are reproducible.
k = 4
np.random.seed(42)
centroids = X_scaled[np.random.choice(n_samples, k, replace=False)]

@cuda.jit
def cuda_assign_clusters_shared(data, centroids, labels):
    """Assign each sample to its nearest centroid (squared Euclidean distance).

    One thread handles one row of ``data``. Each block first stages the
    centroids in shared memory so the distance loop reads them from there
    instead of from the ``centroids`` argument.

    Parameters
    ----------
    data : 2-D device array, indexed as ``data[sample, feature]``.
    centroids : 2-D device array, indexed as ``centroids[cluster, feature]``.
        Must fit in the (4, 3) shared buffer declared below; that shape is a
        compile-time constant and has to be edited if more clusters or
        features are needed.
    labels : 1-D device array; ``labels[i]`` receives the index of the
        closest centroid for sample ``i`` (or -1 if no distance compared
        below the initial bound, e.g. when the distances are NaN).
    """
    i = cuda.grid(1)
    shared_centroids = cuda.shared.array(shape=(4, 3), dtype=float32)

    n_clusters = centroids.shape[0]
    n_feats = centroids.shape[1]

    # Cooperative copy of the centroids into shared memory. Striding by the
    # block width means every centroid row is loaded even when the block has
    # fewer threads than there are centroids.
    tx = cuda.threadIdx.x
    for c in range(tx, n_clusters, cuda.blockDim.x):
        for f in range(n_feats):
            shared_centroids[c, f] = centroids[c, f]
    # Every thread of the block reaches this barrier (it is outside any
    # bounds check), so the staged centroids are visible before being read.
    cuda.syncthreads()

    if i < data.shape[0]:
        min_dist = 1e20
        best_cluster = -1
        # Iterate over the centroids actually supplied, not over the fixed
        # capacity of the shared buffer, so unused rows are never read.
        for j in range(n_clusters):
            dist = 0.0
            for f in range(data.shape[1]):
                temp = data[i, f] - shared_centroids[j, f]
                dist += temp * temp
            if dist < min_dist:
                min_dist = dist
                best_cluster = j
        labels[i] = best_cluster

@cuda.jit
def cuda_sum_centroids(data, labels, sums, counts):
    """Accumulate per-cluster feature sums and member counts.

    One thread handles one row of ``data``: it atomically adds that row's
    features into ``sums[labels[row], :]`` and adds 1 to
    ``counts[labels[row]]``. Both output arrays are only added to, so the
    caller is expected to zero them before launching the kernel.
    """
    row = cuda.grid(1)
    if row >= data.shape[0]:
        # Surplus thread in the last block: nothing to accumulate.
        return

    cluster = labels[row]
    cuda.atomic.add(counts, cluster, 1)
    for col in range(data.shape[1]):
        cuda.atomic.add(sums, (cluster, col), data[row, col])

# GPU setup: launch geometry with one thread per sample, rounding the block
# count up so the final partial block is still launched.
threads_per_block = 128
blocks_per_grid = (n_samples + threads_per_block - 1) // threads_per_block

# Inputs copied from the host.
d_data = cuda.to_device(X_scaled)
d_centroids = cuda.to_device(centroids)

# Output / scratch buffers allocated on the device. They are created
# uninitialised here; the iteration loop fills or zeroes them before use.
d_labels = cuda.device_array(n_samples, dtype=np.int32)
d_sums = cuda.device_array((k, n_features), dtype=np.float32)
d_counts = cuda.device_array(k, dtype=np.int32)

start = cuda.event()
end = cuda.event()

# Host-side zero buffers used to reset the device accumulators. They are
# built once here instead of being re-allocated on every iteration.
zero_sums = np.zeros((k, n_features), dtype=np.float32)
zero_counts = np.zeros(k, dtype=np.int32)

start.record()

max_iter = 100
for _ in range(max_iter):
    # Assignment step: label every sample with its nearest centroid.
    cuda_assign_clusters_shared[blocks_per_grid, threads_per_block](d_data, d_centroids, d_labels)

    # The accumulation kernel only adds, so clear its outputs first.
    d_sums.copy_to_device(zero_sums)
    d_counts.copy_to_device(zero_counts)

    # Update step, part 1: per-cluster feature sums and member counts.
    cuda_sum_centroids[blocks_per_grid, threads_per_block](d_data, d_labels, d_sums, d_counts)

    sums = d_sums.copy_to_host()
    counts = d_counts.copy_to_host()

    # Update step, part 2: new centroid = mean of its members. Start from the
    # previous centroids so a cluster that received no sample keeps its
    # position instead of being reset to the origin.
    new_centroids = centroids.copy()
    non_empty = counts > 0
    new_centroids[non_empty] = sums[non_empty] / counts[non_empty, np.newaxis]

    # Stop once the centroids no longer move (within np.allclose tolerance).
    if np.allclose(centroids, new_centroids):
        break

    # Overwrite the existing device buffer rather than allocating a new one.
    d_centroids.copy_to_device(new_centroids)
    centroids = new_centroids

end.record()
end.synchronize()
elapsed_time = cuda.event_elapsed_time(start, end)
print(f"Temps GPU (Shared Memory) : {elapsed_time:.2f} ms")

# Results: fetch the final labels and draw the standardised samples as a 3-D
# scatter plot coloured by cluster.
labels = d_labels.copy_to_host()
fig, ax = plt.subplots(figsize=(10, 7), subplot_kw={'projection': '3d'})
ax.scatter(
    X_scaled[:, 0],
    X_scaled[:, 1],
    X_scaled[:, 2],
    c=labels,
    cmap='viridis',
    s=1,
)
ax.set(
    xlabel='Poids à vide',
    ylabel='Puissance fiscale',
    zlabel='Prix véhicule',
    title="K-Means CUDA v1 (Shared Memory)",
)
fig.tight_layout()
plt.show()

# Final analysis: attach the labels to the unscaled rows and show the mean
# of each feature per cluster (the trailing expression is the cell's output).
df['Cluster'] = labels
df.groupby('Cluster')[colonnes_utiles].mean()




In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import os
import time
from numba import cuda, float32
import math