In [1]:
include("src/struct/distance.jl")
include("src/new_functions.jl")
include("src/merge.jl")
include("src\\utilities.jl")
using DataFrames, Random
using DataStructures
using StatsBase
using Statistics
using LinearAlgebra
using LazySets
using Polyhedra

In [2]:
function compute_dataframe_classes(
    cluster_interest::Int64,
    clusterId::Vector{Int64},
    y::Vector{Int64}
)
    count_classes_in_clusters = StatsBase.countmap(y[findall(x->x==cluster_interest, clusterId)])
    y_cluster = y[findall(x->x==cluster_interest, clusterId)]
    df = DataFrame()
    df."classes" = collect(keys(count_classes_in_clusters))
    df."count_classes" = collect(values(count_classes_in_clusters))
    df = sort(df, [:count_classes], rev=true)
    return (df, y_cluster)
end

function detect_points_small_classes(
    cluster_interest::Int64,
    clusterId::Vector{Int64},
    y::Vector{Int64}
)
    points_in_cluster = findall(x->x==cluster_interest, clusterId)
    points_reckon_with = Vector{Int64}([])
    points_sideline = Vector{Int64}([])
    for (i, point) in enumerate(points_in_cluster)
        if y[point] != main_class
            push!(points_sideline,i )
        else
            push!(points_reckon_with, i)
        end
    end
    return (points_reckon_with, points_sideline)
end

function trimming_cluster(
    clusters::Vector{Cluster},
    cluster_interest::Int64,
    points_reckon_with::Vector{Int64},
    points_sideline::Vector{Int64},
)
    matrix_interest = clusters[cluster_interest].x
    size(matrix_interest, 1)
    vectors_cluster = [matrix_interest[i,:] for i in 1:size(matrix_interest,1)]
    vectors_cluster[points_reckon_with]
    hull = LazySets.convex_hull(vectors_cluster[points_reckon_with])
    for i in 1:length(points_sideline)
        println(i, " ",element(Singleton(vectors_cluster[points_sideline[i]])) ∈ VPolytope(hull))
        if element(Singleton(vectors_cluster[points_sideline[i]])) ∈ VPolytope(hull)
            push!(points_reckon_with,  points_sideline[i])
        end
    end
    sort!(points_reckon_with)
    points_to_be_taken_out_cluster = points_sideline[points_sideline .∉ Ref(points_reckon_with)]
    new_clusters = Vector{Cluster}([])
    point_cluster  = points_reckon_with[1]
    trimmed_cluster = Cluster(point_cluster, matrix_interest, y_cluster)
    for point_cluster in points_reckon_with[2:end]
        new_cluster = Cluster(point_cluster, matrix_interest, y_cluster)
        merge!(trimmed_cluster, new_cluster)
    end

    for point_cluster in points_to_be_taken_out_cluster
        new_cluster = Cluster(point_cluster, matrix_interest, y_cluster)
        push!(clusters, new_cluster)
    end
    clusters[cluster_interest] = trimmed_cluster
    return clusters
end

function update_clusters(
    clusters:Vector{Cluster},
    cluster_interest::Int64,
    clusterId::Vector{Int64},
    y::Vector{Int64},
)
    df, y_cluster = compute_dataframe_classes(cluster_interest, clusterId, y)
    if size(df)[1]>1
        points_reckon_with, points_sideline =detect_points_small_classes(cluster_interest, clusterId, y_cluster)
        clusters = trimming_cluster(clusters, cluster_interest,points_reckon_with, points_sideline)
        return clusters
    else
        return clusters
    end
end

function ConvexHullMerge(
    x::Matrix{float64}, 
    y::Vector{Int}, 
    max_elements_small_classes::Int64,
    num_clusters::Int64,
)
    n = length(y)
    m = length(x[1,:])
    clusters = Vector{Cluster}([])
    for dataId in 1:size(x,1)
        push!(clusters, Cluster(dataId, x,y))
    end
    clusterId = collect(1:n) #On obtient un vecteur 1,2..., qui correspond pour chaque cluster à son clusterId
    distances = Vector{Distance}([])
    for id1 in 1:n-1
        for id2 in id1+1:n
            push!(distances, Distance(id1, id2, x))
        end
    end
    sort!(distances, by = v ->v.distance)
    remainingClusters=n
    distanceId = 1
    n_epochs = 1
    c1_bis = Nothing
    c2_bis = Nothing
    i = 1
    a=i
    

    while remainingClusters>= num_clusters
        distance = distances[distanceId]
        cId1 =clusterId[distance.ids[1]]
        cId2 = clusterId[distance.ids[2]]
        if cId1 != cId2
            c1 = clusters[cId1]
            c2 = clusters[cId2]
            count_classes_in_clusters = StatsBase.countmap([y[c2.dataIds]; y[c1.dataIds]])
            df = DataFrame()
            df."classes" = collect(keys(count_classes_in_clusters))
            df."count_classes" = collect(values(count_classes_in_clusters))
            df = sort(df, [:count_classes], rev=true)
            # if sum(df[df."count_classes".!=maximum(df."count_classes"),:]."count_classes") <= max_elements_small_classes
            if sum(df[df."classes".!=df[1,1],"count_classes"])<= max_elements_small_classes
                remainingClusters -=1
                if remainingClusters < num_clusters
                    break
                end
                merge!(c1, c2) #On merge les 2 clusters
                for id in c2.dataIds 
                    clusterId[id]= cId1 #On modifie le clusterId dans la serie pour le cluster_2, on lui affecte le cluster_1
                end
                # Vider le second cluster
                empty!(clusters[cId2].dataIds)
            end

        end
        distanceId += 1
    end
    df_clusters = DataFrame()
    df_clusters."cluster_id" = collect(keys(StatsBase.countmap(clusterId)))
    df_clusters."number_elements" = collect(values(StatsBase.countmap(clusterId)))
    higher_than_threshold(value::Int64) = value >= 1
    clusters_to_treat = filter(:"number_elements"=> higher_than_threshold, df_clusters)."cluster_id"
    for cluster_interest in clusters_to_treat
        clusters = update_clusters(
            clusters,
            cluster_interest,
            clusterId,
            y
        )   
    end
    return filter(x -> length(x.dataIds) > 0, clusters)
end 

UndefVarError: UndefVarError: int64 not defined