In [1]:
import System.Random
import Data.List
import Data.Maybe
import Data.Ord
import Graphics.EasyPlot

In [2]:
-- | Spread of a cluster. 'reposition' multiplies values that were normalised
-- to @[-0.5, 0.5]@ by this factor, so it is the total width of the resulting
-- interval rather than a standard deviation.
type Sigma = Double
-- | Offset that 'reposition' adds after scaling, i.e. the centre of the
-- resulting interval.
type Mean = Double
-- | A distance value. Not referenced by the definitions shown here --
-- presumably used by later clustering steps (TODO confirm).
type Distance = Double
-- | Integer label of a group of points (third component of 'Centroid').
type Label = Int
-- | A pair of coordinates, presumably @(x, y)@; not referenced by the
-- definitions shown here.
type Point = (Double, Double)
-- | Two coordinates plus a 'Label', presumably @(x, y, label)@; not referenced
-- by the definitions shown here.
type LabeledPoint = (Double, Double, Label)
-- | @(mean x, mean y, label)@ of one labelled group, as built by
-- 'getCentroids'.
type Centroid = (Double, Double, Label)

In [3]:

-- | Linearly rescale @values@ so that they span an interval of width @sigma@
-- centred on @mu@.
--
-- Each value is first normalised to @[-0.5, 0.5]@ using the minimum and
-- maximum of the input, then multiplied by @sigma@ and shifted by @mu@.
-- The order of the values is preserved; an empty input yields an empty list.
--
-- When all inputs are equal (including a single-element list) the input range
-- is zero and normalising would divide by zero and produce NaN. In that case
-- every value is placed exactly at @mu@.
--
-- >>> reposition [0, 1] 1.0 0
-- [-0.5,0.5]
-- >>> reposition [3, 3, 3] 2.0 5.0
-- [5.0,5.0,5.0]
reposition :: [Double] -> Sigma -> Mean -> [Double]
reposition [] _ _ = []
reposition values sigma mu = map place values
  where
    lo = minimum values
    hi = maximum values
    width = hi - lo
    -- Scale and shift a single value in one pass.
    place x = sigma * normalise x + mu
    -- Map the input range onto [-0.5, 0.5]; a degenerate range maps to 0.
    normalise x
      | width == 0 = 0
      | otherwise = (x - lo) / width - 0.5

In [4]:
reposition [0..9] 0.5 0

[-0.25,-0.19444444444444445,-0.1388888888888889,-8.333333333333334e-2,-2.777777777777779e-2,2.777777777777779e-2,8.333333333333331e-2,0.1388888888888889,0.19444444444444442,0.25]

In [5]:
reposition [0..9] 1.0 0

[-0.5,-0.3888888888888889,-0.2777777777777778,-0.16666666666666669,-5.555555555555558e-2,5.555555555555558e-2,0.16666666666666663,0.2777777777777778,0.38888888888888884,0.5]

In [6]:
reposition [0..9] 1.0 1.0

[0.5,0.6111111111111112,0.7222222222222222,0.8333333333333333,0.9444444444444444,1.0555555555555556,1.1666666666666665,1.2777777777777777,1.3888888888888888,1.5]

In [7]:

-- | Build the coordinates of several 2-D clusters from a flat list of raw
-- values.
--
-- One cluster is produced per entry of @sigmas@. @values@ is cut into
-- consecutive chunks of @length values `div` (2 * number of clusters)@
-- elements; cluster @i@ uses chunk @2*i@ for its x coordinates and chunk
-- @2*i+1@ for its y coordinates. Each chunk is passed through 'reposition'
-- with the cluster's sigma and its x mean (from @xmus@) or y mean (from
-- @ymus@). Values left over after the last required chunk are ignored.
--
-- Returns @(xs, ys)@: all x coordinates and all y coordinates, with the
-- clusters concatenated in order. If there are too few values to give every
-- cluster at least one point, or no clusters at all, both lists are empty.
-- Clusters without a matching entry in @xmus@ or @ymus@ are dropped instead
-- of failing on an out-of-range index.
randomClusters :: [Double] -> [Sigma] -> [Mean] -> [Mean] -> ([Double], [Double])
randomClusters values sigmas xmus ymus
  | pointsPerCluster <= 0 = ([], [])
  | otherwise = (concat xParts, concat yParts)
  where
    clusterCount = length sigmas
    -- Integer division keeps the whole computation in Int, so the binding
    -- does not have to be both a Double and an index at the same time.
    pointsPerCluster
      | clusterCount == 0 = 0
      | otherwise = length values `div` (2 * clusterCount)
    (xParts, yParts) =
      unzip (zipWith4 place sigmas xmus ymus (pairUp (chunk values)))
    -- Position the x chunk and the y chunk of one cluster.
    place sigma xmu ymu (xChunk, yChunk) =
      (reposition xChunk sigma xmu, reposition yChunk sigma ymu)
    -- Group consecutive chunks as (x chunk, y chunk); an odd trailing chunk
    -- is discarded.
    pairUp (a : b : rest) = (a, b) : pairUp rest
    pairUp _ = []
    -- Split a list into consecutive pieces of pointsPerCluster elements
    -- (the last piece may be shorter).
    chunk [] = []
    chunk list = front : chunk back
      where
        (front, back) = splitAt pointsPerCluster list

In [8]:
g <- newStdGen

In [9]:
values = take 80 (randoms g) :: [Double]

In [10]:
values

[0.9357653950579865,0.620696176336032,0.4417969315936716,0.8016212442476839,0.3885614657381379,0.7323678474147667,0.8600230686330098,0.4212687899977592,8.07757232687234e-2,0.2009498179755156,0.2101044621285989,0.5044650485584523,0.3994320878261912,0.18889092833388343,0.14534348155383148,0.915531863063101,0.9776884247072287,0.29232082365309275,0.12964404466415347,0.2629557003048818,0.20253887089195555,0.8948713030500158,0.23250232963227457,0.7360824309864079,0.5755157998745857,0.3696696332793994,0.6959179328344441,1.2374185159136353e-3,0.11028872533577061,0.6722683260168099,0.5460831472771348,0.9152575586332934,0.13876358967637215,0.6070326250842013,0.9543785894908988,0.9378909689242503,0.34293901139905236,5.998957830183049e-2,0.21999653012324827,0.5892979464987624,0.25137953941827385,4.271305548853166e-2,0.39452885653242054,0.8308236436029196,0.1966509850480208,0.7299374949710978,0.10489065949399434,0.20068039757873135,0.6118373807913038,0.5338328861426899,0.42308532929296083,0.3657603

In [11]:
(xs, ys) = randomClusters values [0.25, 0.5, 0.75, 1.0] [1, 1, -1, -1] [1, -1, 1, -1]

In [12]:
xs

[1.125,1.0328733845805997,0.9805630319982194,1.085776090274408,0.9649969182742587,1.065526314423903,1.102852853395741,0.9745605789063145,0.875,0.9101390486552019,0.8626308300635841,1.25,0.8793957822766151,1.1611555219583058,1.0713163641719299,0.9561427062804185,1.1386829530197948,0.75,0.8110156512119676,1.1254506846227856,-1.1764239812184938,-1.375,-1.0401969219012457,-0.625,-1.2285060321345922,-0.721007606819367,-1.3158291124273938,-1.2246714695191305,-0.8333967651058556,-0.9076292038888875,-1.219394572227824,-0.5256841995956445,-0.6964856279450652,-1.183351214571961,-1.3605194851479179,-1.4506641784959098,-1.1306470157742483,-1.0724526599544921,-1.5,-0.5]

In [13]:
ys

[0.8987194005873721,0.9854956924174358,0.9545324070033735,0.8924657379566387,0.8796281295115949,1.106676500927649,1.125,0.9229564462713249,0.875,0.9142997285218601,-0.9782538342409559,-0.7718702546476923,-1.2059621091107697,-0.9441805858867327,-0.75,-0.7592172535442541,-1.091819706214275,-1.25,-1.1605495764037326,-0.9540949958155068,0.9466000465097641,0.9009943458054493,1.181228511451751,1.240473071481274,1.2777241566410593,0.6800500048880908,1.178274735340657,0.7981976023233597,0.625,1.375,-0.7309823759448002,-1.0009628978479546,-1.4077370207098308,-0.6727274903560183,-0.5,-1.5,-1.0983889413943768,-1.1807392096656506,-1.2248758389922803,-0.5728805873974468]

In [14]:
plot X11 (zip xs ys)

True

In [15]:
-- Random initial labels in the range 0..3, one per generated point.
-- `values` was already drawn from `g` itself; drawing the labels from `g`
-- again would replay the same underlying random stream, so a generator split
-- off from `g` is used instead. The annotation pins the element type to the
-- one 'getCentroids' expects.
cs = take 40 (randomRs (0, 3) (snd (split g))) :: [Label]

In [16]:
cs

[0,1,3,0,1,0,1,3,2,1,0,2,1,0,0,2,0,3,1,3,1,3,1,2,2,1,1,1,3,0,2,0,1,3,1,0,3,3,0,2]

In [17]:

-- | Compute the centroid of every labelled group of points.
--
-- @xs@, @ys@ and @cs@ are parallel lists: the point at position @i@ has
-- coordinates @(xs !! i, ys !! i)@ and label @cs !! i@. If the lists differ
-- in length, only the common prefix is used.
--
-- Returns one @(mean x, mean y, label)@ triple per distinct label, in order
-- of the label's first appearance. Empty input yields an empty list.
getCentroids :: [Double] -> [Double] -> [Label] -> [Centroid]
getCentroids xs ys cs = map centroidOf labels
  where
    -- Zipping once avoids indexing back into xs and ys for every member.
    points = zip3 xs ys cs
    -- Labels are taken from the zipped points, so each one has at least one
    -- member and the averages below never divide by zero.
    labels = nub [l | (_, _, l) <- points]
    centroidOf c = (average memberXs, average memberYs, c)
      where
        (memberXs, memberYs) = unzip [(x, y) | (x, y, l) <- points, l == c]
    average vs = sum vs / genericLength vs

In [18]:
centroids = getCentroids xs ys cs

In [19]:
centroids

[(0.28419186290286225,-0.2924755657918671,0),(-0.1410661974295562,0.3395789034415523,1),(-0.19603037331991527,-0.19471860423904497,3),(-7.025112829742827e-2,7.974953665544855e-2,2)]

In [20]:
(ax, ay, a) = centroids !! 0

In [21]:
(bx, by, b) = centroids !! 1

In [22]:
(ax - bx)^2 + (ay - by)^2

0.5803372699556303