In [1]:
import numpy as np
import pandas as pd
import random, os, time, csv, warnings, math
from sklearn import metrics, decomposition, discriminant_analysis
from scipy import stats, special, optimize
from sklearn.model_selection import train_test_split
# NOTE(review): this ignores EVERY warning for the rest of the session, not
# only the AMI-related one it was added for; consider a narrower filter.
warnings.filterwarnings('ignore') # AMI warning

In [2]:
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
from preprocess_utils import *
from postprocess_utils import *
from vis import *
from literature_models import *
from fit import *
from femda import *
from run_experiments import run_algorithms as run_all
from run_experiments import test_models as test_all
from run_experiments import NUMBER_OF_ALGORITHMS,NUMBER_OF_MEASURES_REAL,NUMBER_OF_MEASURES_SYNTH,NUMBER_OF_RUNS

In [4]:
from tqdm import tqdm

In [5]:
import breizhcrops as bzh

In [6]:
# BreizhCrops subset "frh01"; converted to the training arrays further down.
bzh_train_raw = bzh.BreizhCrops("frh01")

In [7]:
# BreizhCrops subset "frh04"; converted to the held-out test arrays further down.
bzh_test_raw = bzh.BreizhCrops("frh04")

In [8]:
def Dataset2numpy(dataset, l=-1):
    """Convert the first ``l`` samples of an indexable dataset to NumPy arrays.

    Every ``dataset[i]`` must be a sequence whose first item (features) and
    second item (label) expose a ``.numpy()`` method; any further items are
    ignored. All feature arrays must share the shape of ``dataset[0][0]``.

    Parameters
    ----------
    dataset : object supporting ``len()`` and integer indexing
        Source of ``(features, label, ...)`` samples.
    l : int, default -1
        Number of leading samples to convert. ``-1`` means the whole dataset.
        Values larger than ``len(dataset)`` are clamped, so the result never
        contains all-zero padding rows carrying a spurious label ``0``.

    Returns
    -------
    X_flat : numpy.ndarray, shape ``(l, prod(sample_shape))``, float64
        One flattened feature row per sample.
    y : numpy.ndarray, shape ``(l,)``, float64
        The corresponding labels.

    Raises
    ------
    IndexError
        If ``dataset`` is empty (the first sample is needed to learn the shape).
    """
    n_available = len(dataset)
    if l == -1 or l > n_available:
        l = n_available

    # Probe the first sample once to learn the per-sample feature shape.
    sample_shape = tuple(dataset[0][0].shape)
    X = np.zeros((l, *sample_shape))
    y = np.zeros(l)

    # Iterate over exactly the requested samples so the progress bar shows the
    # real total instead of stopping at a few percent of the full dataset.
    for i in tqdm(range(l)):
        # Index the dataset once per sample: each access may hit the disk.
        sample = dataset[i]
        X[i] = sample[0].numpy()
        y[i] = sample[1].numpy()

    # Use an explicit column count: ``reshape(l, -1)`` is ambiguous when l == 0.
    n_features = int(np.prod(sample_shape))
    X_flat = X.reshape(l, n_features)
    return X_flat, y

In [25]:
# Flatten the first 10000 samples of bzh_train_raw into a feature matrix and a
# label vector.
# NOTE(review): this cell's execution count (25) is out of sequence with its
# neighbours; later cells reload these variables with `%store -r`, so the
# notebook may not reproduce under Restart & Run All.
bzh_train, bzh_y = Dataset2numpy(bzh_train_raw, l=10000)

  6%|▌         | 10000/178613 [03:43<1:02:51, 44.71it/s]


In [26]:
# Flatten the first 3000 samples of bzh_test_raw into a feature matrix and a
# label vector.
# NOTE(review): execution count (26) is out of sequence, see the training
# conversion cell; the same reproducibility caveat applies.
bzh_test, bzh_test_y = Dataset2numpy(bzh_test_raw, l=3000)

  2%|▏         | 3000/122614 [00:41<27:40, 72.02it/s]


In [9]:
# Restore `bzh_train` from IPython's %store database, overwriting any value
# already in the kernel. Requires an earlier `%store bzh_train`, which is not
# part of this notebook.
%store -r bzh_train

In [10]:
# Restore `bzh_test` from IPython's %store database, overwriting any value
# already in the kernel. Requires an earlier `%store bzh_test`, which is not
# part of this notebook.
%store -r bzh_test

In [11]:
# Restore `bzh_y` (training labels) from IPython's %store database. Requires an
# earlier `%store bzh_y`, which is not part of this notebook.
%store -r bzh_y

In [12]:
# Restore `bzh_test_y` (test labels) from IPython's %store database. Requires
# an earlier `%store bzh_test_y`, which is not part of this notebook.
%store -r bzh_test_y

In [13]:
# Sorted distinct label values found in the TRAINING labels; the same list is
# reused below for the test split as well.
cats = np.unique(bzh_y)

In [14]:
# Show the distinct label values of the training and test splits side by side.
print(*map(np.unique, (bzh_y, bzh_test_y)))

[0. 1. 2. 3. 5. 7. 8.] [0. 1. 2. 3. 5. 7. 8.]


In [15]:
# Per-class sample counts (in `cats` order): training split first, then test.
print(
    [np.sum(bzh_y == label) for label in cats],
    [np.sum(bzh_test_y == label) for label in cats],
)

[706, 1565, 279, 2270, 26, 1985, 3169] [162, 452, 75, 819, 10, 629, 853]


In [16]:
# Build the training features/labels with import_subset and keep the fifth
# return value (`pca`) so it can be passed to the test-split call.
# NOTE(review): 30 is presumably the number of PCA components and -1 "use all
# samples" - confirm against import_subset's definition.
bzh_pca, bzh_labels, _, _, pca = import_subset(bzh_train, bzh_y, cats, 30, -1)

In [17]:
# Same call for the test split, passing the `pca` object returned by the
# training call.
# NOTE(review): presumably this reuses the training projection instead of
# fitting a new one on test data - confirm in import_subset.
bzh_test_pca, bzh_test_labels, _, _,_ = import_subset(bzh_test, bzh_test_y, cats, 30, -1, pca=pca)

In [18]:
# Run every algorithm of run_experiments.run_algorithms on the projected
# train/test data. As the last expression of the cell, the returned dict of
# model objects is displayed.
# NOTE(review): in the saved output the FEMDA confusion matrices put every
# prediction in one column whose diagonal entry is 10, yet the reported accuracy
# is 0.28433 (= 853/3000). Check the label ordering used for the matrix.
run_all(bzh_pca, bzh_labels, bzh_test_pca, bzh_test_labels, percent_outliers=0, conf=True, verbose=True, return_results=False)

LDA
[0.0706 0.1565 0.0279 0.227  0.0026 0.1985 0.3169]
N 3000 Accuracy 0.28467 ARI 0.00234 AMI 0.00037
[[  0   1   0  28   0   2 131]
 [  0   4   0  83   0   1 364]
 [  0   0   0  18   0   0  57]
 [  0   2   0 148   0   4 665]
 [  0   0   0   0   0   1   9]
 [  0   4   0 132   0   5 488]
 [  0   4   0 147   0   5 697]]
QDA
[0.0706 0.1565 0.0279 0.227  0.0026 0.1985 0.3169]
RQDA-MCD
RGQDA-MCD
t-QDA
[0.0706 0.1565 0.0279 0.227  0.0026 0.1985 0.3169]
FEMDA with p/trace
[0.0706 0.1565 0.0279 0.227  0.0026 0.1985 0.3169]
N 3000 Accuracy 0.28433 ARI 0.0 AMI 0.0
[[  0   0   0   0 162   0   0]
 [  0   0   0   0 452   0   0]
 [  0   0   0   0  75   0   0]
 [  0   0   0   0 819   0   0]
 [  0   0   0   0  10   0   0]
 [  0   0   0   0 629   0   0]
 [  0   0   0   0 853   0   0]]
FEMDA pre-normalised
[0.0706 0.1565 0.0279 0.227  0.0026 0.1985 0.3169]
N 3000 Accuracy 0.28433 ARI 0.0 AMI 0.0
[[  0   0   0   0 162   0   0]
 [  0   0   0   0 452   0   0]
 [  0   0   0   0  75   0   0]
 [  0   0   0  

{'LDA': <literature_models.LDA at 0x16aa00252e0>,
 'QDA': <literature_models.QDA at 0x16a9e6ddfa0>,
 'RQDA-MCD': <literature_models.RGQDA at 0x16ade8da850>,
 'RGQDA-MCD': <literature_models.RGQDA at 0x16ade8c1dc0>,
 't_QDA': <literature_models.t_QDA at 0x16aa00257c0>,
 'FEMDA': <femda.FEMDA at 0x16aa00540a0>,
 'FEMDA_N': <femda.FEMDA_N at 0x16aa00543d0>}