# QMiner analytics guide

Data analytics is done in QMiner, an analytics platform for large-scale real-time streams containing structured and unstructured data. It is developed by AILab at the Jozef Stefan Institute, Quintelligence and other contributors.

We are using QMiner's [JavaScript API](https://rawgit.com/qminer/qminer/master/nodedoc/index.html).
Installation instructions, documentation and other resources can be found on the [QMiner homepage](https://qminer.github.io/).


## 1. Setup
All analytics dependencies can be installed using the **setup.sh** script; you need to have [Node.js](https://nodejs.org/en/download/package-manager/) installed.

In [1]:
var qm = require('qminer'); // import QMiner
'loaded'

var utils = require('./utils.js'); // utility functions

'loaded'

## 2. Data import

In order to use the analytics data (weather data and sales data), we first have to import it into a [QMiner database](https://rawgit.com/qminer/qminer/master/nodedoc/module-qm.Base.html). We need to define a schema for the weather data stores and the sales data stores.

In [2]:
// Create the QMiner base with four stores: Products, Sales and one store per
// weather parameter (T for temperature, Tp for total precipitation).
// NOTE(review): per the QMiner docs 'createClean' wipes any existing database
// folder before creating the base — confirm this is intended when re-running.
let base = new qm.Base({ 
    mode: 'createClean',
    schema: [
        // Products store: ProductId is the primary key
        {
            name: 'Products',
            fields: [
                { name: "ProductId",    type: "string", shortstring: true, primary: true },
                { name: "ProductType",  type: "string", shortstring: true }
            ],
            joins: [
                // index join to the Sales records; declared as the inverse of Sales.soldProduct
                { name: "wasSold",      type: "index", store: "Sales", inverse: "soldProduct" }
            ]
        },
        
        // Sales store: one record per sale (time and quantity sold)
        {
            name: 'Sales',
            fields: [
                { name: "Timestamp",    type: "datetime" },
                { name: "Quantity",     type: "int" },
            ],
            joins: [
                // field join to the sold product; declared as the inverse of Products.wasSold
                { name: "soldProduct",  type: "field", store: "Products", inverse: "wasSold" }
            ]
        },
        
        // Surface temperature parameter (2t) store
        {
            name: 'T',
            fields: [
                { name: "Timestamp",        type: "datetime"},
                { name: "Region",           type: "int"},
                // presumably the forecast day offset and the hour window the
                // aggregates below cover — TODO confirm against the data source
                { name: "DayOffset",        type: "int"},
                { name: "FromHour",         type: "int"},
                { name: "ToHour",           type: "int"},
                
                // temperature aggregates
                { name: "Max",              type: "float"},
                { name: "Min",              type: "float"},
                { name: "Mean",             type: "float"}
            ]
        },
        
        // Total precipitation parameter (tp) store
        {
            name: 'Tp',
            fields: [
                { name: "Timestamp",        type: "datetime"},
                { name: "Region",           type: "int"},
                // same descriptive fields as in the T store
                { name: "DayOffset",        type: "int"},
                { name: "FromHour",         type: "int"},
                { name: "ToHour",           type: "int"},
                
                // presumably the cumulative precipitation — TODO confirm
                { name: "Cum",              type: "float"}
            ]
        }
    ]
});


### 2.1. Weather data import

In [3]:
// Handles to the two weather stores; onLineWeather pushes the parsed records into them
let tempStore = base.store("T"); // temperature (2t) store
let precipStore = base.store("Tp"); // total precipitation (tp) store

/**
 * Weather file parsing logic: turns the values of one line into a record and
 * pushes it to the store of the weather parameter the line describes.
 *
 * Columns read (by index): 0 = Cum, 1 = DayOffset, 2 = FromHour, 3 = Max,
 * 4 = Mean, 5 = Min, 6 = Region, 7 = timestamp, 8 = ToHour, 9 = parameter code.
 * Lines whose parameter code is neither '2t' nor 'tp' are ignored.
 *
 * @param {Array} lineVals - values of one line (presumably strings as read
 *     from the file — confirm against utils.readCsvFile).
 */
function onLineWeather(lineVals) {
    const weatherParam = lineVals[9];
    const isTemperature = weatherParam === '2t';

    // nothing to store for parameters other than temperature and precipitation
    if (!isTemperature && weatherParam !== 'tp') {
        return;
    }

    // NOTE: setHours(0) resets only the local-time hour; minutes, seconds and
    // milliseconds keep whatever the parsed value had.
    const timestamp = new Date(lineVals[7]);
    timestamp.setHours(0);

    // fields shared by both stores; integers are always read as decimal
    const rec = {
        Timestamp: timestamp,
        Region: parseInt(lineVals[6], 10),
        DayOffset: parseInt(lineVals[1], 10),
        FromHour: parseInt(lineVals[2], 10),
        ToHour: parseInt(lineVals[8], 10)
    };

    if (isTemperature) {
        rec.Max = parseFloat(lineVals[3]);
        rec.Min = parseFloat(lineVals[5]);
        rec.Mean = parseFloat(lineVals[4]);
        tempStore.push(rec);
    } else {
        rec.Cum = parseFloat(lineVals[0]);
        precipStore.push(rec);
    }
}

In [4]:
// load weather data: onLineWeather is passed as the callback that handles the file's lines
// (presumably called once per line with the line's values — confirm in utils.js)
utils.readCsvFile('data/slovenia-nov2017_qminer.tsv', onLineWeather);

Read 30720 lines


### 2.2. Sales data import

In [5]:
// handle to the Products store; onLineProducts pushes the parsed records into it
let prodStore = base.store("Products");

/**
 * Products file parsing logic: stores one product per line.
 * Column 0 is used as the product id, column 1 as the product type.
 *
 * @param {Array} lineVals - values of one line of the products file.
 */
function onLineProducts(lineVals) {
    prodStore.push({
        ProductId: lineVals[0],
        ProductType: lineVals[1]
    });
}

In [6]:
// load products data: onLineProducts is passed as the callback that handles the file's lines
utils.readCsvFile('data/Products.tsv', onLineProducts);

Read 50 lines


In [7]:
// handle to the Sales store; onLineSales pushes the parsed records into it
let salesStore = base.store("Sales");

/**
 * Sales file parsing logic: stores one sale per line.
 * Column 0 is the id of the sold product, column 1 the quantity and
 * column 2 the time of the sale.
 *
 * @param {Array} lineVals - values of one line of the sales file.
 */
function onLineSales(lineVals) {
    salesStore.push({
        Timestamp: new Date(lineVals[2]),
        // explicit radix: quantities are always read as decimal numbers
        Quantity: parseInt(lineVals[1], 10),
        // the joined product is identified by ProductId, the primary key of the
        // Products store (presumably linked to the already imported product — confirm)
        soldProduct: { ProductId: lineVals[0] }
    });
}


In [8]:
// load sales data: onLineSales is passed as the callback that handles the file's lines
utils.readCsvFile('data/Sales.tsv', onLineSales);

Read 240 lines


### 2.3. Store data to disk

In [9]:
// Close the base; per this section's heading, this is the step that stores the imported data to disk
base.close();

## 3. Analytics

In [10]:
// Reopen the base in read-only mode; the analytics below only read from it
base = new qm.Base({ mode: 'openReadOnly' });
// bare string expression: it only serves as the value displayed for this cell
'base loaded'

'base loaded'

In [30]:
// select only air conditioners, i.e. products whose type is exactly 'AC'
// (assigned without a declaration so the cell can be re-run and the value is visible to later cells)
acRecords = base.store("Products").allRecords.filter(product => product.ProductType === 'AC');

RecSet { weighted: false, empty: false, length: 21, store: [Getter] }

In [31]:
// get all sales data for the selected (air conditioner) products;
// judging by the cell output the result is a list of [timestamp, quantity] pairs
sales = utils.getAllSales(acRecords);

[ [ 2017-11-08T14:24:10.000Z, 2 ],
  [ 2017-11-12T10:11:06.000Z, 3 ],
  [ 2017-11-27T12:18:25.000Z, 3 ],
  [ 2017-11-12T14:42:17.000Z, 3 ],
  [ 2017-11-02T12:49:15.000Z, 3 ],
  [ 2017-11-03T11:52:09.000Z, 3 ],
  [ 2017-11-30T14:37:16.000Z, 3 ],
  [ 2017-11-25T12:18:17.000Z, 2 ],
  [ 2017-11-10T13:38:48.000Z, 3 ],
  [ 2017-11-08T14:23:24.000Z, 2 ],
  [ 2017-11-22T10:45:14.000Z, 3 ],
  [ 2017-11-24T14:51:18.000Z, 2 ],
  [ 2017-11-16T12:59:47.000Z, 3 ],
  [ 2017-11-08T08:48:32.000Z, 1 ],
  [ 2017-11-07T13:22:28.000Z, 2 ],
  [ 2017-11-17T11:02:12.000Z, 3 ],
  [ 2017-11-22T08:42:40.000Z, 2 ],
  [ 2017-11-16T14:04:56.000Z, 1 ],
  [ 2017-11-20T07:22:07.000Z, 2 ],
  [ 2017-11-12T11:05:20.000Z, 2 ],
  [ 2017-11-12T07:01:04.000Z, 1 ],
  [ 2017-11-08T10:16:28.000Z, 1 ],
  [ 2017-11-23T13:30:29.000Z, 2 ],
  [ 2017-11-16T10:53:59.000Z, 1 ],
  [ 2017-11-14T09:48:35.000Z, 2 ],
  [ 2017-11-16T08:54:42.000Z, 3 ],
  [ 2017-11-01T09:04:33.000Z, 1 ],
  [ 2017-11-02T15:48:32.000Z, 2 ],
  [ 2017-11-21T14:48

In [28]:
// Analysis window: 1 to 30 November 2017.
// The Date constructor takes a 0-based month, hence `11 - 1` for November;
// both values are midnight in the local time zone.
startDate = new Date(2017, 11 - 1, 1);
endDate = new Date(2017, 11 - 1, 30);

2017-11-29T23:00:00.000Z

In [32]:
// aggregate the sales per day between startDate and endDate;
// judging by the cell output the result is a list of [day, quantity] pairs
dailySales = utils.aggDaily(sales, startDate, endDate);

[ [ 2017-10-31T23:00:00.000Z, 4 ],
  [ 2017-11-01T23:00:00.000Z, 5 ],
  [ 2017-11-02T23:00:00.000Z, 4 ],
  [ 2017-11-03T23:00:00.000Z, 6 ],
  [ 2017-11-04T23:00:00.000Z, 3 ],
  [ 2017-11-05T23:00:00.000Z, 6 ],
  [ 2017-11-06T23:00:00.000Z, 7 ],
  [ 2017-11-07T23:00:00.000Z, 12 ],
  [ 2017-11-08T23:00:00.000Z, 9 ],
  [ 2017-11-09T23:00:00.000Z, 11 ],
  [ 2017-11-10T23:00:00.000Z, 8 ],
  [ 2017-11-11T23:00:00.000Z, 11 ],
  [ 2017-11-12T23:00:00.000Z, 1 ],
  [ 2017-11-13T23:00:00.000Z, 8 ],
  [ 2017-11-14T23:00:00.000Z, 9 ],
  [ 2017-11-15T23:00:00.000Z, 14 ],
  [ 2017-11-16T23:00:00.000Z, 13 ],
  [ 2017-11-17T23:00:00.000Z, 10 ],
  [ 2017-11-18T23:00:00.000Z, 6 ],
  [ 2017-11-19T23:00:00.000Z, 5 ],
  [ 2017-11-20T23:00:00.000Z, 6 ],
  [ 2017-11-21T23:00:00.000Z, 9 ],
  [ 2017-11-22T23:00:00.000Z, 7 ],
  [ 2017-11-23T23:00:00.000Z, 18 ],
  [ 2017-11-24T23:00:00.000Z, 3 ],
  [ 2017-11-25T23:00:00.000Z, 0 ],
  [ 2017-11-26T23:00:00.000Z, 7 ],
  [ 2017-11-27T23:00:00.000Z, 8 ],
  [ 2017-11-2

In [33]:
// feature matrix for the analysis window; the cell output shows 12 rows and 30 columns,
// presumably 12 features for each of the 30 days — confirm in utils.js
X = utils.extractFeatures(base, startDate, endDate);

Matrix { cols: 30, rows: 12 }

In [42]:
// label vector with one entry per day (length 30 in the cell output);
// 6 is presumably the daily sales threshold that marks a day as a peak — confirm in utils.js
y = utils.extractPeakLabels(dailySales, 6);

Vector { length: 30, class: 'Vector' }

In [43]:
// group id per day, derived from the (local) day of month of each daily-sales date;
// the cell output shows 30 distinct ids, so every day ends up in its own group
groups = utils.getGroups(dailySales.map(x => x[0]), x => x.getDate());

[ 0,
  1,
  2,
  3,
  4,
  5,
  6,
  7,
  8,
  9,
  10,
  11,
  12,
  13,
  14,
  15,
  16,
  17,
  18,
  19,
  20,
  21,
  22,
  23,
  24,
  25,
  26,
  27,
  28,
  29 ]

In [44]:
// Cross-validation of a linear SVM classifier: utils.leaveOneGroupOut calls the
// callback with the train/test split of each fold, and the predictions are
// collected at the positions given by test_ind.
y_pred = new Array(y.length);

utils.leaveOneGroupOut(X, y, groups, (x_train, y_train, x_test, y_test, train_ind, test_ind) => {
    // a new classifier is created for every fold
    const clf = new qm.analytics.SVC({ c: 1.0, algorithm: 'LIBSVM', kernel: 'LINEAR' });

    // train the model
    clf.fit(x_train, y_train);

    // get predictions: each column of x_test is one test sample, and
    // test_ind[i] is the slot of that sample in y_pred
    for (let i = 0; i < x_test.cols; i++) {
        y_pred[test_ind[i]] = clf.predict(x_test.getCol(i));
    }
});

// to qminer vector (Vector is a constructor, so it is invoked with `new`)
y_pred = new qm.la.Vector(y_pred);


Vector { length: 30, class: 'Vector' }

In [45]:
// analyze results: compare the true labels with the cross-validated predictions
scores = new qm.analytics.metrics.ClassificationScore(y, y_pred).scores;

// quality measures
console.log(`Precision: ${scores.precision()}`);
console.log(`Recall: ${scores.recall()}`);

// confusion matrix counts
console.log(`TP: ${scores.TP} FP: ${scores.FP}`);
console.log(`TN: ${scores.TN} FN: ${scores.FN}`);

Precision: 0.55
Recall: 0.6470588235294118
TP: 11 FP: 9
TN: 4 FN: 6
