In [None]:
#!/usr/bin/env python3
import math
import numpy as np
import sys
def mean(data):
    """Return the arithmetic mean of ``data`` as a ``np.float64``.

    ``data`` must support ``sum()`` and ``len()``; an empty input fails on
    the division by its zero length.
    """
    total = sum(data)
    count = len(data)
    return np.float64(total / count)


def stddev(data):
    """Return the standard deviation of ``data`` as a ``np.float64``.

    The sum of squared deviations from the mean is divided by
    ``len(data) - 1`` before taking the square root, so a single-element
    input fails on a division by zero.
    """
    centre = mean(data)

    # Accumulate with a plain loop so the additions happen in index order.
    squared_dev = 0
    for value in data:
        squared_dev += math.pow(value - centre, 2)

    dof = len(data) - 1
    return np.float64(math.sqrt(squared_dev / dof))


def covariance(v1, v2):
    """Return the covariance of two equal-length vectors as ``np.float64``.

    The sum of products of deviations from each vector's mean is divided
    by the number of elements (``len(v1)``).

    Raises:
        ValueError: if ``v1`` and ``v2`` differ in length.
    """
    n = len(v1)
    if n != len(v2):
        raise ValueError('vectors of unequal length')

    mu1, mu2 = mean(v1), mean(v2)

    # Pairwise walk over both vectors, accumulating in index order.
    acc = 0
    for a, b in zip(v1, v2):
        acc += (a - mu1) * (b - mu2)

    return np.float64(acc / n)


class MultivariateNormal(object):
    """Multivariate normal density estimated from per-field samples.

    ``data`` is indexed as ``data[field]`` -> sequence of observations for
    that field.  On construction the mean vector and the full covariance
    matrix are computed with the module's ``mean`` and ``covariance``
    helpers and stored as ``meanvector`` and ``covmat``.
    """

    def __init__(self, data):
        """Allocate the parameter arrays and fill them from ``data``."""
        field_count = len(data)
        self.n_fields = field_count
        # The number of observations is read from the first field only.
        self.n_datapoints = len(data[0])
        self.meanvector = np.zeros(shape=(field_count,), dtype=np.float64)
        self.covmat = np.zeros(shape=(field_count, field_count),
                               dtype=np.float64)
        self.__setvals(data)

    def __setvals(self, data):
        """Populate ``meanvector`` and then every entry of ``covmat``."""
        for idx in range(self.n_fields):
            self.meanvector[idx] = mean(data[idx])

        for row in range(self.n_fields):
            for col in range(self.n_fields):
                self.covmat[row, col] = covariance(data[row], data[col])

    def likelihood(self, x):
        """Evaluate the fitted density at the point ``x``.

        ``x`` must be subtractable from ``meanvector`` (one value per
        field).  The covariance matrix is inverted and its determinant is
        taken on every call.
        """
        # Column vector of deviations from the mean.
        offset = np.array([x - self.meanvector]).T
        precision = np.linalg.inv(self.covmat)

        # Quadratic form offset^T * covmat^-1 * offset, evaluated left to right.
        left = np.matmul(offset.T, precision)
        quad_form = np.matmul(left, offset)[0][0]
        kernel = math.exp(quad_form * (-1 / 2))

        # Normalising constant 1 / sqrt((2*pi)^k * det(covmat)).
        scale = math.pow(2 * math.pi, self.n_fields) * np.linalg.det(self.covmat)
        normaliser = 1 / math.sqrt(scale)
        return normaliser * kernel


def featureVector(fin):
    """Parse comma-separated text rows into a ``(7, n_rows)`` float array.

    Each row is expected to look like ``id, f1, ..., f7[, trailing]``.
    The first field is ignored and the next seven are converted to
    ``float``.  The seven output rows correspond, in order, to what the
    original code called selfcites, NonLocalCount, Totalcites, NLIQ, OCQ,
    HINDEX and IC.

    When a row has more than eight fields its last field is discarded
    before conversion (presumably a class label -- verify against the CSV
    files).  A row with exactly eight fields is used as-is, so files
    without the trailing column parse as well.  Blank lines are skipped.

    Args:
        fin: iterable of strings, one CSV row each.

    Returns:
        ``np.ndarray`` of dtype float64 and shape ``(7, n_rows)``.

    Raises:
        ValueError: if a field cannot be converted to ``float`` or a row
            provides fewer than seven feature values.
    """
    n_features = 7
    columns = [[] for _ in range(n_features)]

    for line in fin:
        if not line.strip():
            # Tolerate empty lines such as a trailing newline in the file.
            continue

        fields = line.split(',')
        # Only drop the last field when it is surplus to id + 7 features;
        # dropping it from an 8-field row would lose the seventh feature.
        if len(fields) > n_features + 1:
            fields = fields[:-1]

        values = [float(field) for field in fields[1:]]
        if len(values) < n_features:
            raise ValueError(
                'expected at least %d feature values, got %d: %r'
                % (n_features, len(values), line))

        # zip stops after n_features columns, ignoring any extra values.
        for column, value in zip(columns, values):
            column.append(value)

    return np.array(columns, dtype=np.float64)

def getData(seed=0):
    """Load both CSV files, shuffle them and split into train/test features.

    Reads ``nat.csv`` and ``inter.csv`` from the current working
    directory, seeds NumPy's global random generator with ``seed`` and
    shuffles each file's lines.  The first 4 shuffled lines of ``nat.csv``
    and the first 6 of ``inter.csv`` form the test set (in that order);
    the remaining lines form the training sets.

    Args:
        seed: value passed to ``np.random.seed`` before shuffling.

    Returns:
        Tuple ``(trainNatFV, trainInterFV, trainFV, testNatFV,
        testInterFV, testFV)``.  The train/test ``*FV`` entries are the
        arrays produced by ``featureVector``; ``testNatFV`` and
        ``testInterFV`` are currently always empty lists.
    """
    np.random.seed(seed)

    # Context managers guarantee the handles are closed, including when
    # the second file is missing or reading fails.
    with open("nat.csv", "r") as nat_file:
        finNat = nat_file.readlines()
    with open("inter.csv", "r") as inter_file:
        finInter = inter_file.readlines()

    # Shuffle order (nat first, then inter) determines the split for a
    # given seed, so it must not be changed.
    np.random.shuffle(finNat)
    np.random.shuffle(finInter)

    finTest = finNat[:4] + finInter[:6]
    finNat = finNat[4:]
    finInter = finInter[6:]
    finTrain = finNat + finInter

    trainNatFV = featureVector(finNat)
    trainInterFV = featureVector(finInter)
    testFV = featureVector(finTest)
    trainFV = featureVector(finTrain)
    # Per-class test splits are not populated; the first 4 columns of
    # testFV come from nat.csv and the remaining 6 from inter.csv.
    testNatFV = []
    testInterFV = []

    return (trainNatFV, trainInterFV, trainFV, testNatFV, testInterFV, testFV)

def main():
    """Fit one density per class and print a 0/1 label per test point.

    A test point is labelled 1 when the density fitted on the ``nat``
    training data is strictly greater at that point than the density
    fitted on the ``inter`` training data, otherwise 0.  Returns 0.
    """
    (nat_train, inter_train, _all_train,
     _nat_test, _inter_test, test_points) = getData()

    nat_model = MultivariateNormal(nat_train)
    inter_model = MultivariateNormal(inter_train)

    # test_points is field-major, so transpose to iterate one point at a time.
    labels = [
        1 if nat_model.likelihood(point) > inter_model.likelihood(point) else 0
        for point in test_points.transpose()
    ]

    print(labels)

    return 0


if __name__ == '__main__':
    # Run the classifier and hand main()'s return value to the shell.
    exit_code = main()
    sys.exit(exit_code)