# Machine Learning para DOA - Dregon Dataset

O objetivo aqui é utilizar regressão para determinar a direção de chegada (DOA). Para isso, já extraí os delays referentes a todas as combinações de microfones do Dregon Dataset - Clean Speech.

O algoritmo usado para extrair os delays foi o delayDatasetCreator.py. Pode ser que a informação de todos os delays seja redundante, uma vez que o algoritmo determinístico usa só os delays em relação ao microfone da origem do espaço vetorial.

In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import BayesianRidge
from math import sqrt, cos, sin, pi
from sklearn.linear_model import LinearRegression, LassoLars, RANSACRegressor
from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import mean_squared_error as mse, r2_score as r2

#### Abrindo o CSV com Pandas e separando as features dos ângulos-alvo

O dataset tem as seguintes colunas:

Nome do arquivo WAV, Features, Azimutal Real, Elevação Real

In [24]:
# Load the delay dataset extracted from the real recordings.
datasetReal = pd.read_csv(
    "/home/dimi/Programming/IC2019/DOA/Datasets/dregonDelaysDataset.csv"
)

# Column layout (per the notebook description): WAV file name, delay
# features, real azimuth, real elevation. The first column is skipped and
# the last two columns are taken as the regression targets.
xReal = np.array(datasetReal.iloc[:, 1:-2])
yRealAzimutal = np.array(datasetReal.iloc[:, -2])
yRealElevacao = np.array(datasetReal.iloc[:, -1])

In [4]:
# Sanity check: show the first sample's feature vector followed by its
# azimuth and elevation labels.
print(
    xReal[0],
    yRealAzimutal[0],
    yRealElevacao[0],
)

[ 12  12  22  18  17   7   6  -1  10   6   4  -5  -6  11   7   5  -5  -6
  -5  -6 -16 -17  -1 -11 -12  -9 -10  -1] 45 -30


#### Definindo algumas funções que vou precisar

In [5]:
def radParaGrau(angulo):
    """Convert an angle from radians to degrees.

    Computes ``(angulo * 180) / pi``; works for any operand that supports
    ``*`` and ``/`` with numbers.
    """
    escalado = angulo * 180
    return escalado / pi

def grauParaRad(angulo):
    """Convert an angle from degrees to radians.

    Computes ``(angulo * pi) / 180``; works for any operand that supports
    ``*`` and ``/`` with numbers.
    """
    escalado = angulo * pi
    return escalado / 180

def vetorUnitario(vetor):
    """Return ``vetor`` divided by its norm (as given by ``np.linalg.norm``).

    Args:
        vetor: NumPy array, or a plain list/tuple of numbers.

    Returns:
        ``vetor`` scaled so that its norm is 1. Lists and tuples are
        converted to ``numpy.ndarray`` first.

    Raises:
        ValueError: If the norm of ``vetor`` is zero, because the direction
            is undefined and the division would silently produce NaNs.
    """
    # Plain Python sequences do not support "/" with a float, so convert
    # them; every other input type is passed through untouched.
    if isinstance(vetor, (list, tuple)):
        vetor = np.asarray(vetor)

    norma = np.linalg.norm(vetor)
    if norma == 0:
        raise ValueError("vetorUnitario: cannot normalize a zero-norm vector")
    return vetor / norma

def tempoParaAmostras(tempo, freqAmostragem):
    """Convert a duration into a sample count at a given sampling rate.

    Returns the plain product ``tempo * freqAmostragem``; the result is not
    rounded. Presumably ``tempo`` is in seconds and ``freqAmostragem`` in Hz
    (verify against the caller).
    """
    amostras = tempo * freqAmostragem
    return amostras

## Treinando com dados reais

#### Azimutal

In [17]:
# Hold out 25% of the real samples for testing; the target is the azimuth.
xTrain, xTest, yTrain, yTest = tts(xReal, yRealAzimutal, test_size=0.25)

# Fit the three regressors on the same training split:
# ordinary least squares, LassoLars and Bayesian ridge.
objLS = LinearRegression().fit(xTrain, yTrain)
objLL = LassoLars().fit(xTrain, yTrain)
objBR = BayesianRidge().fit(xTrain, yTrain)

# Predict the held-out samples with each model.
yPredLS = objLS.predict(xTest)
yPredLL = objLL.predict(xTest)
yPredBR = objBR.predict(xTest)

print("Real\tLinear Regression\tLasso Lars\t\tBayesian Ridge")

# One row per test sample: true angle followed by each model's prediction.
for real, predLS, predLL, predBR in zip(yTest, yPredLS, yPredLL, yPredBR):
    print(real, "\t", predLS, "\t", predLL, "\t", predBR)

Real	Linear Regression	Lasso Lars		Bayesian Ridge
45 	 44.12428479149867 	 61.59971889386616 	 43.955575211775056
45 	 44.82664002901951 	 61.59971889386616 	 44.44585408863752
90 	 89.70699475258446 	 73.65686821035233 	 89.82492556619573
60 	 59.63743303917649 	 65.61876866602822 	 59.20629375755856
45 	 45.04303034252183 	 61.59971889386616 	 44.74391376395034
90 	 90.0977858315417 	 74.46067816478474 	 90.48414771753548
90 	 90.60786860733792 	 73.65686821035233 	 90.20702862558146
90 	 89.66150832970678 	 72.85305825591992 	 88.84137254298837
60 	 59.18838304522025 	 65.61876866602822 	 59.37412297538026
60 	 59.93213464423339 	 65.61876866602822 	 59.8907542752101
90 	 87.9812163666251 	 72.85305825591992 	 87.861964594178
75 	 76.60335997937466 	 70.44162839262269 	 76.51753258522278
60 	 59.517862720755446 	 65.61876866602822 	 59.940547965483205
90 	 88.38639522766967 	 72.85305825591992 	 88.28033181095799
75 	 75.64230316512743 	 69.63781843819028 	 75.72393112692437
60 	 60

#### Elevação

In [20]:
# Hold out 25% of the real samples for testing; the target is the elevation.
xTrain, xTest, yTrain, yTest = tts(xReal, yRealElevacao, test_size=0.25)

# Fit the three regressors on the same training split:
# ordinary least squares, LassoLars and Bayesian ridge.
objLS = LinearRegression().fit(xTrain, yTrain)
objLL = LassoLars().fit(xTrain, yTrain)
objBR = BayesianRidge().fit(xTrain, yTrain)

# Predict the held-out samples with each model.
yPredLS = objLS.predict(xTest)
yPredLL = objLL.predict(xTest)
yPredBR = objBR.predict(xTest)

print("Real\tLinear Regression\tLasso Lars\t\tBayesian Ridge")

# One row per test sample: true angle followed by each model's prediction.
for real, predLS, predLL, predBR in zip(yTest, yPredLS, yPredLL, yPredBR):
    print(real, "\t", predLS, "\t", predLL, "\t", predBR)

Real	Linear Regression	Lasso Lars		Bayesian Ridge
0 	 1.637004053696506 	 -15.095071021761562 	 1.5104158967250605
0 	 0.6248846631578129 	 -15.095071021761562 	 0.5006032050034506
0 	 -0.30090790774862697 	 -15.09168659575036 	 -0.32552477895733034
-15 	 -15.146469863800768 	 -15.105224299795166 	 -15.201027135890898
0 	 0.7124227729550441 	 -15.09168659575036 	 0.728792083475021
-15 	 -14.43816845068825 	 -15.101839873783966 	 -14.44972018751783
-15 	 -15.548460607191803 	 -15.101839873783966 	 -15.538266470870514
0 	 -1.0066340383906631 	 -15.098455447772764 	 -1.3804850004921008
-15 	 -16.157394886384555 	 -15.101839873783966 	 -16.277575843608126
-30 	 -30.59787317606389 	 -15.11199315181757 	 -30.52098206983651
-15 	 -16.637250101519427 	 -15.101839873783966 	 -16.50196348089158
-15 	 -13.996247124933616 	 -15.101839873783966 	 -14.013740191446537
0 	 -0.7621729194328761 	 -15.09168659575036 	 -0.8532769321805702
-15 	 -14.638384132232545 	 -15.101839873783966 	 -14.5916325889019

## Treinando com dados fictícios

In [41]:
# Load the synthetic ("fictitious") delay dataset.
datasetFicticio = pd.read_csv(
    "/home/dimi/Programming/IC2019/DOA/Datasets/datasetFicticio090Float.csv"
)

# Every column except the last two is a feature; the last two columns are
# taken as the azimuth and elevation targets, respectively.
xFicticio = np.array(datasetFicticio.iloc[:, :-2])
yFicticioAzimutal = np.array(datasetFicticio.iloc[:, -2])
yFicticioElevacao = np.array(datasetFicticio.iloc[:, -1])

In [42]:
# Sanity check: show the first synthetic sample's feature vector followed by
# its azimuth and elevation labels.
print(
    xFicticio[0],
    yFicticioAzimutal[0],
    yFicticioElevacao[0],
)

[ 1.06358824e+01  8.22017150e-16  1.06358824e+01  6.67144353e-16
  1.06358824e+01 -1.54872796e-16  1.06358824e+01 -1.06358824e+01
  1.54872796e-16 -1.06358824e+01 -6.67144353e-16 -1.06358824e+01
 -8.22017150e-16  1.06358824e+01 -1.54872796e-16  1.06358824e+01
 -9.76889946e-16  1.06358824e+01 -1.06358824e+01 -8.22017150e-16
 -1.06358824e+01 -9.76889946e-16  1.06358824e+01 -8.22017150e-16
  1.06358824e+01 -1.06358824e+01 -1.54872796e-16  1.06358824e+01] 0 -90


In [43]:
# No random split here: train on the whole synthetic dataset and evaluate on
# the whole real dataset. The target is the elevation.
xTrain, yTrain = xFicticio, yFicticioElevacao
xTest, yTest = xReal, yRealElevacao

# Fit the three regressors on the synthetic data:
# ordinary least squares, LassoLars and Bayesian ridge.
objLS = LinearRegression().fit(xTrain, yTrain)
objLL = LassoLars().fit(xTrain, yTrain)
objBR = BayesianRidge().fit(xTrain, yTrain)

# Predict the real samples with each model.
yPredLS = objLS.predict(xTest)
yPredLL = objLL.predict(xTest)
yPredBR = objBR.predict(xTest)

print("Real\tLinear Regression\tLasso Lars\t\tBayesian Ridge")

# One row per real sample: true angle followed by each model's prediction.
for real, predLS, predLL, predBR in zip(yTest, yPredLS, yPredLL, yPredBR):
    print(real, "\t", predLS, "\t", predLL, "\t", predBR)

Real	Linear Regression	Lasso Lars		Bayesian Ridge
-30 	 -23.863510756792376 	 -29.510587977833964 	 -23.8464651313785
-15 	 -19.368802877933824 	 -27.48441336375572 	 -19.39472266315617
-30 	 -25.18451719002621 	 -29.510587977833964 	 -25.166385604357355
-15 	 -11.991858881535126 	 -22.061562596553134 	 -12.000993429553002
0 	 3.9147526437652793 	 -11.227692139944324 	 3.888618926650068
-30 	 -25.34956758772076 	 -29.510587977833964 	 -25.334539372900068
0 	 6.817398708436141 	 -7.842847064616343 	 6.792413927381645
-15 	 -7.094350107065285 	 -17.997382290600285 	 -7.0909771264974175
0 	 -4.629751404070863 	 -17.32987813777178 	 -4.6794187147201285
-15 	 -19.517894669992472 	 -27.48441336375572 	 -19.545517603907662
-15 	 -8.33240309063304 	 -17.997382290600285 	 -8.333211042500789
-15 	 -8.927491673963061 	 -18.676717521225154 	 -8.928289758817481
-30 	 -29.23543544570527 	 -33.57476828378681 	 -29.22788508652852
-15 	 -7.285515708082556 	 -17.997382290600285 	 -7.282577698955489
-15 