In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import f1_score, confusion_matrix, ConfusionMatrixDisplay, classification_report
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt


### Convertendo características do oBIFs extraídas de histogramas no MATLAB para o dataset em Python


In [50]:
# Build the oBIFs dataset: one CSV line per image, made of the per-row sums of
# the MATLAB-extracted matrix followed by the class label as the last value.
# Images are numbered 1..400 in consecutive groups of 50 whose label alternates,
# starting with 1 (1-50 -> 1, 51-100 -> 0, ..., 351-400 -> 0).
# The `with` block guarantees the output file is closed even if reading one of
# the per-image CSVs fails part-way through the loop.
with open("locutor_dataset_obifs.csv", "w") as file:
  for i in range(1, 401):
    df = pd.read_csv('extracted_data_with_matlab/image_%d.csv' % (i), header=None)
    # Collapse every row of the extracted matrix into a single feature value.
    feature_sums = df.sum(axis=1)

    # 0-based group index (i - 1) // 50: even groups are class 1, odd groups class 0.
    class_of_instance = 1 if ((i - 1) // 50) % 2 == 0 else 0

    # savetxt needs a 2-D (1, n) array to emit the features as one comma-separated line.
    row_with_class = np.append(feature_sums, [class_of_instance]).reshape(1, -1)
    np.savetxt(file, row_with_class, delimiter=',')

In [2]:
# Load the oBIFs dataset written by the conversion cell. The file has no header
# row (hence header=None), so pandas assigns integer column labels; the last
# column is the class label that the conversion cell appended to each row.
dataset_obifs = pd.read_csv('locutor_dataset_obifs.csv', header=None)
# Bare last expression so the notebook renders the frame.
dataset_obifs

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,504,505,506,507,508,509,510,511,512,513
0,1932.0,1932.0,1932.0,1932.0,1932.0,1932.0,1932.0,1932.0,1932.0,1932.0,...,4923.0,8286.0,9682.0,7191.0,2829.0,2079.0,1967.0,1942.0,1956.0,1.0
1,1047.0,1047.0,1047.0,1047.0,1047.0,1047.0,1047.0,1047.0,1047.0,1047.0,...,1664.0,4296.0,6228.0,7213.0,3579.0,1416.0,1086.0,1059.0,1065.0,1.0
2,886.0,886.0,886.0,886.0,886.0,886.0,886.0,886.0,886.0,886.0,...,1405.0,4022.0,5101.0,5652.0,2408.0,1338.0,979.0,910.0,915.0,1.0
3,869.0,869.0,869.0,869.0,869.0,869.0,869.0,869.0,869.0,869.0,...,1488.0,3225.0,5096.0,5185.0,3205.0,1491.0,1273.0,1070.0,1115.0,1.0
4,965.0,965.0,965.0,965.0,965.0,965.0,965.0,965.0,965.0,965.0,...,1777.0,4703.0,5753.0,6703.0,1946.0,1179.0,965.0,965.0,965.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,1469.0,1469.0,1469.0,1469.0,1469.0,1469.0,1469.0,1469.0,1469.0,1469.0,...,2321.0,2825.0,3475.0,3052.0,3141.0,3030.0,3292.0,3683.0,3341.0,0.0
396,948.0,948.0,948.0,948.0,948.0,948.0,948.0,948.0,948.0,948.0,...,1785.0,2341.0,2095.0,2027.0,1795.0,2048.0,2483.0,2590.0,1950.0,0.0
397,1150.0,1150.0,1150.0,1150.0,1150.0,1150.0,1150.0,1150.0,1150.0,1150.0,...,2060.0,2749.0,2552.0,2359.0,2462.0,2835.0,2919.0,3117.0,2660.0,0.0
398,1006.0,1006.0,1006.0,1006.0,1006.0,1006.0,1006.0,1006.0,1006.0,1006.0,...,2087.0,2720.0,1868.0,1740.0,1705.0,1980.0,2241.0,2536.0,2196.0,0.0


### Carregando dataset de características BSIF

In [4]:
# Load the BSIF feature dataset; header=None makes pandas assign integer column labels.
# NOTE(review): the last column appears to hold the 0/1 class label, mirroring
# the layout of the oBIFs file — confirm against whatever produced this CSV.
dataset_bsif = pd.read_csv('locutor_dataset_bsif.csv', header=None)
# Bare last expression so the notebook renders the frame.
dataset_bsif

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,247,248,249,250,251,252,253,254,255,256
0,0.001744,0.001801,0.002192,0.000303,0.001286,0.002834,0.000531,0.000333,0.000501,0.000725,...,0.000498,0.000485,0.001781,0.000184,0.000312,0.000969,0.002104,0.000260,0.001218,1
1,0.002787,0.002811,0.003137,0.000624,0.002385,0.004217,0.001084,0.000586,0.000912,0.001022,...,0.001346,0.000888,0.002769,0.000313,0.000560,0.001862,0.002934,0.000598,0.002117,1
2,0.002557,0.002761,0.003272,0.000447,0.002031,0.004845,0.001032,0.000585,0.000702,0.000968,...,0.001164,0.000827,0.002519,0.000350,0.000559,0.001597,0.002860,0.000722,0.001993,1
3,0.002147,0.002463,0.002681,0.000419,0.001754,0.003865,0.000680,0.000541,0.000581,0.000763,...,0.000805,0.000709,0.002324,0.000375,0.000440,0.001366,0.002308,0.000525,0.001274,1
4,0.002769,0.002588,0.002927,0.000608,0.002333,0.004377,0.000840,0.000648,0.000721,0.000913,...,0.000935,0.000753,0.002521,0.000329,0.000566,0.001640,0.002776,0.000610,0.001905,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,0.000766,0.001225,0.000849,0.000147,0.001150,0.002645,0.000261,0.000255,0.000137,0.000464,...,0.000334,0.000296,0.001365,0.000106,0.000238,0.000618,0.001279,0.000098,0.000726,0
396,0.000886,0.001211,0.000899,0.000121,0.001197,0.002613,0.000304,0.000325,0.000208,0.000417,...,0.000440,0.000294,0.001335,0.000134,0.000230,0.000757,0.001143,0.000189,0.001084,0
397,0.000966,0.001107,0.000988,0.000163,0.001127,0.002282,0.000295,0.000332,0.000185,0.000400,...,0.000520,0.000261,0.001278,0.000097,0.000227,0.000719,0.001354,0.000142,0.000941,0
398,0.000742,0.000909,0.000731,0.000167,0.000858,0.002263,0.000244,0.000316,0.000159,0.000409,...,0.000492,0.000238,0.001120,0.000081,0.000225,0.000667,0.001205,0.000182,0.000903,0
