<a href="https://colab.research.google.com/github/JosenildoVicente/projetoInteligenciaArtificial/blob/main/projeto_inteligencia_artificial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importação das bibliotecas

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from scipy.spatial import distance
import statistics
import math
import time
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# Download dos dados

In [2]:
# Raw-GitHub location of every CSV used in the experiments, keyed by dataset name.
# The dict keeps insertion order, so `data` ends up with its keys in this same order.
dataset_urls = {
  'pcr_balanced': 'https://raw.githubusercontent.com/JosenildoVicente/projetoInteligenciaArtificial/main/dataset/RT-PCR/pcr_balanced.csv',
  'pcr_unbalanced': 'https://raw.githubusercontent.com/JosenildoVicente/projetoInteligenciaArtificial/main/dataset/RT-PCR/pcr_unbalanced.csv',
  'rapid_balanced': 'https://raw.githubusercontent.com/JosenildoVicente/projetoInteligenciaArtificial/main/dataset/Rapid/rapid_balanced.csv',
  'rapid_unbalanced': 'https://raw.githubusercontent.com/JosenildoVicente/projetoInteligenciaArtificial/main/dataset/Rapid/rapid_unbalanced.csv',
  'both_test_balanced': 'https://raw.githubusercontent.com/JosenildoVicente/projetoInteligenciaArtificial/main/dataset/Both%20(complete)/both_test_balanced.csv',
  'both_test_unbalanced': 'https://raw.githubusercontent.com/JosenildoVicente/projetoInteligenciaArtificial/main/dataset/Both%20(complete)/both_test_unbalanced.csv',
}

# Download each CSV into a DataFrame; every file is decoded as ISO-8859-1.
data = {}
for dataset_name, dataset_url in dataset_urls.items():
  data[dataset_name] = pd.read_csv( dataset_url, encoding="ISO-8859-1" )

# Algoritmos

## Criação do Decision Tree

In [3]:
def my_decisionTree(x_train,x_test,y_train):
  """Fit a default DecisionTreeClassifier and predict the test rows, timing both.

  Args:
    x_train: training features handed to ``fit``.
    x_test: features handed to ``predict``.
    y_train: training labels handed to ``fit``.

  Returns:
    A two-item list ``[predictions, elapsed_seconds]``. The elapsed wall-clock
    time covers creating the classifier, fitting it and predicting.
  """
  started_at = time.time()

  # fit() returns the fitted estimator, so construction and training can be chained.
  tree = DecisionTreeClassifier().fit(x_train, y_train)
  predictions = tree.predict(x_test)

  elapsed = time.time() - started_at

  return [predictions, elapsed]

## Criação dos KNN

In [4]:
def calc_neighbors(k,x_train,x_test,sample):
  """Return the index labels of the k training rows nearest to one test row.

  Distances are Euclidean and computed over all columns at once, instead of
  doing a ``.loc`` lookup and a scipy call for every training row.

  Args:
    k: number of neighbours to return.
    x_train: DataFrame of numeric training features; its index labels
      identify the rows.
    x_test: DataFrame of numeric test features with the same column layout
      as ``x_train`` (columns are matched by position).
    sample: index label in ``x_test`` of the row being classified.

  Returns:
    A list with at most ``k`` labels taken from ``x_train.index``, nearest
    first. Rows at the same distance are ordered by their index label.
  """
  # The query row does not change while scanning the training set: read it once.
  query = x_test.loc[sample].to_numpy(dtype=float)
  train_values = x_train.to_numpy(dtype=float)

  # Broadcasting subtracts the query from every training row in one pass.
  dists = np.sqrt(((train_values - query) ** 2).sum(axis=1))

  # Sorting (distance, label) pairs ranks by distance and breaks ties by label.
  ranked = sorted(zip(dists.tolist(), x_train.index))
  return [label for _, label in ranked[:k]]

In [5]:
def calc_class(sample_neighbors,y_train):
  """Choose a class for a sample by majority vote among its neighbours.

  Args:
    sample_neighbors: iterable of keys used to look labels up in ``y_train``.
    y_train: container of training labels, read with ``y_train[key]``.

  Returns:
    The label picked by ``statistics.mode``. If ``mode`` raises
    ``StatisticsError``, the label of the first neighbour is returned.

  Raises:
    IndexError: if ``sample_neighbors`` is empty, since there is no first
      neighbour to fall back on.
  """
  out = [y_train[i] for i in sample_neighbors]

  try:
    result = statistics.mode(out)
  except statistics.StatisticsError:
    # Only the "no single mode" failure is handled (Python < 3.8 raises it on
    # ties). Anything else, such as KeyboardInterrupt, must propagate instead
    # of being silently swallowed.
    result = out[0]

  return result

In [6]:
def my_own_knn(k,x_train,x_test,y_train):
  """Classify every test row with the hand-written KNN and time the run.

  Args:
    k: number of neighbours consulted for each test row.
    x_train: training features.
    x_test: test features; one prediction is made per entry of its index.
    y_train: training labels, looked up by the neighbours' index labels.

  Returns:
    A two-item list ``[predictions, elapsed_seconds]`` where ``predictions``
    is a list ordered like ``x_test.index``.
  """
  started_at = time.time()

  # For each test row: find its neighbours, then vote on their labels.
  predictions = [
    calc_class(calc_neighbors(k, x_train, x_test, row_label), y_train)
    for row_label in x_test.index
  ]

  elapsed = time.time() - started_at

  return [predictions, elapsed]

In [7]:
def my_knn(k,x_train,x_test,y_train):
  """Run scikit-learn's KNeighborsClassifier with uniform weights and time it.

  Args:
    k: value passed as ``n_neighbors``.
    x_train: training features handed to ``fit``.
    x_test: features handed to ``predict``.
    y_train: training labels handed to ``fit``.

  Returns:
    A two-item list ``[predictions, elapsed_seconds]``. The elapsed wall-clock
    time covers creating the classifier, fitting it and predicting.
  """
  started_at = time.time()

  classifier = KNeighborsClassifier(n_neighbors=k, weights='uniform')
  # fit() returns the fitted estimator, so prediction can be chained onto it.
  predictions = classifier.fit(x_train, y_train).predict(x_test)

  elapsed = time.time() - started_at

  return [predictions, elapsed]

## Criação do Random Forests

In [8]:
def my_randomForests(n,x_train,x_test,y_train):
  """Fit a RandomForestClassifier with ``n`` trees and predict, timing both.

  Args:
    n: value passed as ``n_estimators``.
    x_train: training features handed to ``fit``.
    x_test: features handed to ``predict``.
    y_train: training labels handed to ``fit``.

  Returns:
    A two-item list ``[predictions, elapsed_seconds]``. The elapsed wall-clock
    time covers creating the forest, fitting it and predicting.
  """
  started_at = time.time()

  forest = RandomForestClassifier(n_estimators=n)
  forest.fit(x_train, y_train)
  predictions = forest.predict(x_test)

  elapsed = time.time() - started_at

  return [predictions, elapsed]

## Criação do SVM

In [9]:
def my_svm(x_train,x_test,y_train):
  """Fit a linear-kernel SVC (``random_state=0``) and predict, timing both.

  Args:
    x_train: training features handed to ``fit``.
    x_test: features handed to ``predict``.
    y_train: training labels handed to ``fit``.

  Returns:
    A two-item list ``[predictions, elapsed_seconds]``. The elapsed wall-clock
    time covers creating the classifier, fitting it and predicting.
  """
  started_at = time.time()

  svm = SVC(kernel = 'linear', random_state = 0)
  svm.fit(x_train, y_train)
  predictions = svm.predict(x_test)

  # The clock is read only after predict() has finished.
  return [predictions, time.time() - started_at]

# Rodar algoritmos

## Separação dos dados em treino e teste

In [10]:
# Stratified 75/25 train/test split of the balanced RT-PCR dataset:
# every column except 'Class' is a feature, 'Class' is the target.
x_train, x_test, y_train, y_test = train_test_split(
  data['pcr_balanced'].drop('Class',axis=1),
  data['pcr_balanced']['Class'],
  test_size=0.25,
  stratify=data['pcr_balanced']['Class'],
)

## Funções para melhor separação do código

In [11]:
def print_metrics(result, y_test, time):
  """Print the elapsed time and the accuracy of one model run.

  Args:
    result: predicted labels.
    y_test: true labels the predictions are scored against.
    time: elapsed seconds to report (note: inside this function the name
      hides the ``time`` module).
  """
  print("   Tempo: ", time, "segundos")
  # The accuracy is computed only after the time line has been printed.
  accuracy = accuracy_score(y_test, result)
  print("   Acurácia: ", accuracy)
  print("\n")

In [12]:
def run_my_knn(k, x_train, x_test, y_train, y_test):
  """Run the hand-written KNN with ``k`` neighbours and print its metrics.

  Args:
    k: number of neighbours.
    x_train: training features.
    x_test: test features.
    y_train: training labels.
    y_test: true test labels used for the accuracy.
  """
  # Named `elapsed` so the local does not hide the `time` module.
  predictions, elapsed = my_own_knn(k, x_train, x_test, y_train)
  print("Meu KNN,", k, "vizinhos:")
  print_metrics(predictions, y_test, elapsed)

In [13]:
def run_knn(k, x_train, x_test, y_train, y_test):
  """Run the scikit-learn KNN with ``k`` neighbours and print its metrics.

  Args:
    k: number of neighbours.
    x_train: training features.
    x_test: test features.
    y_train: training labels.
    y_test: true test labels used for the accuracy.
  """
  # Named `elapsed` so the local does not hide the `time` module.
  predictions, elapsed = my_knn(k, x_train, x_test, y_train)
  print("KNN,", k, "vizinhos:")
  print_metrics(predictions, y_test, elapsed)

In [14]:
def run_decisionTree(x_train, x_test, y_train, y_test):
  """Run the decision tree and print its metrics.

  Args:
    x_train: training features.
    x_test: test features.
    y_train: training labels.
    y_test: true test labels used for the accuracy.
  """
  # Named `elapsed` so the local does not hide the `time` module.
  predictions, elapsed = my_decisionTree(x_train, x_test, y_train)
  print("Decision Tree:")
  print_metrics(predictions, y_test, elapsed)

In [15]:
def run_randomForest(n,x_train, x_test, y_train, y_test):
  """Run the random forest with ``n`` trees and print its metrics.

  Args:
    n: number of trees in the forest.
    x_train: training features.
    x_test: test features.
    y_train: training labels.
    y_test: true test labels used for the accuracy.
  """
  # Named `elapsed` so the local does not hide the `time` module.
  predictions, elapsed = my_randomForests(n, x_train, x_test, y_train)
  # The label previously misspelled "árvores" as "arvoes".
  print("Random Forests, ",n,"árvores:")
  print_metrics(predictions, y_test, elapsed)

In [16]:
def run_svm(x_train, x_test, y_train, y_test):
  """Run the linear SVM and print its metrics.

  Args:
    x_train: training features.
    x_test: test features.
    y_train: training labels.
    y_test: true test labels used for the accuracy.
  """
  # Named `elapsed` so the local does not hide the `time` module.
  predictions, elapsed = my_svm(x_train, x_test, y_train)
  print("SVM:")
  print_metrics(predictions, y_test, elapsed)

## Resultados dos algoritmos

In [17]:
# Hyper-parameter grids: neighbour counts for KNN, tree counts for Random Forest.
Ks = [1,3,5,7,9,11,13,15]
Ns = [100,300,500,700,900,1000]

# Benchmark every loaded dataset with every algorithm.
for type_data, dataset in data.items():

  print("\n","----- Rodando algoritmos com o dataset:",type_data,"-----\n")
  print("Quantidade total do dataset:",dataset.shape[0])

  # 'Class' is the target column; the counts below are for its values 1 and 0.
  labels = dataset['Class']
  print("quantidade de positivos:", labels[labels == 1].size)
  print("Quantidade de negativos:", labels[labels == 0].size,"\n")

  # Fresh stratified 75/25 split for this dataset; every other column is a feature.
  x_train, x_test, y_train, y_test = train_test_split(
    dataset.drop('Class',axis=1),
    labels,
    test_size=0.25,
    stratify=labels,
  )

  # The hand-written KNN run is left disabled.
  # NOTE(review): the disabled call passes 1 instead of the loop variable i.
  # for i in Ks:
    # run_my_knn(1,x_train, x_test, y_train, y_test)

  for i in Ks:
    run_knn(i, x_train, x_test, y_train, y_test)

  run_decisionTree(x_train, x_test, y_train, y_test)

  for n in Ns:
    run_randomForest(n, x_train, x_test, y_train, y_test)

  run_svm(x_train, x_test, y_train, y_test)


 ----- Rodando algoritmos com o dataset: pcr_balanced -----

Quantidade total do dataset: 1832
quantidade de positivos: 916
Quantidade de negativos: 916 

KNN, 1 vizinhos:
   Tempo:  0.021926403045654297 segundos
   Acurácia:  0.9497816593886463


KNN, 3 vizinhos:
   Tempo:  0.024843692779541016 segundos
   Acurácia:  0.9432314410480349


KNN, 5 vizinhos:
   Tempo:  0.02928924560546875 segundos
   Acurácia:  0.9213973799126638


KNN, 7 vizinhos:
   Tempo:  0.028836727142333984 segundos
   Acurácia:  0.925764192139738


KNN, 9 vizinhos:
   Tempo:  0.02570652961730957 segundos
   Acurácia:  0.9235807860262009


KNN, 11 vizinhos:
   Tempo:  0.032793521881103516 segundos
   Acurácia:  0.9235807860262009


KNN, 13 vizinhos:
   Tempo:  0.03481864929199219 segundos
   Acurácia:  0.9235807860262009


KNN, 15 vizinhos:
   Tempo:  0.027601003646850586 segundos
   Acurácia:  0.9213973799126638


Decision Tree:
   Tempo:  0.00456690788269043 segundos
   Acurácia:  0.9563318777292577


Random Fore