In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
from sklearn.preprocessing import LabelEncoder
import os
import pyodbc

## SQL Connection

In [2]:
# Open the SQL Server connection shared by every pd.read_sql call in this notebook.
# Server and database default to the original values but can be overridden through
# environment variables, so the notebook is not tied to a single machine.
# The timeout value is deliberately large (the default had to be increased).
sql_server = os.environ.get('REPUESTOS_SQL_SERVER', 'LAPTOP-E8KMOCEO')
sql_database = os.environ.get('REPUESTOS_SQL_DATABASE', 'RepuestosWeb')
connection_string = (
    'Driver={SQL Server};'
    + 'Server=' + sql_server + ';'
    + 'Database=' + sql_database + ';'
    + 'Trusted_Connection=yes;'
)
connection = pyodbc.connect(connection_string, timeout=5000)

## CantidadCotizacionEncimaPromedio

### Data preparation

In [45]:
# Load the whole VW_CantidadCotizacionEncimaPromedio view and preview the first rows
df_avgCantCotizacion = pd.read_sql(
    sql='select * from VW_CantidadCotizacionEncimaPromedio',
    con=connection,
)
df_avgCantCotizacion.head()

Unnamed: 0,ID_Categoria,ID_Parte,Ciudad,TotalEncimaPromedio
0,896,*681MISC,El Paso,0
1,2079,AC1029100-ORIG.REC,Arlington,0
2,455,ABP1995UK2,Raleigh,0
3,434,ABPC4243,Los Angeles,1
4,1274,ABP5747,Las Vegas,0


In [46]:
# Features are the first three columns; the target is the last column
x_avgCantCotizacion, y_avgCantCotizacion = (
    df_avgCantCotizacion.iloc[:, :3].values,
    df_avgCantCotizacion.iloc[:, -1].values,
)

In [47]:
#Ecoding
le = LabelEncoder()
x_avgCantCotizacion[:,0] = le.fit_transform(x_avgCantCotizacion[:,0])
x_avgCantCotizacion[:,1] = le.fit_transform(x_avgCantCotizacion[:,1])
x_avgCantCotizacion[:,2] = le.fit_transform(x_avgCantCotizacion[:,2])

In [48]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
x_avgCantCotizacion_train, x_avgCantCotizacion_test, y_avgCantCotizacion_train, y_avgCantCotizacion_test = train_test_split(
    x_avgCantCotizacion,
    y_avgCantCotizacion,
    test_size=0.20,
    random_state=0,
)

### Decision Tree

In [54]:
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree on the training split.
# random_state is pinned (same seed as the random forest below) because an unseeded
# tree can break ties between equally good splits differently on each run.
DT_avgCantCotizacion_classifier = DecisionTreeClassifier(random_state=42)
DT_avgCantCotizacion_classifier.fit(x_avgCantCotizacion_train, y_avgCantCotizacion_train)

# Predict labels for the held-out test split and display them
DT_avgCantCotizacion_pred = DT_avgCantCotizacion_classifier.predict(x_avgCantCotizacion_test)
DT_avgCantCotizacion_pred

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [55]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Score the decision tree on the test split: confusion matrix and overall accuracy
avgCantCotizacion_cm = confusion_matrix(
    y_true=y_avgCantCotizacion_test,
    y_pred=DT_avgCantCotizacion_pred,
)
avgCantCotizacion_ac = accuracy_score(
    y_true=y_avgCantCotizacion_test,
    y_pred=DT_avgCantCotizacion_pred,
)

In [56]:
# Confusion matrix of the decision tree predictions on the test split
avgCantCotizacion_cm

array([[2053,  195],
       [ 193,   17]], dtype=int64)

In [57]:
# Accuracy of the decision tree predictions on the test split
avgCantCotizacion_ac

0.8421480878763222

### Random Forest

In [63]:
from sklearn.ensemble import RandomForestClassifier
import sklearn.metrics as metrics

In [64]:
# Train a seeded, entropy-criterion random forest on the training split and show the estimator
RF_avgCantCotizacion_classifier = RandomForestClassifier(
    criterion='entropy',
    random_state=42,
).fit(x_avgCantCotizacion_train, y_avgCantCotizacion_train)
RF_avgCantCotizacion_classifier

RandomForestClassifier(criterion='entropy', random_state=42)

In [65]:
# Random forest predictions for the held-out test split
RF_avgCantCotizacion_pred = RF_avgCantCotizacion_classifier.predict(X=x_avgCantCotizacion_test)

In [66]:
# Report the random forest's accuracy on the test split.
# The printed label says "Accuracy" because the value comes from accuracy_score, not an F1 score.
print('Testing Set Evaluation Accuracy=>', metrics.accuracy_score(y_avgCantCotizacion_test, RF_avgCantCotizacion_pred))

Testing Set Evaluation F1-Score=> 0.9080553295362083


## CantidadPartesEncimaPromedio

### Data preparation

In [None]:
# Load the whole VW_CantidadPartesEncimaPromedio view and preview the first rows
df_avgCantPartes = pd.read_sql(
    sql='select * from VW_CantidadPartesEncimaPromedio',
    con=connection,
)
df_avgCantPartes.head()

Unnamed: 0,NombreCiudad,Genero,ID_Parte,ID_Categoria,TotalCantidadEncimaPromedio
0,San Diego,M,AC1100173CORE-ORIG.REC,1612,0
1,Tacoma,F,AC1320108-ORIG.REC,2827,0
2,Akron,M,AC1228119-ORIG.REC,3154,0
3,Richmond,M,AC1217107,3225,0
4,Dallas,F,AC2519105-ORIG.REC,4241,0


In [None]:
# Features are the first four columns; the target is the last column
x_avgCantPartes, y_avgCantPartes = (
    df_avgCantPartes.iloc[:, :4].values,
    df_avgCantPartes.iloc[:, -1].values,
)

In [None]:
#Ecoding
le = LabelEncoder()
x_avgCantPartes[:,0] = le.fit_transform(x_avgCantPartes[:,0])
x_avgCantPartes[:,1] = le.fit_transform(x_avgCantPartes[:,1])
x_avgCantPartes[:,2] = le.fit_transform(x_avgCantPartes[:,2])
x_avgCantPartes[:,3] = le.fit_transform(x_avgCantPartes[:,3])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
x_avgCantPartes_train, x_avgCantPartes_test, y_avgCantPartes_train, y_avgCantPartes_test = train_test_split(
    x_avgCantPartes,
    y_avgCantPartes,
    test_size=0.20,
    random_state=0,
)

### Decision Tree

In [67]:
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree on the training split.
# random_state is pinned (same seed as the random forest below) because an unseeded
# tree can break ties between equally good splits differently on each run.
DT_avgCantPartes_classifier = DecisionTreeClassifier(random_state=42)
DT_avgCantPartes_classifier.fit(x_avgCantPartes_train, y_avgCantPartes_train)

# Predict labels for the held-out test split and display them
DT_avgCantPartes_pred = DT_avgCantPartes_classifier.predict(x_avgCantPartes_test)
DT_avgCantPartes_pred

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [68]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Score the decision tree on the test split: confusion matrix and overall accuracy
avgCantPartes_cm = confusion_matrix(
    y_true=y_avgCantPartes_test,
    y_pred=DT_avgCantPartes_pred,
)
avgCantPartes_ac = accuracy_score(
    y_true=y_avgCantPartes_test,
    y_pred=DT_avgCantPartes_pred,
)

In [69]:
# Confusion matrix of the decision tree predictions on the test split
avgCantPartes_cm

array([[1958,  221],
       [ 177,   26]], dtype=int64)

In [70]:
# Accuracy of the decision tree predictions on the test split
avgCantPartes_ac

0.832913518052057

### Random Forest

In [71]:
from sklearn.ensemble import RandomForestClassifier
import sklearn.metrics as metrics

In [72]:
# Train a seeded, entropy-criterion random forest on the training split and show the estimator
RF_avgCantPartes_classifier = RandomForestClassifier(
    criterion='entropy',
    random_state=42,
).fit(x_avgCantPartes_train, y_avgCantPartes_train)
RF_avgCantPartes_classifier

RandomForestClassifier(criterion='entropy', random_state=42)

In [73]:
# Random forest predictions for the held-out test split
RF_avgCantPartes_pred = RF_avgCantPartes_classifier.predict(X=x_avgCantPartes_test)

In [74]:
# Report the random forest's accuracy on the test split.
# The printed label says "Accuracy" because the value comes from accuracy_score, not an F1 score.
print('Testing Set Evaluation Accuracy=>', metrics.accuracy_score(y_avgCantPartes_test, RF_avgCantPartes_pred))

Testing Set Evaluation F1-Score=> 0.9139378673383711


## PorcentajeDescuentoEncimaPromedio

### Data preparation

In [None]:
# Load the whole VW_PorcentajeDescuentoEncimaPromedio view and preview the first rows
df_avgPorcentajeDescuento = pd.read_sql(
    sql='select * from VW_PorcentajeDescuentoEncimaPromedio',
    con=connection,
)
df_avgPorcentajeDescuento.head()

Unnamed: 0,ID_Parte,ID_Categoria,Nombre,TotalEncimaPromedio
0,AC1183100,1511,Spokane,1
1,AC2502112N,1831,Colorado,1
2,AC1200121,2080,Tulsa,1
3,222ADJUST,6,Memphis,0
4,AC1000148U-ORIG.REC,1235,Milwaukee,1


In [None]:
# Features are the first three columns; the target is the last column
x_avgPorcentajeDescuento, y_avgPorcentajeDescuento = (
    df_avgPorcentajeDescuento.iloc[:, :3].values,
    df_avgPorcentajeDescuento.iloc[:, -1].values,
)

In [None]:
#Ecoding
le = LabelEncoder()
x_avgPorcentajeDescuento[:,0] = le.fit_transform(x_avgPorcentajeDescuento[:,0])
x_avgPorcentajeDescuento[:,1] = le.fit_transform(x_avgPorcentajeDescuento[:,1])
x_avgPorcentajeDescuento[:,2] = le.fit_transform(x_avgPorcentajeDescuento[:,2])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
x_avgPorcentajeDescuento_train, x_avgPorcentajeDescuento_test, y_avgPorcentajeDescuento_train, y_avgPorcentajeDescuento_test = train_test_split(
    x_avgPorcentajeDescuento,
    y_avgPorcentajeDescuento,
    test_size=0.20,
    random_state=0,
)

### Decision Tree

In [75]:
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree on the training split.
# random_state is pinned (same seed as the random forest below) because an unseeded
# tree can break ties between equally good splits differently on each run.
DT_avgPorcentajeDescuento_classifier = DecisionTreeClassifier(random_state=42)
DT_avgPorcentajeDescuento_classifier.fit(x_avgPorcentajeDescuento_train, y_avgPorcentajeDescuento_train)

# Predict labels for the held-out test split and display them
DT_avgPorcentajeDescuento_pred = DT_avgPorcentajeDescuento_classifier.predict(x_avgPorcentajeDescuento_test)
DT_avgPorcentajeDescuento_pred

array([1, 0, 0, ..., 0, 1, 1], dtype=int64)

In [76]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Score the decision tree on the test split: confusion matrix and overall accuracy
avgPorcentajeDescuento_cm = confusion_matrix(
    y_true=y_avgPorcentajeDescuento_test,
    y_pred=DT_avgPorcentajeDescuento_pred,
)
avgPorcentajeDescuento_ac = accuracy_score(
    y_true=y_avgPorcentajeDescuento_test,
    y_pred=DT_avgPorcentajeDescuento_pred,
)

In [77]:
# Confusion matrix of the decision tree predictions on the test split
avgPorcentajeDescuento_cm

array([[403, 618],
       [607, 857]], dtype=int64)

In [78]:
# Accuracy of the decision tree predictions on the test split
avgPorcentajeDescuento_ac

0.5070422535211268

### Random Forest

In [79]:
from sklearn.ensemble import RandomForestClassifier
import sklearn.metrics as metrics

In [80]:
# Train a seeded, entropy-criterion random forest on the training split and show the estimator
RF_avgPorcentajeDescuento_classifier = RandomForestClassifier(
    criterion='entropy',
    random_state=42,
).fit(x_avgPorcentajeDescuento_train, y_avgPorcentajeDescuento_train)
RF_avgPorcentajeDescuento_classifier

RandomForestClassifier(criterion='entropy', random_state=42)

In [81]:
# Random forest predictions for the held-out test split
RF_avgPorcentajeDescuento_pred = RF_avgPorcentajeDescuento_classifier.predict(X=x_avgPorcentajeDescuento_test)

In [82]:
# Report the random forest's accuracy on the test split.
# The printed label says "Accuracy" because the value comes from accuracy_score, not an F1 score.
print('Testing Set Evaluation Accuracy=>', metrics.accuracy_score(y_avgPorcentajeDescuento_test, RF_avgPorcentajeDescuento_pred))

Testing Set Evaluation F1-Score=> 0.5142857142857142


## TotalOrdenesEncimaPromedio

### Data preparation

In [None]:
# Load the whole VW_TotalOrdenesEncimaPromedio view and preview the first rows
df_avgOrdenes = pd.read_sql(
    sql='select * from VW_TotalOrdenesEncimaPromedio',
    con=connection,
)
df_avgOrdenes.head()

Unnamed: 0,NombreCiudad,Genero,ID_Parte,ID_Categoria,TotalEncimaPromedio
0,San Diego,M,AC1100173CORE-ORIG.REC,1612,1
1,Tacoma,F,AC1320108-ORIG.REC,2827,0
2,Akron,M,AC1228119-ORIG.REC,3154,0
3,Richmond,M,AC1217107,3225,1
4,Dallas,F,AC2519105-ORIG.REC,4241,0


In [None]:
# Features are the first four columns; the target is the last column
x_avgOrdenes, y_avgOrdenes = (
    df_avgOrdenes.iloc[:, :4].values,
    df_avgOrdenes.iloc[:, -1].values,
)

In [None]:
#Ecoding
le = LabelEncoder()
x_avgOrdenes[:,0] = le.fit_transform(x_avgOrdenes[:,0])
x_avgOrdenes[:,1] = le.fit_transform(x_avgOrdenes[:,1])
x_avgOrdenes[:,2] = le.fit_transform(x_avgOrdenes[:,2])
x_avgOrdenes[:,3] = le.fit_transform(x_avgOrdenes[:,3])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
x_avgOrdenes_train, x_avgOrdenes_test, y_avgOrdenes_train, y_avgOrdenes_test = train_test_split(
    x_avgOrdenes,
    y_avgOrdenes,
    test_size=0.20,
    random_state=0,
)

### Decision Tree

In [83]:
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree on the training split.
# random_state is pinned (same seed as the random forest below) because an unseeded
# tree can break ties between equally good splits differently on each run.
DT_avgOrdenes_classifier = DecisionTreeClassifier(random_state=42)
DT_avgOrdenes_classifier.fit(x_avgOrdenes_train, y_avgOrdenes_train)

# Predict labels for the held-out test split and display them
DT_avgOrdenes_pred = DT_avgOrdenes_classifier.predict(x_avgOrdenes_test)
DT_avgOrdenes_pred

array([1, 1, 1, ..., 1, 0, 1], dtype=int64)

In [84]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Score the decision tree on the test split: confusion matrix and overall accuracy
avgOrdenes_cm = confusion_matrix(
    y_true=y_avgOrdenes_test,
    y_pred=DT_avgOrdenes_pred,
)
avgOrdenes_ac = accuracy_score(
    y_true=y_avgOrdenes_test,
    y_pred=DT_avgOrdenes_pred,
)

In [85]:
# Confusion matrix of the decision tree predictions on the test split
avgOrdenes_cm

array([[606, 595],
       [590, 591]], dtype=int64)

In [86]:
# Accuracy of the decision tree predictions on the test split
avgOrdenes_ac

0.5025188916876574

### Random Forest

In [87]:
from sklearn.ensemble import RandomForestClassifier
import sklearn.metrics as metrics

In [88]:
# Train a seeded, entropy-criterion random forest on the training split and show the estimator
RF_avgOrdenes_classifier = RandomForestClassifier(
    criterion='entropy',
    random_state=42,
).fit(x_avgOrdenes_train, y_avgOrdenes_train)
RF_avgOrdenes_classifier

RandomForestClassifier(criterion='entropy', random_state=42)

In [89]:
# Random forest predictions for the held-out test split
RF_avgOrdenes_pred = RF_avgOrdenes_classifier.predict(X=x_avgOrdenes_test)

In [90]:
# Report the random forest's accuracy on the test split.
# The printed label says "Accuracy" because the value comes from accuracy_score, not an F1 score.
print('Testing Set Evaluation Accuracy=>', metrics.accuracy_score(y_avgOrdenes_test, RF_avgOrdenes_pred))

Testing Set Evaluation F1-Score=> 0.5020990764063812


## OrdenRealizadaCotizacion

### Data preparation

In [None]:
# Load the whole VW_OrdenRealizadaCotizacion view and preview the first rows
df_ordenRealizadaCotizacion = pd.read_sql(
    sql='select * from VW_OrdenRealizadaCotizacion',
    con=connection,
)
df_ordenRealizadaCotizacion.head()

Unnamed: 0,ID_Categoria,ID_Parte,ProcesadoPor,IDAseguradora,OrdenRealizada
0,727,*220MISC,Aseguradora,438,False
1,769,*222MISC,Aseguradora,438,False
2,559,*223MISC,Aseguradora,880,False
3,207,*224MISC,Call center,880,False
4,1004,*225MISC,Aseguradora,1278,False


In [None]:
# Features are the first four columns; the target is the last column
x_ordenRealizadaCotizacion, y_ordenRealizadaCotizacion = (
    df_ordenRealizadaCotizacion.iloc[:, :4].values,
    df_ordenRealizadaCotizacion.iloc[:, -1].values,
)

In [None]:
#Ecoding
le = LabelEncoder()
x_ordenRealizadaCotizacion[:,0] = le.fit_transform(x_ordenRealizadaCotizacion[:,0])
x_ordenRealizadaCotizacion[:,1] = le.fit_transform(x_ordenRealizadaCotizacion[:,1])
x_ordenRealizadaCotizacion[:,2] = le.fit_transform(x_ordenRealizadaCotizacion[:,2])
x_ordenRealizadaCotizacion[:,3] = le.fit_transform(x_ordenRealizadaCotizacion[:,3])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
x_ordenRealizadaCotizacion_train, x_ordenRealizadaCotizacion_test, y_ordenRealizadaCotizacion_train, y_ordenRealizadaCotizacion_test = train_test_split(
    x_ordenRealizadaCotizacion,
    y_ordenRealizadaCotizacion,
    test_size=0.20,
    random_state=0,
)

### Decision Tree

In [91]:
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree on the training split.
# random_state is pinned (same seed as the random forest below) because an unseeded
# tree can break ties between equally good splits differently on each run.
DT_ordenRealizadaCotizacion_classifier = DecisionTreeClassifier(random_state=42)
DT_ordenRealizadaCotizacion_classifier.fit(x_ordenRealizadaCotizacion_train, y_ordenRealizadaCotizacion_train)

# Predict labels for the held-out test split and display them
DT_ordenRealizadaCotizacion_pred = DT_ordenRealizadaCotizacion_classifier.predict(x_ordenRealizadaCotizacion_test)
DT_ordenRealizadaCotizacion_pred

array([False,  True, False, ...,  True,  True, False])

In [92]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Score the decision tree on the test split: confusion matrix and overall accuracy
ordenRealizadaCotizacion_cm = confusion_matrix(
    y_true=y_ordenRealizadaCotizacion_test,
    y_pred=DT_ordenRealizadaCotizacion_pred,
)
ordenRealizadaCotizacion_ac = accuracy_score(
    y_true=y_ordenRealizadaCotizacion_test,
    y_pred=DT_ordenRealizadaCotizacion_pred,
)

In [93]:
# Confusion matrix of the decision tree predictions on the test split
ordenRealizadaCotizacion_cm

array([[887, 604],
       [612, 380]], dtype=int64)

In [94]:
# Accuracy of the decision tree predictions on the test split
ordenRealizadaCotizacion_ac

0.5102698348771647

### Random Forest

In [95]:
from sklearn.ensemble import RandomForestClassifier
import sklearn.metrics as metrics

In [96]:
# Train a seeded, entropy-criterion random forest on the training split and show the estimator
RF_ordenRealizadaCotizacion_classifier = RandomForestClassifier(
    criterion='entropy',
    random_state=42,
).fit(x_ordenRealizadaCotizacion_train, y_ordenRealizadaCotizacion_train)
RF_ordenRealizadaCotizacion_classifier

RandomForestClassifier(criterion='entropy', random_state=42)

In [100]:
# Random forest predictions for the held-out test split
RF_ordenRealizadaCotizacion_pred = RF_ordenRealizadaCotizacion_classifier.predict(X=x_ordenRealizadaCotizacion_test)

In [102]:
# Report the random forest's accuracy on the test split.
# The printed label says "Accuracy" because the value comes from accuracy_score, not an F1 score.
print('Testing Set Evaluation Accuracy=>', metrics.accuracy_score(y_ordenRealizadaCotizacion_test, RF_ordenRealizadaCotizacion_pred))

Testing Set Evaluation F1-Score=> 0.5573902537253322


## PorcentajeBajo

### Data preparation

In [None]:
# Load the whole VW_PorcentajeBajo view and preview the first rows
df_avgPorcentajeDescuentoBajo = pd.read_sql(
    sql='select * from VW_PorcentajeBajo',
    con=connection,
)
df_avgPorcentajeDescuentoBajo.head()

Unnamed: 0,ID_Parte,ID_Categoria,Nombre,PorcentajeBajo
0,AC1183100,1511,Spokane,0
1,AC2502112N,1831,Colorado,1
2,AC1200121,2080,Tulsa,1
3,222ADJUST,6,Memphis,1
4,AC1000148U-ORIG.REC,1235,Milwaukee,0


In [None]:
# Features are the first three columns; the target is the last column
x_avgPorcentajeDescuentoBajo, y_avgPorcentajeDescuentoBajo = (
    df_avgPorcentajeDescuentoBajo.iloc[:, :3].values,
    df_avgPorcentajeDescuentoBajo.iloc[:, -1].values,
)

In [None]:
#Ecoding
le = LabelEncoder()
x_avgPorcentajeDescuentoBajo[:,0] = le.fit_transform(x_avgPorcentajeDescuentoBajo[:,0])
x_avgPorcentajeDescuentoBajo[:,1] = le.fit_transform(x_avgPorcentajeDescuentoBajo[:,1])
x_avgPorcentajeDescuentoBajo[:,2] = le.fit_transform(x_avgPorcentajeDescuentoBajo[:,2])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
x_avgPorcentajeDescuentoBajo_train, x_avgPorcentajeDescuentoBajo_test, y_avgPorcentajeDescuentoBajo_train, y_avgPorcentajeDescuentoBajo_test = train_test_split(
    x_avgPorcentajeDescuentoBajo,
    y_avgPorcentajeDescuentoBajo,
    test_size=0.20,
    random_state=0,
)

### Decision Tree

In [103]:
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree on the training split.
# random_state is pinned (same seed as the random forest below) because an unseeded
# tree can break ties between equally good splits differently on each run.
DT_avgPorcentajeDescuentoBajo_classifier = DecisionTreeClassifier(random_state=42)
DT_avgPorcentajeDescuentoBajo_classifier.fit(x_avgPorcentajeDescuentoBajo_train, y_avgPorcentajeDescuentoBajo_train)

# Predict labels for the held-out test split and display them
DT_avgPorcentajeDescuentoBajo_pred = DT_avgPorcentajeDescuentoBajo_classifier.predict(x_avgPorcentajeDescuentoBajo_test)
DT_avgPorcentajeDescuentoBajo_pred

array([1, 0, 0, ..., 1, 1, 1], dtype=int64)

In [104]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Score the decision tree on the test split: confusion matrix and overall accuracy
avgPorcentajeDescuentoBajo_cm = confusion_matrix(
    y_true=y_avgPorcentajeDescuentoBajo_test,
    y_pred=DT_avgPorcentajeDescuentoBajo_pred,
)
avgPorcentajeDescuentoBajo_ac = accuracy_score(
    y_true=y_avgPorcentajeDescuentoBajo_test,
    y_pred=DT_avgPorcentajeDescuentoBajo_pred,
)

In [105]:
# Confusion matrix of the decision tree predictions on the test split
avgPorcentajeDescuentoBajo_cm

array([[380, 622],
       [563, 920]], dtype=int64)

In [106]:
# Accuracy of the decision tree predictions on the test split
avgPorcentajeDescuentoBajo_ac

0.5231388329979879

### Random Forest

In [107]:
from sklearn.ensemble import RandomForestClassifier
import sklearn.metrics as metrics

In [108]:
# Train a seeded, entropy-criterion random forest on the training split and show the estimator
RF_avgPorcentajeDescuentoBajo_classifier = RandomForestClassifier(
    criterion='entropy',
    random_state=42,
).fit(x_avgPorcentajeDescuentoBajo_train, y_avgPorcentajeDescuentoBajo_train)
RF_avgPorcentajeDescuentoBajo_classifier

RandomForestClassifier(criterion='entropy', random_state=42)

In [109]:
# Random forest predictions for the held-out test split
RF_avgPorcentajeDescuentoBajo_pred = RF_avgPorcentajeDescuentoBajo_classifier.predict(X=x_avgPorcentajeDescuentoBajo_test)

In [110]:
# Report the random forest's accuracy on the test split.
# The printed label says "Accuracy" because the value comes from accuracy_score, not an F1 score.
print('Testing Set Evaluation Accuracy=>', metrics.accuracy_score(y_avgPorcentajeDescuentoBajo_test, RF_avgPorcentajeDescuentoBajo_pred))

Testing Set Evaluation F1-Score=> 0.5203219315895372


## VehiculoModerno

### Data preparation

In [112]:
# Load the whole VW_VehiculoModerno view and preview the first rows
df_vehiculoModerno = pd.read_sql(
    sql='select * from VW_VehiculoModerno',
    con=connection,
)
df_vehiculoModerno.head()

Unnamed: 0,ID_Parte,ID_Categoria,Marca,VehiculoModerno
0,ALT13735R,5106,GMC,0
1,ACK011820R,6788,Chevrolet,1
2,ACK011121,7172,Lexus,1
3,ACK013043,9004,Toyota,1
4,ACK012339,7330,Mazda,1


In [113]:
# Features are the first three columns; the target is the last column
x_vehiculoModerno, y_vehiculoModerno = (
    df_vehiculoModerno.iloc[:, :3].values,
    df_vehiculoModerno.iloc[:, -1].values,
)

In [114]:
#Ecoding
le = LabelEncoder()
x_vehiculoModerno[:,0] = le.fit_transform(x_vehiculoModerno[:,0])
x_vehiculoModerno[:,1] = le.fit_transform(x_vehiculoModerno[:,1])
x_vehiculoModerno[:,2] = le.fit_transform(x_vehiculoModerno[:,2])

In [115]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
x_vehiculoModerno_train, x_vehiculoModerno_test, y_vehiculoModerno_train, y_vehiculoModerno_test = train_test_split(
    x_vehiculoModerno,
    y_vehiculoModerno,
    test_size=0.20,
    random_state=0,
)

### Decision Tree

In [116]:
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree on the training split.
# random_state is pinned (same seed as the random forest below) because an unseeded
# tree can break ties between equally good splits differently on each run.
DT_vehiculoModerno_classifier = DecisionTreeClassifier(random_state=42)
DT_vehiculoModerno_classifier.fit(x_vehiculoModerno_train, y_vehiculoModerno_train)

# Predict labels for the held-out test split and display them
DT_vehiculoModerno_pred = DT_vehiculoModerno_classifier.predict(x_vehiculoModerno_test)
DT_vehiculoModerno_pred

array([0, 1, 1, ..., 1, 0, 1], dtype=int64)

In [117]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Score the decision tree on the test split: confusion matrix and overall accuracy
vehiculoModerno_cm = confusion_matrix(
    y_true=y_vehiculoModerno_test,
    y_pred=DT_vehiculoModerno_pred,
)
vehiculoModerno_ac = accuracy_score(
    y_true=y_vehiculoModerno_test,
    y_pred=DT_vehiculoModerno_pred,
)

In [118]:
# Confusion matrix of the decision tree predictions on the test split
vehiculoModerno_cm

array([[ 431,  476],
       [ 511, 1107]], dtype=int64)

In [119]:
# Accuracy of the decision tree predictions on the test split
vehiculoModerno_ac

0.6091089108910891

### Random Forest

In [120]:
from sklearn.ensemble import RandomForestClassifier
import sklearn.metrics as metrics

In [121]:
# Train a seeded, entropy-criterion random forest on the training split and show the estimator
RF_vehiculoModerno_classifier = RandomForestClassifier(
    criterion='entropy',
    random_state=42,
).fit(x_vehiculoModerno_train, y_vehiculoModerno_train)
RF_vehiculoModerno_classifier

RandomForestClassifier(criterion='entropy', random_state=42)

In [122]:
# Random forest predictions for the held-out test split
RF_vehiculoModerno_pred = RF_vehiculoModerno_classifier.predict(X=x_vehiculoModerno_test)

In [123]:
# Report the random forest's accuracy on the test split.
# The printed label says "Accuracy" because the value comes from accuracy_score, not an F1 score.
print('Testing Set Evaluation Accuracy=>', metrics.accuracy_score(y_vehiculoModerno_test, RF_vehiculoModerno_pred))

Testing Set Evaluation F1-Score=> 0.6471287128712871
