In [None]:
# Libraries and packages

import numpy as np 
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_curve, auc, roc_auc_score, classification_report, confusion_matrix, make_scorer

import warnings
# Install a global "ignore" filter so no warning is printed by later cells.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# used in this notebook; consider narrowing the filter while developing.
warnings.filterwarnings("ignore")

In [None]:
# Load the dataset
#
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run as-is on a machine where this exact file location exists.
DATA_PATH = r"C:\Users\carlo\OneDrive\Escritorio\Ensambles\BASE DE DATOS.csv"

# The file is read with ';' as the field separator and ',' as the decimal mark.
df = pd.read_csv(DATA_PATH, sep=';', decimal=',')
df.info()

In [None]:
# Cast the columns that should be categorical to the pandas 'category' dtype

CATEGORICAL_COLUMNS = ['SEX', 'EST', 'PRO', 'SEM', 'TRA', 'FIN', 'COL', 'DES']

for column in CATEGORICAL_COLUMNS:
    df[column] = df[column].astype('category')

df.info()  # Check the resulting dtypes

In [None]:
# Create dummy variables for the non-numeric columns ("one-hot encoding")

# Remove the limit on the number of displayed columns
# (by default only 20 are shown).
pd.options.display.max_columns = None

# Every column whose dtype is not int64/float64 is encoded; drop_first=True is
# forwarded to get_dummies for each encoded column.
non_numeric_columns = df.select_dtypes(exclude=['int64', 'float64']).columns
df = pd.get_dummies(df, columns=non_numeric_columns, drop_first=True)

df.head()


In [None]:
# Scale the whole matrix
from sklearn.preprocessing import MinMaxScaler

# NOTE(review): the scaler is fitted on the full frame, which at this point
# still contains the 'DES_1' target column. `dfs` is only displayed here and is
# not referenced by the later cells visible in this notebook.
scaler = MinMaxScaler().fit(df)
dfs = scaler.transform(df)

print(dfs)
dfs.shape

In [None]:
# Define the target vector (y) and the feature matrix (X)
TARGET_COLUMN = 'DES_1'

X = df.drop(columns=TARGET_COLUMN)
y = df[TARGET_COLUMN]
y

In [None]:
# Seed shared by NumPy's global random generator and the train/test split
SEED = 15646
np.random.seed(SEED)

# Randomly split the data into 70% for training and 30% for testing,
# stratified on the target. random_state is passed explicitly so the split
# does not depend on how much of the global NumPy random stream has already
# been consumed when this call runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, stratify=y, random_state=SEED
)

# Check the resulting shapes
print(X_train.shape)
print(y_train.shape)

print(X_test.shape)
print(y_test.shape)


In [None]:
# Scale the features

from sklearn.preprocessing import MinMaxScaler

# The scaler is fitted on the training features only; the same fitted scaler
# is then applied to both the training and the test features.
scaler = MinMaxScaler().fit(X_train)
XCS_train = scaler.transform(X_train)
XCS_test = scaler.transform(X_test)

# Show the scaled arrays first, then their shapes
for scaled in (XCS_train, XCS_test):
    print(scaled)

for scaled in (XCS_train, XCS_test):
    print(scaled.shape)


In [None]:
# AdaBoost model #1 (default base estimator)

from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import classification_report, confusion_matrix

# fit() returns the fitted estimator, so construction and training are chained
ada_model1 = AdaBoostClassifier(
    n_estimators=50, learning_rate=0.1, random_state=0
).fit(XCS_train, y_train)

# Predictions on the training and test sets
ypredtra = ada_model1.predict(XCS_train)
ypredtes = ada_model1.predict(XCS_test)

# Confusion matrix and classification report: training set first, then test set
for y_true, y_pred in ((y_train, ypredtra), (y_test, ypredtes)):
    print(confusion_matrix(y_true, y_pred))
    print(classification_report(y_true, y_pred))



In [None]:
# AdaBoost model #2 (logistic regression as the base estimator)

from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

base_model = LogisticRegression()
# Alternative base estimator; defined here but not used by the model below.
base_model1 = SVC()

# The base estimator is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` (deprecated in scikit-learn 1.2, removed in
# 1.4), so the positional form works on both older and newer releases.
# NOTE(review): this rebinds `ada_model1`, replacing the model fitted in the
# "ADABOOST1" cell.
ada_model1 = AdaBoostClassifier(base_model, n_estimators=50, learning_rate=0.1, random_state=0)
ada_model1.fit(XCS_train, y_train)

# Predictions on the training and test sets
ypredtra = ada_model1.predict(XCS_train)
ypredtes = ada_model1.predict(XCS_test)

# Confusion matrix and classification report: training set first, then test set
print(confusion_matrix(y_train, ypredtra))
print(classification_report(y_train, ypredtra))
print(confusion_matrix(y_test, ypredtes))
print(classification_report(y_test, ypredtes))


In [None]:
# Gradient boosting model #1

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, confusion_matrix

# fit() returns the fitted estimator, so construction and training are chained
boost_model1 = GradientBoostingClassifier(random_state=42).fit(XCS_train, y_train)

# Predictions on the training and test sets
ypredtra = boost_model1.predict(XCS_train)
ypredtes = boost_model1.predict(XCS_test)

# Confusion matrix and classification report: training set first, then test set
for y_true, y_pred in ((y_train, ypredtra), (y_test, ypredtes)):
    print(confusion_matrix(y_true, y_pred))
    print(classification_report(y_true, y_pred))


In [None]:
# XGBoost model #1

# Install from the console with one of:
# pip install xgboost
# conda install xgboost

from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix

# fit() returns the fitted estimator, so construction and training are chained
xgb_model1 = XGBClassifier(
    n_estimators=100, learning_rate=0.1, random_state=0
).fit(XCS_train, y_train)

# Predictions on the training and test sets
ypredtra = xgb_model1.predict(XCS_train)
ypredtes = xgb_model1.predict(XCS_test)

# Confusion matrix and classification report: training set first, then test set
for y_true, y_pred in ((y_train, ypredtra), (y_test, ypredtes)):
    print(confusion_matrix(y_true, y_pred))
    print(classification_report(y_true, y_pred))

In [None]:
# Define a stacking ensemble of models

from sklearn.ensemble import StackingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Base learners: logistic regression, a decision tree and an SVM.
# The decision trees get an explicit random_state, like the other seeded
# models in this notebook, so re-running this cell does not depend on the
# current state of the global NumPy random generator.
lista = [
    ("lr", LogisticRegression()),
    ("tree", DecisionTreeClassifier(random_state=0)),
    ("svm", SVC()),
]

# A decision tree combines the base learners' outputs into the final prediction
model = StackingClassifier(estimators=lista, final_estimator=DecisionTreeClassifier(random_state=0))
model.fit(XCS_train, y_train)

# Predictions on the training and test sets
ypredtra = model.predict(XCS_train)
ypredtes = model.predict(XCS_test)

# Confusion matrix and classification report: training set first, then test set
print(confusion_matrix(y_train, ypredtra))
print(classification_report(y_train, ypredtra))
print(confusion_matrix(y_test, ypredtes))
print(classification_report(y_test, ypredtes))
    