##Paqueterias

In [None]:
import pandas as pd
import numpy as np
import zipfile
from google.colab import drive
import plotly.express as px
 
# Librerias auxiliares
import unicodedata
from sklearn.model_selection import train_test_split
import re

##Funciones

In [None]:
def completitud(df):
    """Summarise how complete each column of ``df`` is.

    Args:
        df (pd.DataFrame): data to inspect.

    Returns:
        pd.DataFrame: one row per column of ``df`` with ``variable`` (the
        column name), ``total`` (number of null values) and ``completitud``
        (percentage of non-null values), sorted from the least to the most
        complete column.
    """
    nulls_per_column = df.isnull().sum()
    report = pd.DataFrame(nulls_per_column).reset_index()
    report = report.rename(columns={"index": "variable", 0: "total"})
    # Share of non-null rows, expressed as a percentage.
    report["completitud"] = (1 - report["total"] / df.shape[0]) * 100
    report = report.sort_values(by="completitud", ascending=True)
    return report.reset_index(drop=True)

def clean_text(text, pattern="[^a-zA-Z0-9]"):
    """Normalise a free-text value to lowercase ASCII words.

    Accents are stripped (NFD decomposition, then non-ASCII code points are
    dropped), every match of ``pattern`` becomes a space, the text is
    lowercased and runs of whitespace collapse to a single space.

    Args:
        text: value to clean. Missing values (None/NaN/NA) are returned
            unchanged so they still count as missing; any other non-string
            value is converted with ``str`` first.
        pattern (str, optional): regular expression of the characters to
            replace with a space. Defaults to everything that is not an
            ASCII letter or digit.

    Returns:
        The cleaned string, or ``text`` itself when it is a missing value.
    """
    if not isinstance(text, str):
        # unicodedata.normalize only accepts str; keep missing values intact
        # instead of crashing or turning them into the literal "nan".
        if pd.api.types.is_scalar(text) and pd.isna(text):
            return text
        text = str(text)
    ascii_text = unicodedata.normalize('NFD', text).encode('ascii', 'ignore').decode('ascii')
    replaced = re.sub(pattern, " ", ascii_text, flags=re.UNICODE)
    # split() with no arguments already discards leading/trailing whitespace.
    return ' '.join(replaced.lower().split())

def clean_cat(var):
    """Normalise a categorical value to a lowercase, single-spaced string.

    Args:
        var: value to clean; it is converted with ``str``.

    Returns:
        The cleaned string. Missing values (None/NaN/NA) are returned
        unchanged: converting them with ``str`` would produce the literal
        text "nan"/"none" and hide them from any later null check.
    """
    if pd.api.types.is_scalar(var) and pd.isna(var):
        return var
    # split() with no arguments already discards leading/trailing whitespace.
    return " ".join(str(var).lower().split())

def replace_nan(df, col, nan):
    """Replace a sentinel value with NaN in one column, in place.

    Args:
        df (pd.DataFrame): data to modify; column ``col`` is overwritten.
        col: name of the column to process.
        nan: sentinel value; every element equal to it becomes ``np.nan``.
    """
    def _to_missing(value):
        if value == nan:
            return np.nan
        return value

    df[col] = df[col].map(_to_missing)
    
def convert_to_nan(df, var):
    """Set to NaN, in place, the values of ``var`` that carry no letters.

    A row is blanked when the string form of its value is made only of
    digits, or when it contains no alphabetic character at all.

    Args:
        df (pd.DataFrame): data to modify.
        var: name of the column to process.
    """
    digits_only = df[var].map(lambda value: str(value).isdigit())
    has_letter = df[var].map(lambda value: any(ch.isalpha() for ch in str(value)))
    # Row labels of both groups; a label may appear twice, which is harmless
    # for the assignment below.
    labels = list(df[digits_only][var].index) + list(df[~has_letter].index)
    df.loc[labels, var] = np.nan

def is_digit(df, col, flag):
    """Filter rows by whether ``col`` holds an all-digit value.

    A value counts as "digit" when ``str(value).isdigit()`` is true, i.e. its
    string form is non-empty and made only of digit characters (so "1.0" or
    "-3" do not qualify).

    Args:
        df (pd.DataFrame): data to filter.
        col: name of the column to test.
        flag: 1 to keep the all-digit rows, 0 to keep the remaining rows.

    Returns:
        pd.DataFrame: the selected rows of ``df``.

    Raises:
        ValueError: if ``flag`` is neither 0 nor 1 (previously this printed a
            message and then failed with UnboundLocalError).
    """
    if flag not in (0, 1):
        raise ValueError("Valor no admitido")
    # astype(bool) keeps the mask usable even when the column is empty.
    mask = df[col].map(lambda x: str(x).isdigit()).astype(bool)
    return df[mask] if flag == 1 else df[~mask]

        
def is_alpha(df, col, flag):
    """Filter rows by whether ``col`` holds a value containing a letter.

    A value counts as "alpha" when at least one character of ``str(value)``
    is alphabetic.

    Args:
        df (pd.DataFrame): data to filter.
        col: name of the column to test.
        flag: 1 to keep the rows containing a letter, 0 to keep the rest.

    Returns:
        pd.DataFrame: the selected rows of ``df``.

    Raises:
        ValueError: if ``flag`` is neither 0 nor 1 (previously this printed a
            message and then failed with UnboundLocalError).
    """
    if flag not in (0, 1):
        raise ValueError("Valor no admitido")
    # astype(bool) keeps the mask usable even when the column is empty.
    mask = df[col].map(lambda x: any(ch.isalpha() for ch in str(x))).astype(bool)
    return df[mask] if flag == 1 else df[~mask]

def remover(lista, elementos):
    """Remove, in place, the first occurrence of each given element.

    Args:
        lista (list): list to modify.
        elementos: iterable with the values to remove, processed in order.

    Raises:
        ValueError: propagated from ``list.remove`` when a value is absent;
            elements processed before the failure stay removed.
    """
    discard = lista.remove
    for unwanted in elementos:
        discard(unwanted)

def freq(df:pd.DataFrame,var:list):
    """Print a frequency table for one or several variables.

    For each variable the table is indexed by the distinct values (sorted)
    and has the columns ``FA`` (absolute frequency), ``FR`` (relative
    frequency), ``FAA`` (cumulative absolute frequency) and ``FRA``
    (cumulative relative frequency).

    Args:
        df (pd.DataFrame): data holding the variables.
        var (list): column names; a single name that is not a list is also
            accepted.

    Returns:
        None. The tables are only printed.
    """
    # The original body ended with a pasted fragment that referenced an
    # undefined name ``col`` and raised NameError after printing; it has
    # been removed.
    if not isinstance(var, list):
        var = [var]
    for v in var:
        aux = df[v].value_counts().to_frame().sort_index()
        # The single column is renamed by position because its label differs
        # between pandas versions.
        aux.columns = ['FA']
        aux['FR'] = aux['FA']/aux['FA'].sum()
        aux['FAA'] = aux['FA'].cumsum()
        aux['FRA'] = aux['FR'].cumsum()
        print(f'****Tabla de frecuencias  {v}  ***\n\n')
        print(aux)
        print("\n"*3)


def imputar(df, col,strategy):
    """Fill the missing values of one column in place with scikit-learn.

    Args:
        df (pd.DataFrame): data to modify. Its index is also reset to a
            fresh 0..n-1 range (side effect kept from the original code).
        col: name of the column to impute.
        strategy (str): passed through as ``SimpleImputer(strategy=...)``.
    """
    # SimpleImputer was never imported at the top of the file, so the
    # function raised NameError; import it where it is used.
    from sklearn.impute import SimpleImputer

    imp=SimpleImputer(missing_values=np.nan,strategy=strategy)
    imp.fit(df[[col]])
    df.reset_index(drop=True,inplace=True)
    # transform returns a 2-D (n, 1) array; flatten it to a plain column.
    df[col]=imp.transform(df[[col]]).ravel()
    

def unitarias(df, col, umbral=.91):
    """Print a warning when one value dominates a column.

    Args:
        df (pd.DataFrame): data holding the variable.
        col: name of the column to check.
        umbral (float, optional): share above which the most frequent value
            makes the variable "unitary". Defaults to 0.91.

    Returns:
        None. A message is printed only when the variable is unitary.
    """
    # Work on the Series directly: the column label produced by
    # value_counts changed in pandas 2.0 ("proportion"), which broke the
    # former ``result[col]`` lookup.
    shares = df[col].value_counts(normalize=True)
    if len(shares) > 0 and shares.iloc[0] > umbral:
        print(f"{col} -- VARIABLE UNITARIA")
            
def unitarias_per(df, col, umbral=.91):
    """Return the share of each value of a column, flagging unitary ones.

    Args:
        df (pd.DataFrame): data holding the variable.
        col: name of the column to summarise.
        umbral (float, optional): share above which the most frequent value
            makes the variable "unitary" (a message is printed). Defaults to
            0.91.

    Returns:
        pd.DataFrame: columns ``<col>_valores`` (distinct values, most
        frequent first) and ``%_aparicion`` (share as text, e.g. "56.3%").
        It is empty, with the same two columns, when ``col`` has no non-null
        values.
    """
    # Work on the Series directly: the column label produced by
    # value_counts changed in pandas 2.0 ("proportion"), which broke the
    # former ``result[col]`` lookup.
    shares = df[col].value_counts(normalize=True)
    if len(shares) > 0 and shares.iloc[0] > umbral:
        print(f"{col} -- VARIABLE UNITARIA")
    # tolist() yields plain Python floats, so the text matches str(float).
    percentages = [str(round(share * 100, 2)) + "%" for share in shares.tolist()]
    return pd.DataFrame({col + "_valores": shares.index, "%_aparicion": percentages})


def ssample(df, var, n_sample):
    """Draw a stratified sample of ``df``.

    Args:
        df (pd.DataFrame): data to sample from.
        var: name of the column used to stratify the split.
        n_sample: forwarded as ``test_size`` to ``train_test_split``.

    Returns:
        The second ("test") partition returned by ``train_test_split``,
        computed with a fixed ``random_state=3``.
    """
    partitions = train_test_split(
        df,
        test_size=n_sample,
        stratify=df[var],
        random_state=3,
    )
    return partitions[1]

def normalizar(df:pd.DataFrame,var:str,umbral:float=0.05)->tuple:
    """Build a normalisation map for a discrete variable.

    Categories whose relative frequency is below ``umbral`` are grouped
    under 'Otros'. If those rare categories together still fall below
    ``umbral``, they are folded into the most frequent category instead.
    Representative categories map to themselves, keeping their original
    type.

    Args:
        df (pd.DataFrame): data with the discrete variable to normalise.
        var (str): name of the variable.
        umbral (float, optional): representativeness threshold. Defaults to 0.05.

    Returns:
        tuple: the variable name and a dict mapping every observed category
        to its normalised value (an empty dict when the column has no
        non-null values).
    """
    # Work on the Series directly: the column label produced by
    # value_counts changed in pandas 2.0 ("proportion"), which broke the
    # former ``aux[var]`` lookup.
    shares = df[var].value_counts(normalize=True)
    if shares.empty:
        return var, {}
    rare = shares < umbral
    # shares is sorted in descending order, so position 0 is the most
    # frequent category.
    top_category = shares.index[0]
    if shares[rare].sum() < umbral and not rare.iloc[0]:
        # 'Otros' would not be representative either: merge it into the top.
        target = top_category
    else:
        target = 'Otros'
    mapping = {
        category: (target if is_rare else category)
        for category, is_rare in zip(shares.index, rare.tolist())
    }
    return var, mapping




def histograma_(df, var, nbins, color, title, x_axis, y_axis, size):
    """Show a plotly histogram of one variable.

    Args:
        df (pd.DataFrame): data to plot.
        var: name of the column placed on the x axis.
        nbins: forwarded as ``nbins`` to ``px.histogram``.
        color: single colour used for the bars.
        title: figure title.
        x_axis: x-axis title.
        y_axis: y-axis title.
        size: font size for the figure text.
    """
    text_style = dict(family="Century Gothic", size=size, color="Black")
    fig = px.histogram(
        df,
        x=var,
        nbins=nbins,
        color_discrete_sequence=[color],
        text_auto=True,
    )
    fig.update_layout(
        title=title,
        xaxis_title=x_axis,
        yaxis_title=y_axis,
        font=text_style,
        plot_bgcolor='rgba(0,0,0,0)',  # transparent plot background
    )
    fig.show()

def frecuencia(df:pd.DataFrame,var:str):
    """Count how many times each value of a variable appears.

    Args:
        df (pd.DataFrame): data holding the variable.
        var (str): name of the column to count.

    Returns:
        pd.DataFrame: columns ``var`` (distinct values, most frequent first)
        and ``frecuencia`` (their counts). It is empty, with the same two
        columns, when the column has no non-null values.
    """
    # Built from the Series directly: the former ``result[var]`` identity map
    # was a no-op that raised KeyError on pandas >= 2.0, where the counts
    # column is labelled "count".
    counts = df[var].value_counts()
    return pd.DataFrame({var: counts.index, "frecuencia": counts.to_numpy()})

def bar_plot(df, var,color, title, x_axis,y_axis,size):
    """Show a plotly bar chart with the frequency of each value of ``var``.

    Args:
        df (pd.DataFrame): data to plot.
        var: name of the column whose values are counted with ``frecuencia``.
        color: single colour used for the bars.
        title: figure title.
        x_axis: x-axis title.
        y_axis: y-axis title.
        size: font size for the figure text.
    """
    counts = frecuencia(df, var)
    text_style = dict(family="Century Gothic", size=size, color="Black")
    fig = px.bar(
        counts,
        x=var,
        y="frecuencia",
        color_discrete_sequence=[color],
    )
    fig.update_layout(
        title=title,
        xaxis_title=x_axis,
        yaxis_title=y_axis,
        font=text_style,
        plot_bgcolor='rgba(0,0,0,0)',  # transparent plot background
    )
    fig.show()


def box_plot(df,var,color, title, y_axis,size):
    """Show a plotly box plot of one variable.

    Args:
        df (pd.DataFrame): data to plot.
        var: name of the column placed on the y axis.
        color: single colour used for the box.
        title: figure title.
        y_axis: y-axis title (the x-axis title is left empty).
        size: font size for the figure text.
    """
    text_style = dict(family="Century Gothic", size=size, color="Black")
    fig = px.box(df, y=var, color_discrete_sequence=[color])
    fig.update_layout(
        title=title,
        xaxis_title="",
        yaxis_title=y_axis,
        font=text_style,
        plot_bgcolor='rgba(0,0,0,0)',  # transparent plot background
    )
    fig.show()

def OUTLIERS(df,cols):
    """Detect outliers per column with the 1.5 * IQR rule.

    A value is an outlier when it lies below ``Q1 - 1.5 * IQR`` or above
    ``Q3 + 1.5 * IQR``, with the quartiles computed on that column.

    Args:
        df (pd.DataFrame): data to inspect (it is not modified).
        cols: names of the columns to analyse.

    Returns:
        pd.DataFrame: one row per analysed column with ``variables`` (column
        name), ``n_outliers_IQR`` (number of outliers) and ``indices`` (list
        of the index labels of the outlying rows).
    """
    total=[]
    indices_=[]

    for col in cols:
        Q1=df[col].quantile(0.25)
        Q3=df[col].quantile(0.75)
        IQR=Q3-Q1
        INF=Q1-1.5*IQR
        SUP=Q3+1.5*IQR

        # Build the mask once and reuse it for both the count and the labels.
        fuera=((df[col] < INF) | (df[col] > SUP)).to_numpy()
        indices_iqr=list(df.index[fuera])
        total.append(len(indices_iqr))
        indices_.append(indices_iqr)

    results=pd.DataFrame()
    results["variables"]=cols
    results["n_outliers_IQR"]=total
    results["indices"]=indices_
    return results[['variables', 'n_outliers_IQR', 'indices']]

##Extraccion y lectura de datos

In [None]:
!pip install wget
!wget https://github.com/Dereck125/archivo/raw/master/SantaFeGrill_4e.xls

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
--2023-03-26 02:20:14--  https://github.com/Dereck125/archivo/raw/master/SantaFeGrill_4e.xls
Resolving github.com (github.com)... 140.82.114.4
Connecting to github.com (github.com)|140.82.114.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/Dereck125/archivo/master/SantaFeGrill_4e.xls [following]
--2023-03-26 02:20:15--  https://raw.githubusercontent.com/Dereck125/archivo/master/SantaFeGrill_4e.xls
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 165376 (162K) [application/octet-stream]
Saving to: ‘SantaFeGrill_4e.xls.1’


2023-03-26 02:20:15 (9.40 MB/s) - ‘SantaFeGrill_4e.xls.1’ saved [165376/165

In [None]:


#Descarga y extraccion del archivo xls

path = "/content/SantaFeGrill_4e.xls"

df=pd.read_excel(path)

##Exploracion de datos

In [None]:
df.head()

Unnamed: 0,id,x_s1,x_s2,x_s3,X_s4,x1,x2,x3,x4,x5,...,x34,x35,x36,x37,x38,x39,x40,x41,x42,x43
0,1,1,1,1,jose,7,4,5,4,4,...,2,15000,6.0,4.33,5.0,4.0,4.33,5.67,4.0,6.0
1,6,1,1,1,jose,3,5,5,3,6,...,2,100000,4.33,3.0,4.33,5.5,3.67,6.67,6.0,5.0
2,7,1,1,1,jose,4,2,5,4,2,...,3,65000,4.0,4.33,5.0,2.0,3.67,5.33,4.0,6.0
3,9,1,1,1,jose,7,5,5,3,5,...,3,100000,6.67,3.0,5.0,5.0,5.33,6.33,4.5,5.0
4,10,1,1,1,jose,4,4,6,4,4,...,3,120000,5.67,3.33,6.0,4.0,5.0,6.67,5.0,6.0


In [None]:
# Dimensión  de la base de datos

print(f"Número de filas: { df.shape[0]}")
print(f"Número de columnas: { df.shape[1]}")

Número de filas: 405
Número de columnas: 48


In [None]:
# Tipo de dato
df.dtypes

id        int64
x_s1      int64
x_s2      int64
x_s3      int64
X_s4     object
x1        int64
x2        int64
x3        int64
x4        int64
x5        int64
x6        int64
x7        int64
x8        int64
x9        int64
x10       int64
x11       int64
x12       int64
x13       int64
x14       int64
x15       int64
x16       int64
x17       int64
x18       int64
x19       int64
x20       int64
x21       int64
x22       int64
x23       int64
x24       int64
x25       int64
x26       int64
x27       int64
x28       int64
x29       int64
x30       int64
x31       int64
x32       int64
x33       int64
x34       int64
x35       int64
x36     float64
x37     float64
x38     float64
x39     float64
x40     float64
x41     float64
x42     float64
x43     float64
dtype: object

In [None]:
# Resumen de variables cualitatitvas

df.describe(include=object)

Unnamed: 0,X_s4
count,405
unique,2
top,sta_fe
freq,253


In [None]:
df.columns


Index(['id', 'x_s1', 'x_s2', 'x_s3', 'X_s4', 'x1', 'x2', 'x3', 'x4', 'x5',
       'x6', 'x7', 'x8', 'x9', 'x10', 'x11', 'x12', 'x13', 'x14', 'x15', 'x16',
       'x17', 'x18', 'x19', 'x20', 'x21', 'x22', 'x23', 'x24', 'x25', 'x26',
       'x27', 'x28', 'x29', 'x30', 'x31', 'x32', 'x33', 'x34', 'x35', 'x36',
       'x37', 'x38', 'x39', 'x40', 'x41', 'x42', 'x43'],
      dtype='object')

##Etiquetado de variables

In [None]:
# Identifier column

varid=['id']

# Quantitative variables
varc=[ 'x33', 'x35']

# Qualitative variables: categorical
vard= ['x_s1', 'x_s2', 'x_s3','x1', 'x2', 'x3', 'x4', 'x5',
       'x6', 'x7', 'x8', 'x9', 'x10', 'x11', 'x12', 'x13', 'x14', 'x15', 'x16',
       'x17', 'x18', 'x19', 'x20', 'x21', 'x22', 'x23', 'x24', 'x25', 'x26',
       'x27', 'x28', 'x29', 'x30', 'x31', 'x32', 'x34', 'x36',
       'x37', 'x38', 'x39', 'x40', 'x41', 'x42', 'x43']
      
# Text variables
vartxt=['X_s4' ]



In [None]:
df[varid+vartxt+vard+varc].shape[1]

48

In [None]:
df_2=df[varid+vartxt+varc+vard].copy()

In [None]:
df_2.head(10)

Unnamed: 0,id,X_s4,x33,x35,x_s1,x_s2,x_s3,x1,x2,x3,...,x32,x34,x36,x37,x38,x39,x40,x41,x42,x43
0,1,jose,0,15000,1,1,1,7,4,5,...,0,2,6.0,4.33,5.0,4.0,4.33,5.67,4.0,6.0
1,6,jose,1,100000,1,1,1,3,5,5,...,1,2,4.33,3.0,4.33,5.5,3.67,6.67,6.0,5.0
2,7,jose,0,65000,1,1,1,4,2,5,...,0,3,4.0,4.33,5.0,2.0,3.67,5.33,4.0,6.0
3,9,jose,1,100000,1,1,1,7,5,5,...,1,3,6.67,3.0,5.0,5.0,5.33,6.33,4.5,5.0
4,10,jose,0,120000,1,1,1,4,4,6,...,1,3,5.67,3.33,6.0,4.0,5.0,6.67,5.0,6.0
5,15,jose,1,70000,1,1,1,7,2,4,...,0,3,7.0,5.33,3.33,2.0,5.33,4.33,3.0,5.0
6,20,jose,0,21000,1,1,1,7,5,5,...,0,4,7.0,4.0,5.0,5.0,5.33,5.33,4.0,5.0
7,26,jose,1,95000,1,1,1,7,7,6,...,1,1,6.67,3.67,5.67,7.0,6.0,6.0,5.0,6.0
8,27,jose,0,90000,1,1,1,7,5,4,...,0,3,7.0,4.33,4.33,5.0,5.33,5.67,3.0,4.0
9,28,jose,1,100000,1,1,1,7,4,6,...,1,1,6.67,3.0,4.33,4.0,6.33,6.67,3.0,6.0


##Calidad de datos

###Orden

In [None]:
df.dtypes

id        int64
x_s1      int64
x_s2      int64
x_s3      int64
X_s4     object
x1        int64
x2        int64
x3        int64
x4        int64
x5        int64
x6        int64
x7        int64
x8        int64
x9        int64
x10       int64
x11       int64
x12       int64
x13       int64
x14       int64
x15       int64
x16       int64
x17       int64
x18       int64
x19       int64
x20       int64
x21       int64
x22       int64
x23       int64
x24       int64
x25       int64
x26       int64
x27       int64
x28       int64
x29       int64
x30       int64
x31       int64
x32       int64
x33       int64
x34       int64
x35       int64
x36     float64
x37     float64
x38     float64
x39     float64
x40     float64
x41     float64
x42     float64
x43     float64
dtype: object

###Limpieza de variables categoricas y tipo texto

In [None]:
# Clean the text variables

for v in vartxt:
    df_2[v] = df_2[v].map(clean_text)

# Clean the categorical variables
for v in vard:
    df_2[v] = df_2[v].map(clean_cat)

###Completitud

In [None]:
df_completitud=completitud(df_2)
df_completitud

Unnamed: 0,variable,total,completitud
0,id,0,100.0
1,x20,0,100.0
2,x21,0,100.0
3,x22,0,100.0
4,x23,0,100.0
5,x24,0,100.0
6,x25,0,100.0
7,x26,0,100.0
8,x27,0,100.0
9,x28,0,100.0


In [None]:
miss_drop=list(df_completitud[df_completitud['completitud']<80]['variable'])
df_2=df_2.drop(columns=miss_drop)

###Duplicados

In [None]:
# Numero de duplicados

print(f"Número de duplicados general : { df_2.duplicated().sum()}")

Número de duplicados general : 0


###Precision

In [None]:
# Se verifica si existe una variable categorica que sea unitaria

for v in vard:
    unitarias(df_2,v)

x_s1 -- VARIABLE UNITARIA
x_s2 -- VARIABLE UNITARIA
x_s3 -- VARIABLE UNITARIA


In [None]:
# Porcentaje de representatividad de una categoria dentro de
# cada variable discreta

for v in vard:
    display(unitarias_per(df_2,v))
    print("\n")

x_s1 -- VARIABLE UNITARIA


Unnamed: 0,x_s1_valores,%_aparicion
0,1,100.0%




x_s2 -- VARIABLE UNITARIA


Unnamed: 0,x_s2_valores,%_aparicion
0,1,100.0%




x_s3 -- VARIABLE UNITARIA


Unnamed: 0,x_s3_valores,%_aparicion
0,1,100.0%






Unnamed: 0,x1_valores,%_aparicion
0,7,56.3%
1,3,26.91%
2,4,15.31%
3,2,1.48%






Unnamed: 0,x2_valores,%_aparicion
0,4,42.96%
1,5,27.41%
2,3,17.53%
3,6,6.67%
4,7,2.96%
5,2,2.47%






Unnamed: 0,x3_valores,%_aparicion
0,5,27.9%
1,6,27.65%
2,7,18.27%
3,4,16.54%
4,3,9.38%
5,2,0.25%






Unnamed: 0,x4_valores,%_aparicion
0,3,30.12%
1,5,24.2%
2,6,19.01%
3,4,16.3%
4,2,10.37%






Unnamed: 0,x5_valores,%_aparicion
0,4,39.51%
1,5,29.38%
2,3,12.1%
3,6,9.63%
4,2,4.69%
5,7,4.69%






Unnamed: 0,x6_valores,%_aparicion
0,5,31.85%
1,6,28.64%
2,7,16.79%
3,4,14.07%
4,3,8.64%






Unnamed: 0,x7_valores,%_aparicion
0,5,33.33%
1,6,22.22%
2,4,20.74%
3,3,15.31%
4,2,4.44%
5,7,3.95%






Unnamed: 0,x8_valores,%_aparicion
0,5,29.38%
1,6,22.72%
2,4,22.47%
3,3,18.02%
4,2,4.44%
5,7,2.96%






Unnamed: 0,x9_valores,%_aparicion
0,7,47.9%
1,4,21.73%
2,6,15.8%
3,3,13.83%
4,2,0.49%
5,1,0.25%






Unnamed: 0,x10_valores,%_aparicion
0,4,35.56%
1,5,27.65%
2,3,16.54%
3,6,14.32%
4,2,2.96%
5,7,2.96%






Unnamed: 0,x11_valores,%_aparicion
0,7,54.57%
1,4,23.7%
2,3,21.73%






Unnamed: 0,x12_valores,%_aparicion
0,5,31.6%
1,4,29.14%
2,2,25.43%
3,3,12.35%
4,1,1.48%






Unnamed: 0,x13_valores,%_aparicion
0,5,42.22%
1,4,32.1%
2,6,15.56%
3,3,8.15%
4,2,1.48%
5,7,0.49%






Unnamed: 0,x14_valores,%_aparicion
0,6,32.59%
1,4,23.21%
2,5,19.26%
3,3,16.3%
4,2,8.64%






Unnamed: 0,x15_valores,%_aparicion
0,7,41.23%
1,5,22.96%
2,4,20.99%
3,6,14.81%






Unnamed: 0,x16_valores,%_aparicion
0,4,30.62%
1,6,22.96%
2,5,21.48%
3,3,16.3%
4,2,8.64%






Unnamed: 0,x17_valores,%_aparicion
0,5,41.23%
1,4,23.46%
2,6,19.75%
3,3,12.59%
4,2,1.48%
5,7,1.48%






Unnamed: 0,x18_valores,%_aparicion
0,5,33.83%
1,6,24.2%
2,4,21.73%
3,7,17.28%
4,3,2.96%






Unnamed: 0,x19_valores,%_aparicion
0,4,38.52%
1,3,22.22%
2,2,16.54%
3,6,8.64%
4,1,8.15%
5,7,5.93%






Unnamed: 0,x20_valores,%_aparicion
0,5,36.54%
1,4,20.74%
2,3,20.0%
3,6,16.3%
4,7,3.21%
5,2,3.21%






Unnamed: 0,x21_valores,%_aparicion
0,7,53.83%
1,4,18.77%
2,3,16.79%
3,2,6.91%
4,1,3.46%
5,6,0.25%






Unnamed: 0,x22_valores,%_aparicion
0,4,36.54%
1,5,23.46%
2,6,22.96%
3,3,9.38%
4,7,7.65%






Unnamed: 0,x23_valores,%_aparicion
0,4,34.32%
1,5,25.19%
2,3,18.77%
3,6,17.78%
4,7,2.47%
5,2,1.48%






Unnamed: 0,x24_valores,%_aparicion
0,4,29.14%
1,3,28.4%
2,5,19.26%
3,2,15.06%
4,6,6.67%
5,7,1.48%






Unnamed: 0,x25_valores,%_aparicion
0,3,24.94%
1,4,22.47%
2,5,22.47%
3,2,17.28%
4,1,12.84%






Unnamed: 0,x26_valores,%_aparicion
0,1,32.1%
1,2,23.21%
2,3,22.47%
3,4,22.22%






Unnamed: 0,x27_valores,%_aparicion
0,1,54.57%
1,2,31.11%
2,3,13.83%
3,4,0.49%






Unnamed: 0,x28_valores,%_aparicion
0,3,47.16%
1,4,44.44%
2,2,4.94%
3,1,3.46%






Unnamed: 0,x29_valores,%_aparicion
0,2,39.75%
1,4,33.33%
2,3,17.04%
3,1,9.88%






Unnamed: 0,x30_valores,%_aparicion
0,3,39.51%
1,2,31.85%
2,1,28.64%






Unnamed: 0,x31_valores,%_aparicion
0,0,66.67%
1,1,33.33%






Unnamed: 0,x32_valores,%_aparicion
0,0,65.19%
1,1,34.81%






Unnamed: 0,x34_valores,%_aparicion
0,3,50.62%
1,4,25.19%
2,2,10.37%
3,1,9.63%
4,5,4.2%






Unnamed: 0,x36_valores,%_aparicion
0,7.0,36.05%
1,3.0,11.6%
2,6.0,10.62%
3,4.0,10.37%
4,6.67,9.88%
5,3.67,5.43%
6,3.33,5.19%
7,5.67,3.46%
8,5.0,2.96%
9,4.33,1.48%






Unnamed: 0,x37_valores,%_aparicion
0,4.0,11.6%
1,4.67,11.6%
2,5.67,11.6%
3,3.67,10.86%
4,3.0,9.63%
5,4.33,8.64%
6,6.0,7.16%
7,2.67,6.42%
8,5.0,6.42%
9,3.33,6.17%






Unnamed: 0,x38_valores,%_aparicion
0,5.0,16.05%
1,5.33,12.35%
2,6.0,12.1%
3,5.67,9.38%
4,4.33,9.14%
5,4.67,8.4%
6,6.33,6.91%
7,4.0,6.67%
8,3.67,5.43%
9,6.67,4.2%






Unnamed: 0,x39_valores,%_aparicion
0,4.0,34.32%
1,5.0,19.51%
2,3.0,12.59%
3,4.5,8.89%
4,3.5,7.9%
5,5.5,6.17%
6,6.0,4.69%
7,6.5,2.22%
8,7.0,1.98%
9,2.0,1.48%






Unnamed: 0,x40_valores,%_aparicion
0,5.33,21.23%
1,4.67,11.36%
2,4.33,10.37%
3,2.0,8.89%
4,2.67,8.4%
5,3.67,7.41%
6,5.0,5.68%
7,6.0,4.44%
8,6.33,4.44%
9,2.33,4.2%






Unnamed: 0,x41_valores,%_aparicion
0,5.67,15.31%
1,5.0,13.09%
2,4.0,12.1%
3,6.33,10.12%
4,6.67,9.88%
5,4.67,9.63%
6,5.33,7.9%
7,6.0,6.42%
8,4.33,5.68%
9,3.67,5.43%






Unnamed: 0,x42_valores,%_aparicion
0,4.0,23.21%
1,6.0,22.96%
2,3.0,16.3%
3,5.0,11.85%
4,5.5,9.63%
5,2.0,8.64%
6,4.5,7.41%






Unnamed: 0,x43_valores,%_aparicion
0,5.0,34.81%
1,4.0,18.52%
2,4.5,14.81%
3,6.0,13.58%
4,3.0,6.67%
5,5.5,5.19%
6,3.5,4.44%
7,2.0,1.48%
8,7.0,0.25%
9,6.5,0.25%






In [None]:
df_2.head()

Unnamed: 0,id,X_s4,x33,x35,x_s1,x_s2,x_s3,x1,x2,x3,...,x32,x34,x36,x37,x38,x39,x40,x41,x42,x43
0,1,jose,0,15000,1,1,1,7,4,5,...,0,2,6.0,4.33,5.0,4.0,4.33,5.67,4.0,6.0
1,6,jose,1,100000,1,1,1,3,5,5,...,1,2,4.33,3.0,4.33,5.5,3.67,6.67,6.0,5.0
2,7,jose,0,65000,1,1,1,4,2,5,...,0,3,4.0,4.33,5.0,2.0,3.67,5.33,4.0,6.0
3,9,jose,1,100000,1,1,1,7,5,5,...,1,3,6.67,3.0,5.0,5.0,5.33,6.33,4.5,5.0
4,10,jose,0,120000,1,1,1,4,4,6,...,1,3,5.67,3.33,6.0,4.0,5.0,6.67,5.0,6.0


In [None]:
# Drop the unitary categorical variables from the data.
columns_unit=['x_s1','x_s2','x_s3']
# They are also removed from the vard list, in the following cell.
df_2=df_2.drop(columns=columns_unit)

In [None]:
for i in columns_unit:
  vard.remove(i)

In [None]:

# Every categorical and quantitative column is cast to float so that the
# measures of central tendency can be computed on them (the categorical
# columns hold strings after the clean_cat pass).
for v in vard + varc:
    df_2[v] = df_2[v].astype(float)


In [None]:
df_2.head()


Unnamed: 0,id,X_s4,x33,x35,x1,x2,x3,x4,x5,x6,...,x32,x34,x36,x37,x38,x39,x40,x41,x42,x43
0,1,jose,0.0,15000.0,7.0,4.0,5.0,4.0,4.0,5.0,...,0.0,2.0,6.0,4.33,5.0,4.0,4.33,5.67,4.0,6.0
1,6,jose,1.0,100000.0,3.0,5.0,5.0,3.0,6.0,4.0,...,1.0,2.0,4.33,3.0,4.33,5.5,3.67,6.67,6.0,5.0
2,7,jose,0.0,65000.0,4.0,2.0,5.0,4.0,2.0,5.0,...,0.0,3.0,4.0,4.33,5.0,2.0,3.67,5.33,4.0,6.0
3,9,jose,1.0,100000.0,7.0,5.0,5.0,3.0,5.0,5.0,...,1.0,3.0,6.67,3.0,5.0,5.0,5.33,6.33,4.5,5.0
4,10,jose,0.0,120000.0,4.0,4.0,6.0,4.0,4.0,6.0,...,1.0,3.0,5.67,3.33,6.0,4.0,5.0,6.67,5.0,6.0


## Variables cuantitativas

In [None]:
df_2[varc].describe(percentiles=np.linspace(0.1,1,10))

Unnamed: 0,x33,x35
count,405.0,405.0
mean,0.797531,71683.95
std,0.834583,87779.22
min,0.0,15000.0
10%,0.0,20000.0
20%,0.0,22000.0
30%,0.0,25000.0
40%,0.0,35000.0
50%,1.0,60000.0
60%,1.0,75000.0


## Variables categoricas

In [None]:
df_2[vard].describe()

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,...,x32,x34,x36,x37,x38,x39,x40,x41,x42,x43
count,405.0,405.0,405.0,405.0,405.0,405.0,405.0,405.0,405.0,405.0,...,405.0,405.0,405.0,405.0,405.0,405.0,405.0,405.0,405.0,405.0
mean,5.390123,4.271605,5.281481,4.11358,4.412346,5.308642,4.654321,4.567901,5.597531,4.42716,...,0.348148,3.039506,5.46963,4.370296,5.081432,4.341975,4.13363,5.199728,4.423457,4.666667
std,1.863113,1.00509,1.222632,1.307608,1.114862,1.162981,1.218265,1.228158,1.605527,1.104784,...,0.476972,0.953586,1.590886,1.100193,0.998331,0.977958,1.383579,0.9947,1.267088,0.881605
min,2.0,2.0,2.0,2.0,2.0,3.0,2.0,2.0,1.0,2.0,...,0.0,1.0,2.0,2.0,2.67,2.0,1.0,3.0,2.0,2.0
25%,3.0,4.0,4.0,3.0,4.0,5.0,4.0,4.0,4.0,4.0,...,0.0,3.0,4.0,3.67,4.33,4.0,2.67,4.33,4.0,4.0
50%,7.0,4.0,5.0,4.0,4.0,5.0,5.0,5.0,6.0,4.0,...,0.0,3.0,6.0,4.33,5.0,4.0,4.33,5.33,4.5,5.0
75%,7.0,5.0,6.0,5.0,5.0,6.0,6.0,6.0,7.0,5.0,...,1.0,4.0,7.0,5.33,6.0,5.0,5.33,6.0,5.5,5.0
max,7.0,7.0,7.0,6.0,7.0,7.0,7.0,7.0,7.0,7.0,...,1.0,5.0,7.0,6.67,7.0,7.0,6.33,7.0,6.0,7.0


In [None]:
df_2[vartxt].describe()

Unnamed: 0,X_s4
count,405
unique,2
top,sta fe
freq,253


#Graficos

###Restaurante Jose VS Santa fe

preguntas:
1. Plantea el problema a resolver.
2. ¿Los clientes están satisfechos con el restaurante?
3. ¿Existen problemas con la comida, la atmósfera u otro aspecto operacional del restaurante?
4. ¿Está correctamente definido el mercado objetivo o necesita enfocarse en un nuevo nicho?
5. ¿Cuáles son las características en común de los clientes satisfechos?
6. ¿Qué piensan los clientes de Santa Fe Grill de sus experiencias culinarias, comparado con los clientes del restaurante Jose?

###2. ¿Los clientes están satisfechos con el restaurante?

In [None]:
bar_plot(df_2,var= "X_s4", color = "green" , title  = "Restaurante favorito",x_axis="Nombre del restaurante mexicano favorito",y_axis="Frecuencia",size = 10 )

In [None]:
bar_plot(df=df_2,var='x22', 
        color="#5DADE2 ",
        title='Satisfaccion',
        x_axis="",
        y_axis='Frecuencia',
        size=15)

In [None]:
print(df_2['x22'].mode())

0    4.0
Name: x22, dtype: float64


In [None]:
df_2["x22"].describe()

count    405.000000
mean       4.829630
std        1.118305
min        3.000000
25%        4.000000
50%        5.000000
75%        6.000000
max        7.000000
Name: x22, dtype: float64

### 3. ¿Existen problemas con la comida, la atmósfera u otro aspecto operacional del restaurante?

In [None]:

# Survey columns grouped by topic; each group becomes its own DataFrame
# (with descriptive column names) in the following cells.
var_c = ['x14','x15','x18','x20','x27']                           # Food
var_p = ['x16','x26']                                       # Price
var_opinion = ['x22','x23','x24','x25']                     # Opinion: satisfaction, return, recommendation, frequency
var_ser = ['x12','x19','x21','x29','x40','x36']             # Service
var_atmos =['x13','x17','x28','x43']  # Atmosphere
# presumably customer tastes/lifestyle statements (going by the name) — TODO confirm against the survey codebook
var_clientes_gustos = ['x1','x2','x3','x4','x5','x6',          
                       'x7','x8','x9','x10','x11','x30']    
var_inf_clientes = ['x32','x33','x34','x35']  # Customer information: gender, children, age, income
tipo_clientes = ['x2','x5','x8','x10','x11','x37','x38','x39']  # Customer type / lifestyle traits

In [None]:
df_comida = df_2[var_c]
df_precio = df_2[var_p]
df_opinion = df_2[var_opinion]
df_servicio = df_2[var_ser]
df_atmosfera = df_2[var_atmos]
df_gustos = df_2[var_clientes_gustos]
df_inf_clientes = df_2[var_inf_clientes]
df_tipo_cliente = df_2[tipo_clientes]

In [None]:
df_comida = df_comida.rename({'x14': 'Tamaño de las porciones', 
                              'x15': 'Frescura', 
                              'x18': 'Sabor', 
                              'x20': 'Temperatura adecuada', 
                              'x27':'Importancia de la calidad de la comida'},
                              axis=1)
df_precio = df_precio.rename({'x16':'Precio Razonable',
                              'x26':'Importancia del precio'},
                             axis = 1)
df_opinion = df_opinion.rename({'x22':'Satisfaccion',
                                'x23':'Le gustaria regresar',
                                'x24':'Lo recomendaria',
                                'x25':'Frecuencia de comer ahí'},axis = 1)
df_servicio = df_servicio.rename({'x12':'Empleados amigables',
                                  'x19':'Empleados bien capacitados',
                                  'x21':'Servicio rapido',
                                  'x29':'Importancia servicio',
                                  'x40':'Servicio',
                                  'x36':'inovador'},axis= 1)
df_atmosfera = df_atmosfera.rename({'x13':'Lugar divertido ',
                                    'x17':'Interior atractivo',
                                    'x28':'importancia atmosfera',
                                    'x43':'Atmosfera'},axis = 1)
df_inf_clientes = df_inf_clientes.rename({'x32':'Genero',
                                          'x33':'Numero de hijos',
                                          'x34':'Edad','x35':'Ingreso'},axis = 1)
df_tipo_cliente = df_tipo_cliente.rename({'x2':'Fiestero',
                                          'x5':'Social',
                                          'x8':'Alimentacion nutritiva y balanceada',
                                          'x10':'Cuidadoso con lo que come',
                                          'x11':'Probar nuevs marcas',
                                          'x37':'Conciente de la salud',
                                          'x38':'Opinion lider',
                                          'x39':'Extrovertido'},axis = 1)

In [None]:
for i in df_comida.columns.values:
  bar_plot(df=df_comida,var= i, 
        color="#5DADE2 ",
        title=i,
        x_axis="",
        y_axis='Respuestas',
        size=10)

In [None]:
for i in df_precio.columns.values:
  bar_plot(df=df_precio,var= i, 
        color="#5DADE2 ",
        title=i,
        x_axis="",
        y_axis='Respuestas',
        size=10)


In [None]:
df_precio.describe()

Unnamed: 0,Precio Razonable,Importancia del precio
count,405.0,405.0
mean,4.338272,2.348148
std,1.237469,1.147437
min,2.0,1.0
25%,4.0,1.0
50%,4.0,2.0
75%,5.0,3.0
max,6.0,4.0


In [None]:
# Mode of every food-related variable: printed and collected for plotting.
modas_comida = []
for i in df_comida.columns.values:
    moda = df_comida[i].mode()[0]
    print(moda)
    modas_comida.append(moda)

6.0
7.0
5.0
5.0
1.0


In [None]:
def porcentaje(df:pd.DataFrame,columnas,a,b):
    """Print and return the share of rows taking each integer value.

    For every column in ``columnas`` and every integer ``i`` in
    ``range(a, b)``, the rows whose value equals ``i`` are counted and one
    line ``valor <i> <count> <percentage>%`` is printed.

    Args:
        df (pd.DataFrame): data holding the columns.
        columnas: iterable with the column names to summarise.
        a (int): first value to count (inclusive).
        b (int): last value to count (exclusive).

    Returns:
        list: one inner list per column with the percentage (0-100) of rows
        for each value, based on the share rounded to two decimals.
    """
    # The denominator is the real number of rows instead of the former
    # hard-coded 405.
    total = len(df)
    lista_general = []
    for j in columnas:
        print(j)
        lista_por = []
        for i in range(a, b):
            # Vectorised comparison instead of a row-wise apply.
            cuenta = int((df[j] == i).sum())
            proporcion = cuenta / total if total else 0.0
            # The outer round removes float artefacts such as 55.00000000000001.
            lista_por.append(round(round(proporcion, 2) * 100, 2))
            print(f'valor {i}', cuenta, f'{round(proporcion * 100)}%')
        lista_general.append(lista_por)
        print(" ")
    return lista_general



# Percentage of answers per value (0 to 8) for every food variable; the table
# is also printed by porcentaje.
a = porcentaje(df_comida,df_comida.columns.values,0,9)


import plotly.graph_objects as go

# Bubble chart: one marker per food variable, placed at its modal answer.
# NOTE(review): the label percentages and the marker sizes are hard-coded;
# they match the share of the modal value printed by porcentaje for the
# current data and are not derived from `a` — update them if the data changes.
fig = go.Figure(
    
    data=[go.Scatter(
    
    x=df_comida.columns.values, y=modas_comida,
    text=['De acuerdo<br>33%', 'Muy de acuerdo<br>41%', 'Casi de acuerdo<br>34%', 'Casi de acuerdo<br>37%','Muy importante<br>55%'],
    mode='markers',
    marker=dict(
        color=['rgb(93, 164, 214)', 'rgb(255, 144, 14)',
               'rgb(44, 160, 101)', 'rgb(255, 65, 54)',
               'rgb(40, 10, 100)'],
        opacity=[0.8, 0.8, 0.8, 0.8,0.8],
        size=[33,41,34,37,55],
    )

)])
fig.update_layout(title='Comida')

fig.show()

Tamaño de las porciones
valor 0 0 0%
valor 1 0 0%
valor 2 35 9%
valor 3 66 16%
valor 4 94 23%
valor 5 78 19%
valor 6 132 33%
valor 7 0 0%
valor 8 0 0%
 
Frescura
valor 0 0 0%
valor 1 0 0%
valor 2 0 0%
valor 3 0 0%
valor 4 85 21%
valor 5 93 23%
valor 6 60 15%
valor 7 167 41%
valor 8 0 0%
 
Sabor
valor 0 0 0%
valor 1 0 0%
valor 2 0 0%
valor 3 12 3%
valor 4 88 22%
valor 5 137 34%
valor 6 98 24%
valor 7 70 17%
valor 8 0 0%
 
Temperatura adecuada
valor 0 0 0%
valor 1 0 0%
valor 2 13 3%
valor 3 81 20%
valor 4 84 21%
valor 5 148 37%
valor 6 66 16%
valor 7 13 3%
valor 8 0 0%
 
Importancia de la calidad de la comida
valor 0 0 0%
valor 1 221 55%
valor 2 126 31%
valor 3 56 14%
valor 4 2 0%
valor 5 0 0%
valor 6 0 0%
valor 7 0 0%
valor 8 0 0%
 


In [None]:
ser_disc = ['Empleados amigables', 'Empleados bien capacitados',
       'Servicio rapido', 'Importancia servicio']
serv_con = [ 'Servicio', 'inovador']

for i in ser_disc:
  bar_plot(df=df_servicio,var= i, 
        color="#884EA0 ",
        title=i,
        x_axis="",
        y_axis='Respuestas',
        size=15)
for j in serv_con:
  histograma_(df=df_servicio, var = j , 
              nbins= 25, 
              color  = "blue", 
              title = j,
              x_axis="calificacion", 
              y_axis= "",size=15)


In [None]:
df_servicio.iloc[:, [4,5]].describe()  

Unnamed: 0,Servicio,inovador
count,405.0,405.0
mean,4.13363,5.46963
std,1.383579,1.590886
min,1.0,2.0
25%,2.67,4.0
50%,4.33,6.0
75%,5.33,7.0
max,6.33,7.0


In [None]:

at_dis = ['Lugar divertido ', 'Interior atractivo', 'importancia atmosfera']
atm_con=['Atmosfera']

for i in at_dis:
  bar_plot(df=df_atmosfera,var= i, 
        color="#5DADE2 ",
        title=i,
        x_axis="",
        y_axis='Respuestas',
        size=15)
  
for j in atm_con:
  histograma_(df=df_atmosfera,
              var = j , nbins= 25, 
              color  = "blue", 
              title = j,
              x_axis="calificacion", 
              y_axis= "",size=15)

In [None]:
df_atmosfera.iloc[:, -1].describe()


count    405.000000
mean       4.666667
std        0.881605
min        2.000000
25%        4.000000
50%        5.000000
75%        5.000000
max        7.000000
Name: Atmosfera, dtype: float64

In [None]:
for i in  df_opinion.columns.values:
  bar_plot(df=df_opinion,var= i, 
        color="#5DADE2 ",
        title=i,
        x_axis="",
        y_axis='Respuestas',
        size=15)
  

### 4. ¿Está correctamente definido el mercado objetivo o necesita enfocarse en un nuevo nicho?

In [None]:
df_inf_clientes.columns.values

array(['Genero', 'Numero de hijos', 'Edad', 'Ingreso'], dtype=object)

In [None]:
lista_auc_inf=['Genero', 'Numero de hijos', 'Edad']
lista_ingreso=['Ingreso']

In [None]:
# Bar charts for the discrete customer-information variables
# (those listed in lista_auc_inf).

for i in lista_auc_inf:
  bar_plot(df=df_inf_clientes,var= i, 
        color="#5DADE2 ",
        title=i,
        x_axis="",
        y_axis='Respuestas',
        size=10)
  


La variable de ingreso presenta datos atípicos; necesitamos hacer un análisis con y sin ellos.

#Histograma de ingresos con datos atipicos

In [None]:
bar_plot(df=df_inf_clientes,var= 'Ingreso', 
        color="#2471A3",
        title='Ingreso',
        x_axis="",
        y_axis='Respuestas',
        size=10)


In [None]:
box_plot(df=df_inf_clientes,var='Ingreso',
         color="#C70039" ,
        title= 'Boxplot del ingreso de los clientes',
        y_axis='Ingreso',
        size=15)

In [None]:
outliers=OUTLIERS(df_inf_clientes, ['Ingreso'])
outliers

Unnamed: 0,variables,n_outliers_IQR,indices
0,Ingreso,5,"[41, 115, 155, 207, 380]"


In [None]:
# Extraemos todos los indices que se detectaron como outliers
# para cada variable
indices=list(outliers["indices"].values)

In [None]:
# Sumamos los conjuntos para tener una sola lista
# que contenga todos los indices marcados como outliers
from  functools import reduce
indices=list(set(reduce(lambda x,y: x+y, indices)))


In [None]:
# Obtenemos el porcentaje de representatividad de
# los valores atipicos respecto al total

(len(indices)/df.shape[0])*100

1.2345679012345678

In [None]:
# The rows flagged as outliers are shown below.
# NOTE(review): the mask is built from df.index but applied to
# df_inf_clientes. This only works while both frames have the same rows in
# the same order (df_inf_clientes is a column subset of df_2, itself a copy
# of df); it raises a length-mismatch error once rows have been removed.

df_outlier=df_inf_clientes[df.index.isin(indices)]
df_outlier

Unnamed: 0,Genero,Numero de hijos,Edad,Ingreso
41,0.0,0.0,2.0,850000.0
115,1.0,2.0,2.0,230000.0
155,0.0,0.0,4.0,590000.0
207,1.0,0.0,2.0,1250000.0
380,1.0,2.0,3.0,240000.0


Eliminacion de datos atipicos

In [None]:
# Remove the rows that were flagged as outliers and renumber the index.
# NOTE(review): the mask comes from df.index, so this cell can run only once
# per session: after it, df_inf_clientes has fewer rows than df and a re-run
# fails with a length mismatch.

df_inf_clientes=df_inf_clientes[~df.index.isin(indices)].reset_index(drop=True)

#Histograma de ingreso de los clientes sin datos atipicos

In [None]:
# Bar chart of the 'Ingreso' column of df_inf_clientes
# (same call as before, now on the current contents of the frame).
bar_plot(
    df=df_inf_clientes,
    var='Ingreso',
    color="#2471A3",
    title='Ingreso',
    x_axis="",
    y_axis='Respuestas',
    size=10,
)

#Boxplot del ingreso de clientes sin datos atipicos

In [None]:
# Box plot of the 'Ingreso' column of df_inf_clientes
# (same call as before, now on the current contents of the frame).
box_plot(
    df=df_inf_clientes,
    var='Ingreso',
    color="#C70039",
    title='Boxplot del ingreso de los clientes',
    y_axis='Ingreso',
    size=15,
)

### 5. ¿Cuáles son las características en común de los clientes satisfechos?

In [None]:
# Variable x27: bar chart of the food-quality importance column of df_comida.
bar_plot(
    df=df_comida,
    var='Importancia de la calidad de la comida',
    color="#E74C3C ",
    title='Importancia de la calidad de la comida',
    x_axis="",
    y_axis='Respuestas',
    size=10,
)

In [None]:
# Variable x26: bar chart of the price importance column of df_precio.
bar_plot(
    df=df_precio,
    var='Importancia del precio',
    color="#E74C3C ",
    title='Importancia del precio',
    x_axis="",
    y_axis='Respuestas',
    size=10,
)

In [None]:
# Display the column names of df_tipo_cliente as an array.
df_tipo_cliente.columns.values

array(['Fiestero', 'Social', 'Alimentacion nutritiva y balanceada',
       'Cuidadoso con lo que come', 'Probar nuevs marcas',
       'Conciente de la salud', 'Opinion lider', 'Extrovertido'],
      dtype=object)

In [None]:
# Customer-type columns, split by how they are plotted below:
# tipo_cli_dis is drawn with bar_plot, tipo_cli_con with histograma_.
# NOTE(review): 'Alimentacion nutritiva y balanceada' appears in the column
# listing but in neither list — confirm that this is intentional.
tipo_cli_dis = [
    'Fiestero',
    'Social',
    'Cuidadoso con lo que come',
    'Probar nuevs marcas',
]
tipo_cli_con = [
    'Conciente de la salud',
    'Opinion lider',
    'Extrovertido',
]

In [None]:
# Party person, health conscious, careful about what I eat,
# buy new products, like to go out socially.
# One bar chart per column listed in tipo_cli_dis.
for i in tipo_cli_dis:
    bar_plot(
        df=df_tipo_cliente,
        var=i,
        color="#E74C3C ",
        title=i,
        x_axis="",
        y_axis='Respuestas',
        size=10,
    )

In [None]:
# One 20-bin histogram per column listed in tipo_cli_con.
for i in tipo_cli_con:
    histograma_(
        df=df_tipo_cliente,
        var=i,
        nbins=20,
        color="#E74C3C ",
        title=i,
        x_axis="calificacion",
        y_axis="",
        size=15,
    )

###6. ¿Qué piensan los clientes de Santa Fe Grill de sus experiencias culinarias, comparado con los clientes del restaurante Jose?

In [None]:
# Food variables broken down by restaurant.
# First split the responses according to the favourite restaurant
# recorded in column 'X_s4'.
df_jose = df_2[df_2['X_s4'].eq('jose')]
df_sta_fe = df_2[df_2['X_s4'].eq('sta fe')]


In [None]:
# Slice the 'jose' responses into one frame per column group
# (var_c, var_p, var_opinion, var_ser, var_atmos).
(
    df_comida_j,
    df_precio_j,
    df_opinion_j,
    df_servicio_j,
    df_atmosfera_j,
) = (
    df_jose[var_c],
    df_jose[var_p],
    df_jose[var_opinion],
    df_jose[var_ser],
    df_jose[var_atmos],
)


In [None]:
# Replace the coded column names (x12 ... x43) of the 'jose' frames
# with readable labels; columns not listed in a mapping keep their name.
df_comida_j = df_comida_j.rename(columns={
    'x14': 'Tamaño de las porciones',
    'x15': 'Frescura',
    'x18': 'Sabor',
    'x20': 'Temperatura adecuada',
    'x27': 'Calidad',
})
df_precio_j = df_precio_j.rename(columns={
    'x16': 'Razonable',
    'x26': 'Precios',
})
df_opinion_j = df_opinion_j.rename(columns={
    'x22': 'Satisfaccion',
    'x23': 'Le gustaria regresar',
    'x24': 'Lo recomendaria',
    'x25': 'Frecuencia de comer en...',
})
df_servicio_j = df_servicio_j.rename(columns={
    'x12': 'Empleados amigables',
    'x19': 'Empleados bien capacitados',
    'x21': 'Servicio rapido',
    'x29': 'Importancia servicio',
    'x40': 'Servicio',
    'x36': 'inovador',
})
# The trailing space in 'Lugar divertido ' is deliberate here: the same
# spelling is used when this column is looked up for plotting.
df_atmosfera_j = df_atmosfera_j.rename(columns={
    'x13': 'Lugar divertido ',
    'x17': 'Interior atractivo',
    'x28': 'importancia atmosfera',
    'x43': 'Atmosfera',
})


In [None]:

# Slice the 'sta fe' responses into one frame per column group
# (var_c, var_p, var_opinion, var_ser, var_atmos).
(
    df_comida_s,
    df_precio_s,
    df_opinion_s,
    df_servicio_s,
    df_atmosfera_s,
) = (
    df_sta_fe[var_c],
    df_sta_fe[var_p],
    df_sta_fe[var_opinion],
    df_sta_fe[var_ser],
    df_sta_fe[var_atmos],
)


In [None]:
# Replace the coded column names (x12 ... x43) of the 'sta fe' frames
# with readable labels; columns not listed in a mapping keep their name.
df_comida_s = df_comida_s.rename(columns={
    'x14': 'Tamaño de las porciones',
    'x15': 'Frescura',
    'x18': 'Sabor',
    'x20': 'Temperatura adecuada',
    'x27': 'Calidad',
})
df_precio_s = df_precio_s.rename(columns={
    'x16': 'Razonable',
    'x26': 'Precios',
})
df_opinion_s = df_opinion_s.rename(columns={
    'x22': 'Satisfaccion',
    'x23': 'Le gustaria regresar',
    'x24': 'Lo recomendaria',
    'x25': 'Frecuencia de comer en...',
})
df_servicio_s = df_servicio_s.rename(columns={
    'x12': 'Empleados amigables',
    'x19': 'Empleados bien capacitados',
    'x21': 'Servicio rapido',
    'x29': 'Importancia servicio',
    'x40': 'Servicio',
    'x36': 'inovador',
})
# The trailing space in 'Lugar divertido ' is deliberate here: the same
# spelling is used when this column is looked up for plotting.
df_atmosfera_s = df_atmosfera_s.rename(columns={
    'x13': 'Lugar divertido ',
    'x17': 'Interior atractivo',
    'x28': 'importancia atmosfera',
    'x43': 'Atmosfera',
})


##Comida 

In [None]:
# Food columns: one bar chart per column, first for the 'sta fe' frame
# (blue), then for the 'jose' frame (red).
for i in df_comida_s.columns:
    bar_plot(
        df=df_comida_s,
        var=i,
        color="blue ",
        title=i,
        x_axis="",
        y_axis='Respuestas',
        size=10,
    )

for i in df_comida_j.columns:
    bar_plot(
        df=df_comida_j,
        var=i,
        color="red ",
        title=i,
        x_axis="",
        y_axis='Respuestas',
        size=10,
    )


In [None]:
# Service columns: ser_disc / serv_con.
ser_disc = [
    'Empleados amigables',
    'Empleados bien capacitados',
    'Servicio rapido',
]
serv_con = [
    'Servicio',
    'inovador',
]

# Atmosphere columns: at_dis is plotted with bar_plot and atm_con with
# histograma_ in the cells that follow.
at_dis = [
    'Lugar divertido ',
    'Interior atractivo',
]
atm_con = [
    'Atmosfera',
]


In [None]:

# Atmosphere columns listed in at_dis: one bar chart per column,
# first for the 'jose' frame (red), then for the 'sta fe' frame (blue).
for i in at_dis:
    bar_plot(
        df=df_atmosfera_j,
        var=i,
        color="red",
        title=i,
        x_axis="",
        y_axis='Respuestas',
        size=10,
    )

for i in at_dis:
    bar_plot(
        df=df_atmosfera_s,
        var=i,
        color="blue ",
        title=i,
        x_axis="",
        y_axis='Respuestas',
        size=10,
    )


In [None]:
# Atmosphere columns listed in atm_con: one 25-bin histogram per column,
# first for the 'jose' frame (red), then for the 'sta fe' frame (blue).
for j in atm_con:
    histograma_(
        df=df_atmosfera_j,
        var=j,
        nbins=25,
        color="red",
        title=j,
        x_axis="calificacion",
        y_axis="",
        size=10,
    )
for j in atm_con:
    histograma_(
        df=df_atmosfera_s,
        var=j,
        nbins=25,
        color="blue",
        title=j,
        x_axis="calificacion",
        y_axis="",
        size=10,
    )


In [None]:
# Opinion columns: one bar chart per column, first for the 'sta fe'
# frame (blue), then for the 'jose' frame (red).
for i in df_opinion_s.columns:
    bar_plot(
        df=df_opinion_s,
        var=i,
        color="blue ",
        title=i,
        x_axis="",
        y_axis='Respuestas',
        size=15,
    )
for i in df_opinion_j.columns:
    bar_plot(
        df=df_opinion_j,
        var=i,
        color="red",
        title=i,
        x_axis="",
        y_axis='Respuestas',
        size=15,
    )


In [None]:
# Service columns for the 'sta fe' frame.
# ser_disc is drawn with bar charts, serv_con with 25-bin histograms.
ser_disc = [
    'Empleados amigables',
    'Empleados bien capacitados',
    'Servicio rapido',
    'Importancia servicio',
]
serv_con = [
    'Servicio',
    'inovador',
]

for i in ser_disc:
    bar_plot(
        df=df_servicio_s,
        var=i,
        color="blue ",
        title=i,
        x_axis="",
        y_axis='Respuestas',
        size=15,
    )
for j in serv_con:
    histograma_(
        df=df_servicio_s,
        var=j,
        nbins=25,
        color="blue",
        title=j,
        x_axis="calificacion",
        y_axis="",
        size=15,
    )


In [None]:
# Service columns for the 'jose' frame.
# ser_disc is drawn with bar charts, serv_con with 25-bin histograms.
ser_disc = [
    'Empleados amigables',
    'Empleados bien capacitados',
    'Servicio rapido',
    'Importancia servicio',
]
serv_con = [
    'Servicio',
    'inovador',
]

for i in ser_disc:
    bar_plot(
        df=df_servicio_j,
        var=i,
        color="red ",
        title=i,
        x_axis="",
        y_axis='Respuestas',
        size=15,
    )
for j in serv_con:
    histograma_(
        df=df_servicio_j,
        var=j,
        nbins=25,
        color="red",
        title=j,
        x_axis="calificacion",
        y_axis="",
        size=15,
    )
