In [205]:
import pandas as pd
import warnings 
import unidecode 

In [206]:
def read_data():
    '''
    Description:
    Read the data from the files and return the dataframes
    return: df, dfcc, dfc (dataframes) 
    '''
    df = pd.read_csv('results/EntrevistaInicial.csv')
    return df

In [207]:
def get_dict ():
    '''
    Description: Return a dictionary with the states
    return: dict_Estados (dictionary)
    '''

    dict_Mes ={1: 'Enero', 2: 'Febrero', 3: 'Marzo', 4: 'Abril', 5: 'Mayo', 6: 'Junio', 7: 'Julio', 8: 'Agosto', 9: 'Septiembre', 10: 'Octubre', 11: 'Noviembre', 12: 'Diciembre'}
    
    return dict_Mes

In [208]:
def transform_data(df):
    '''
    Description: Transform the data from the dataframe
    return: df (dataframe)
    '''
    
    dict_Mes = get_dict()
    df['MesRegistro'] = df['MesRegistro'].map(dict_Mes)
    return df

In [209]:
def split_date(df):
    '''
    Description: Split the date in year, month and day
    return: df (dataframe)
    '''
    
    df['FechaRegistro'] = pd.to_datetime(df['FechaRegistro'])
    df['AñoRegistro'] = df['FechaRegistro'].dt.year
    df['MesRegistro'] = df['FechaRegistro'].dt.month
    df['DiaRegistro'] = df['FechaRegistro'].dt.day
    df = transform_data(df)
    return df

In [210]:
def sem(df , lis_1sem, lis_2sem):
    '''
    Description: Create a column with the semester
    return: df (dataframe)
    '''
    
    df['Semestre'] = 0
    for ind, val in df['MesRegistro'].items():
        if val in lis_1sem:
            df.loc[ind, 'Semestre'] = 1
        elif val in lis_2sem:
            df.loc[ind, 'Semestre'] = 2
    return df

In [211]:
def group_sust():
    '''
    Description: Create a dictionary with the substances
    return: dict_sust_inverso (dictionary)
    '''
    
    dict_sust = {'Tabaco':(1,2) , 'Alcohol':(3,4,5,84), 'Marihuana':(6,7,8,9,10,11,),'Inalables':(24,25,26,27,28) , 'Cocaina':(12,13,14,15,86) , 'Metanfetaminas': (16,17,18,19,85) , 'OtrosEstimulantes':(19,20,21,22,23), 'Extasis':(22,50,51,48), 'Benzodiacepinas': (52,53) , 'OtrosDepresores': (54,55,56), 'Alucinogenos': (29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47), 'Heroina': (60,61), 'OpiodesSinteticos': (62,63,64,65,66,67), 'OpioDerivados': (57,58,59), 'UtilidadMedica': (68,69,70,71,72,73,74,75), 'SustanciasDeAbuso': (76,77,78,79,80,81,82), 'Otros': (83,10000)}
    dict_sust_inverso = {}
    for sustancia, numeros in dict_sust.items():
        for numero in numeros:
            dict_sust_inverso[numero] = sustancia
    return dict_sust_inverso

In [212]:
def mod_sust (df):
    '''
    Description: Modify the columns MayorImpactoSustanciaId
    return: df (dataframe)
    '''
    
    dict_sust = group_sust()
    for col in df.columns:
        if col.startswith('MayorImpactoSustanciaId'):
            df[col] = df[col].map(dict_sust)
    for col in df.columns:
        if col.startswith('SustanciaI'):
            df[col] = df[col].map(dict_sust)
    return df

In [213]:
def mod_data(df):
    '''
    Description: Modify the data from the dataframe
    return: df (dataframe)
    '''
    
    df = split_date(df)
    lista_1sem = ['Enero', 'Febrero', 'Marzo', 'Abril', 'Mayo', 'Junio']
    lista_2sem = ['Julio', 'Agosto', 'Septiembre', 'Octubre', 'Noviembre', 'Diciembre']
    df = sem(df, lista_1sem, lista_2sem)
    df = mod_sust(df)
    df['Edad'] = df['Edad'].astype(str)
    df['Edad'] = df['Edad'].apply(unidecode.unidecode)
    df['Edad'] = df['Edad'].str.replace('+' , '' )
    df['Edad'] = df['Edad'].str.replace('*' , '')
    df['Edad'] = df['Edad'].str.replace('.0' , '')
    df['Edad'] = df['Edad'].replace('', '0')
    df['Edad'] = df['Edad'].astype(float)

    return df

In [214]:
def range_age(df):
    '''
    Description: Create a column with the age range
    return: df (dataframe)
    '''
    
    df['Edad'] = pd.cut(df['Edad'], bins=[0, 11, 20, 30, 40, 50, 60, 150], labels=['0-10', '11-20', '21-30', '31-40', '41-50', '51-60', '65+'])
    return df

In [215]:
def gen_valueof_column():
    '''
    Description: Create a dictionary
    return: value_to_column (dictionary)
    '''
    value_to_column = {
        'Tabaco': 'Tabaco',
        'Alcohol': 'Alcohol',
        'Marihuana': 'Marihuana',
        'Inalables': 'Inalables',
        'Cocaina': 'Cocaina',
        'Metanfetaminas': 'Metanfetaminas',
        'OtrosEstimulantes': 'OtrosEstimulantes',
        'Extasis': 'Extasis',
        'Benzodiacepinas': 'Benzodiacepinas',
        'OtrosDepresores': 'OtrosDepresores',
        'Alucinogenos': 'Alucinogenos',
        'Heroina': 'Heroina',
        'OpiodesSinteticos': 'OpiodesSinteticos',
        'OpioDerivados': 'OpioDerivados',
        'UtilidadMedica': 'UtilidadMedica',
        'SustanciasDeAbuso': 'SustanciasDeAbuso',
        'Otros': 'Otros'
    }
    return value_to_column

In [216]:
def tendencias_MI(df):
    df_result = pd.DataFrame(columns = ['Semestre', 'Año' , 'Unidad' , 'Sexo' , 'Categoria', 'Tabaco' , 'Alcohol', 'Marihuana','Inalables', 'Cocaina' , 'Metanfetaminas', 'OtrosEstimulantes', 'Extasis', 'Benzodiacepinas', 'OtrosDepresores', 'Alucinogenos', 'Heroina', 'OpiodesSinteticos', 'OpioDerivados', 'UtilidadMedica', 'SustanciasDeAbuso', 'Otros'])
    if df_result.empty:
        df_result.loc[0] = 0    
    for col in df.columns:
        if col.startswith('MayorImpactoSustanciaId'):
            value_to_column = gen_valueof_column()
            for ind, val in df[col].items():
                if val in value_to_column:
                    df_result.loc[0, value_to_column[val]] += 1
    df_result['Semestre'][0] = df['Semestre'].iloc[0]
    df_result['Año'][0] = df['AñoRegistro'].iloc[0]
    df_result['Unidad'][0] = 'Total'
    df_result['Categoria'][0] = 'Droga impacto'
    df_result['Sexo'][0] = df['SexoId'].iloc[0]
    return df_result

In [217]:
def group_data(df):
    '''     
    Description: Group the data by semester
    return: df_result (dataframe)
    '''
    
    df_result = pd.DataFrame()
    for val , group in df.groupby(['Semestre', 'AñoRegistro', 'SexoId']):
        df_result = pd.concat([df_result, tendencias_MI(group)])
    return df_result

In [218]:
def tendencias_UM(df):
    df_result = pd.DataFrame(columns = ['Semestre', 'Año' , 'Unidad' , 'Sexo' , 'Categoria', 'Tabaco' , 'Alcohol', 'Marihuana','Inalables', 'Cocaina' , 'Metanfetaminas', 'OtrosEstimulantes', 'Extasis', 'Benzodiacepinas', 'OtrosDepresores', 'Alucinogenos', 'Heroina', 'OpiodesSinteticos', 'OpioDerivados', 'UtilidadMedica', 'SustanciasDeAbuso', 'Otros'])
    if df_result.empty:
        df_result.loc[0] = 0    
    value_to_column = gen_valueof_column()
    for col in df.columns:
        if col.startswith('SustanciaI'):
            exp = re.search(r'SustanciaI\d+')
            if exp:
                if df['ComunAbstinenciaId'+str(exp.group(1))].iloc[0] in range (1,4):
                    for ind, val in df[col].items():
                        if val in value_to_column:
                            df_result.loc[0, value_to_column[val]] += 1
    df_result['Semestre'][0] = df['Semestre'].iloc[0]
    df_result['Año'][0] = df['AñoRegistro'].iloc[0]
    df_result['Unidad'][0] = 'Total'
    df_result['Categoria'][0] = 'Droga Ultimo Mes'
    df_result['Sexo'][0] = df['SexoId'].iloc[0]
    return df_result

In [219]:
def group_dataUM(df):
    '''     
    Description: Group the data by semester
    return: df_result (dataframe)
    '''
    
    df_result = pd.DataFrame()
    for val , group in df.groupby(['Semestre', 'AñoRegistro', 'SexoId']):
        df_result = pd.concat([df_result, tendencias_MI(group)])
    return df_result

In [220]:
def main ():
    '''
    Description: Main function
    '''
    df = read_data()
    df = mod_data(df)
    df = range_age(df)
    df1 = group_data(df)
    df2 = group_dataUM(df)
#    df.to_csv('results/Tendencias.csv', index=False, encoding='utf-8')
    return df2


In [221]:
warnings.filterwarnings("ignore") 
df = main()

In [222]:
df

Unnamed: 0,Semestre,Año,Unidad,Sexo,Categoria,Tabaco,Alcohol,Marihuana,Inalables,Cocaina,...,Extasis,Benzodiacepinas,OtrosDepresores,Alucinogenos,Heroina,OpiodesSinteticos,OpioDerivados,UtilidadMedica,SustanciasDeAbuso,Otros
0,1,2021,Total,1,Droga impacto,521,1733,1238,186,746,...,7,51,3,25,45,21,3,21,9,8
0,1,2021,Total,2,Droga impacto,446,892,317,45,103,...,4,45,5,19,8,6,2,19,4,7
0,1,2022,Total,1,Droga impacto,965,2688,2126,238,978,...,13,94,16,32,79,25,3,19,11,664
0,1,2022,Total,2,Droga impacto,935,2472,580,55,143,...,5,102,18,8,13,15,1,32,12,1659
0,1,2023,Total,1,Droga impacto,925,2091,2564,221,956,...,10,93,11,41,91,25,4,21,16,2840
0,1,2023,Total,2,Droga impacto,813,1085,712,60,148,...,4,97,12,13,13,20,4,16,13,4933
0,2,2021,Total,1,Droga impacto,577,1698,1272,166,767,...,6,49,5,21,49,19,4,6,9,13
0,2,2021,Total,2,Droga impacto,452,916,301,36,123,...,2,57,12,7,7,8,1,14,3,17
0,2,2022,Total,1,Droga impacto,911,2454,2401,205,873,...,10,82,13,24,67,29,4,18,24,1621
0,2,2022,Total,2,Droga impacto,888,1755,703,37,143,...,5,88,13,13,13,13,1,21,10,3198
