In [28]:
import os
import pandas as pd
import numpy as np
import arviz as az

from cmdstanpy import CmdStanModel, cmdstan_path, set_cmdstan_path

az.style.use("arviz-darkgrid")

In [2]:
set_cmdstan_path(os.path.join('/','opt','conda', 'bin', 'cmdstan'))
cmdstan_path()

'/opt/conda/bin/cmdstan'

In [19]:
stan_file = os.path.join('src', 'model.stan')

In [20]:
model = CmdStanModel(stan_file=stan_file)

16:19:37 - cmdstanpy - INFO - compiling stan file /home/jovyan/Stakeholders-Cooperativas/src/model.stan to exe file /home/jovyan/Stakeholders-Cooperativas/src/model
16:19:51 - cmdstanpy - INFO - compiled model executable: /home/jovyan/Stakeholders-Cooperativas/src/model


In [21]:
print(model)

CmdStanModel: name=model
	 stan_file=/home/jovyan/Stakeholders-Cooperativas/src/model.stan
	 exe_file=/home/jovyan/Stakeholders-Cooperativas/src/model
	 compiler_options=stanc_options={}, cpp_options={}


In [6]:
data_file = os.path.join('data', 'sample.csv')
list_columns = [
    # cooperada
    "NOME", "segmento",
    # respondente
    "sexo", "idade", "escolaridade", "cargo", "anos_trabalhados",
    # cooperada
    "idade_geral", "idade_formal", "numero_cooperados", "pessoas_adm", "acordos", "faturamento",
    # stakeholders
    "federal_1", "federal_2", "federal_3",
    "estadual_municipal_1", "estadual_municipal_2", "estadual_municipal_3",
    "concorrentes_1", "concorrentes_2", "concorrentes_3",
    "fornecedores_1", "fornecedores_2", "fornecedores_3",
    "sociedade_1", "sociedade_2", "sociedade_3",
    "cooperados_1", "cooperados_2", "cooperados_3",
    "federal_4", "federal_5", "federal_6",
    "estadual_municipal_4", "estadual_municipal_5", "estadual_municipal_6",
    "concorrentes_4", "concorrentes_5", "concorrentes_6",
    "fornecedores_4", "fornecedores_5", "fornecedores_6",
    "sociedade_4", "sociedade_5", "sociedade_6",
    "cooperados_4", "cooperados_5", "cooperados_6",
    "federal_7", "federal_8", "federal_9",
    "estadual_municipal_7", "estadual_municipal_8", "estadual_municipal_9",
    "concorrentes_7", "concorrentes_8", "concorrentes_9",
    "fornecedores_7", "fornecedores_8", "fornecedores_9",
    "sociedade_7", "sociedade_8", "sociedade_9",
    "cooperados_7", "cooperados_8", "cooperados_9",
    # cooperada
    "flexibilidade", "valorizacao", "inovacao", "mudancas", "formalismo", "produtividade", "hierarquia", "controle"
]
df = pd.read_csv(data_file, sep=';', encoding='latin-1', names=list_columns, header=1)
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 117 entries, COOPERATIVA DE PRODUÇÃO ARTESANAL DO CRUTAC to MARINGÁ CULTURAL COOPERATIVISMO
Data columns (total 75 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   NOME                  117 non-null    object
 1   segmento              117 non-null    object
 2   sexo                  117 non-null    object
 3   idade                 117 non-null    int64 
 4   escolaridade          117 non-null    object
 5   cargo                 117 non-null    object
 6   anos_trabalhados      117 non-null    int64 
 7   idade_geral           117 non-null    int64 
 8   idade_formal          117 non-null    int64 
 9   numero_cooperados     117 non-null    int64 
 10  pessoas_adm           117 non-null    int64 
 11  acordos               117 non-null    int64 
 12  faturamento           117 non-null    int64 
 13  federal_1             117 non-null    object
 14  federal_2             117

In [7]:
df.replace(["Discordo Totalmente", "Discordo", "Não concordo e nem discordo", "Concordo", "Concordo Totalmente"], [-2, -1, 0, 1, 2], inplace=True)

In [8]:
df.replace("Nem concordo nem discordo", 0, inplace=True)

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 117 entries, COOPERATIVA DE PRODUÇÃO ARTESANAL DO CRUTAC to MARINGÁ CULTURAL COOPERATIVISMO
Data columns (total 75 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   NOME                  117 non-null    object
 1   segmento              117 non-null    object
 2   sexo                  117 non-null    object
 3   idade                 117 non-null    int64 
 4   escolaridade          117 non-null    object
 5   cargo                 117 non-null    object
 6   anos_trabalhados      117 non-null    int64 
 7   idade_geral           117 non-null    int64 
 8   idade_formal          117 non-null    int64 
 9   numero_cooperados     117 non-null    int64 
 10  pessoas_adm           117 non-null    int64 
 11  acordos               117 non-null    int64 
 12  faturamento           117 non-null    int64 
 13  federal_1             117 non-null    int64 
 14  federal_2             117

In [10]:
df.head()

Unnamed: 0,NOME,segmento,sexo,idade,escolaridade,cargo,anos_trabalhados,idade_geral,idade_formal,numero_cooperados,...,cooperados_8,cooperados_9,flexibilidade,valorizacao,inovacao,mudancas,formalismo,produtividade,hierarquia,controle
COOPERATIVA DE PRODUÇÃO ARTESANAL DO CRUTAC,Produtos Artesanal,Márcia Maria De Oliveira,Feminino,67,Ensino Superior,Presidente,30,54,54,120,...,-1,1,0,1,2,2,2,2,2,1
COOPERATIVA AMAZONENSE DE ARTESANATO,Artesanatato,Larissa,Feminino,65,Pós Graduado,Diretor,8,7,7,52,...,0,0,2,2,2,2,2,2,2,2
COOPERATIVA DE TRABALHADORES NO ESTADO DO TOCANTINS - COOPERTAB,Educacional,Giovanna,Feminino,37,Pós Graduado,Vice Presidente.,12,12,12,43,...,1,1,2,2,1,1,1,1,2,1
COOPERATIVA BORDANA,Bordado Manual,Celma Grace,Feminino,52,Pós Graduado,Diretora Presidentea,14,11,11,28,...,1,1,2,2,2,2,2,2,1,2
COOPERATIVA ARTESANAL MISTA DE PARNAÍBA LTDA,Ramo Trabalho,Sara,Feminino,60,Pós Graduado,Presidente,20,20,20,15,...,0,0,1,1,1,1,0,1,1,0


In [11]:
# atributos
df.iloc[:, 6:12].describe()

Unnamed: 0,anos_trabalhados,idade_geral,idade_formal,numero_cooperados,pessoas_adm,acordos
count,117.0,117.0,117.0,117.0,117.0,117.0
mean,10.376068,14.675214,14.094017,65.196581,3.846154,35.188034
std,6.299598,12.460855,12.363728,77.587352,4.652799,351.071363
min,3.0,4.0,3.0,6.0,0.0,0.0
25%,5.0,7.0,7.0,21.0,2.0,0.0
50%,9.0,11.0,10.0,34.0,3.0,2.0
75%,14.0,15.0,15.0,64.0,4.0,5.0
max,31.0,54.0,54.0,360.0,48.0,3800.0


In [15]:
# criar variaveis
df["stakeholders_total"] = df.iloc[:, 13:67].sum(axis=1)
df["cultura_total"] = df.iloc[:, 67:75].sum(axis=1)
df["cultura_idx"] = pd.cut(df["cultura_total"], 3, labels=[1, 2, 3])

In [16]:
# Variavel dependente stats
df["stakeholders_total"].describe()

count    117.000000
mean      16.726496
std       25.655753
min      -39.000000
25%        4.000000
50%       19.000000
75%       33.000000
max       60.000000
Name: stakeholders_total, dtype: float64

In [23]:
# dados para o modelo
data = {
    'N': df.shape[0],
    'K': df.iloc[:, 6:12].shape[1],
    'X': df.iloc[:, 6:12].to_numpy(),
    'y': df["stakeholders_total"].to_numpy(),
    'J': 3,
    'idx': df["cultura_idx"].to_numpy(),
}

In [24]:
fit = model.sample(data=data, parallel_chains=4)

16:20:44 - cmdstanpy - INFO - CmdStan start processing
chain 1:   0% 0/22 [00:00<?, ?it/s[{'value': 'Status'}]]
chain 2:   0% 0/22 [00:00<?, ?it/s[{'value': 'Status'}]][A

chain 3:   0% 0/22 [00:00<?, ?it/s[{'value': 'Status'}]][A[A


chain 4:   0% 0/22 [00:00<?, ?it/s[{'value': 'Status'}]][A[A[A
chain 1:   9% 2/22 [00:00<00:01, 14.10it/s[{'value': 'Iteration:    1 / 2000 [  0%]  (Warmup)'}]][A

chain 3:   9% 2/22 [00:00<00:01, 12.93it/s[{'value': 'Iteration:    1 / 2000 [  0%]  (Warmup)'}]][A[A


chain 4:   9% 2/22 [00:00<00:01, 11.44it/s[{'value': 'Iteration:    1 / 2000 [  0%]  (Warmup)'}]][A[A[A
chain 1:  27% 6/22 [00:00<00:00, 24.52it/s[{'value': 'Iteration:  400 / 2000 [ 20%]  (Warmup)'}]][A

chain 3:  27% 6/22 [00:00<00:00, 23.11it/s[{'value': 'Iteration:  400 / 2000 [ 20%]  (Warmup)'}]][A[A


chain 4:  27% 6/22 [00:00<00:00, 22.38it/s[{'value': 'Iteration:  400 / 2000 [ 20%]  (Warmup)'}]][A[A[A
chain 1:  50% 11/22 [00:00<00:00, 34.04it/s[{'value': 'Iteration: 

                                                                                                                                                                                                                                                                                                                                


16:20:45 - cmdstanpy - INFO - CmdStan done processing.
Exception: normal_id_glm_lpdf: Scale vector is inf, but must be positive finite! (in '/home/jovyan/Stakeholders-Cooperativas/src/model.stan', line 29, column 2 to column 50)
	Exception: normal_id_glm_lpdf: Scale vector is inf, but must be positive finite! (in '/home/jovyan/Stakeholders-Cooperativas/src/model.stan', line 29, column 2 to column 50)
	Exception: normal_id_glm_lpdf: Scale vector is inf, but must be positive finite! (in '/home/jovyan/Stakeholders-Cooperativas/src/model.stan', line 29, column 2 to column 50)
	Exception: normal_id_glm_lpdf: Scale vector is inf, but must be positive finite! (in '/home/jovyan/Stakeholders-Cooperativas/src/model.stan', line 29, column 2 to column 50)
	Exception: normal_id_glm_lpdf: Scale vector is inf, but must be positive finite! (in '/home/jovyan/Stakeholders-Cooperativas/src/model.stan', line 29, column 2 to column 50)
	Exception: normal_id_glm_lpdf: Scale vector is inf, but must be posit




In [25]:
fit.summary()

Unnamed: 0,Mean,MCSE,StdDev,5%,50%,95%,N_Eff,N_Eff/s,R_hat
lp__,-792.487,0.104141,3.04813,-798.308,-792.032,-788.323,856.695,750.171,1.00731
alpha,3.45441,0.285751,7.08216,-3.642,1.62403,19.4454,614.263,537.883,1.00913
beta[1],-0.25378,0.003364,0.217687,-0.613809,-0.253056,0.105348,4187.19,3666.54,0.999735
beta[2],0.659681,0.013564,0.669112,-0.452318,0.656719,1.7676,2433.32,2130.76,1.00124
beta[3],-1.17123,0.013772,0.672342,-2.28923,-1.17837,-0.050672,2383.45,2087.08,1.00137
beta[4],0.035988,0.000234,0.014985,0.011106,0.035934,0.060529,4084.85,3576.92,0.999672
beta[5],-1.38603,0.010235,0.557511,-2.31606,-1.37152,-0.471131,2967.17,2598.23,1.00018
beta[6],0.030177,0.000137,0.007436,0.017872,0.030181,0.042366,2952.08,2585.01,1.0001
sigma,11.80162,0.00583,0.35263,11.2313,11.7912,12.403,3664.40023,3208.75677,1.00321
tau,27.62518,0.46146,15.28539,10.7261,23.8249,55.5215,1097.21162,960.78076,1.00454


In [26]:
print(fit.diagnose())

Processing csv files: /tmp/tmp588geo8s/model14m16rzx/model-20230308162044_1.csv, /tmp/tmp588geo8s/model14m16rzx/model-20230308162044_2.csv, /tmp/tmp588geo8s/model14m16rzx/model-20230308162044_3.csv, /tmp/tmp588geo8s/model14m16rzx/model-20230308162044_4.csv

Checking sampler transitions treedepth.
Treedepth satisfactory for all transitions.

Checking sampler transitions for divergences.
5 of 4000 (0.12%) transitions ended with a divergence.
These divergent transitions indicate that HMC is not fully able to explore the posterior distribution.
Try increasing adapt delta closer to 1.
If this doesn't remove all divergences, try to reparameterize the model.

Checking E-BFMI - sampler transitions HMC potential energy.
E-BFMI satisfactory.

Effective sample size satisfactory.

Split R-hat values satisfactory all parameters.

Processing complete.

