In [13]:
import pandas as pd
import numpy as np

# Dados dos coeficientes
data = {
    "Variable": ["(Intercept)", "Setor.CNAECOMERCIO E SERVICOS", "Setor.CNAEINDUSTRIA DE TRANSFORMACAO",
                 "Setor.CNAEINDUSTRIA EXTRATIVA", "Porte.do.clienteMICRO", "Porte.do.clienteMEDIA",
                 "Porte.do.clientePEQUENA", "Proporção.do.Valor.Setorial.ao.PIB.do.UF", "Contribuição.do.UF.ao.PIB.Nacional"],
    "Estimate": [15.35381, -0.24586, 0.33142, -0.11198, -2.46802, -1.76636, -2.41011, 0.01558, 1.20020],
    "Std. Error": [0.09671, 0.12596, 0.09834, 0.38721, 0.18893, 0.04211, 0.13701, 0.05595, 0.13506],
    "t value": [158.753, -1.952, 3.370, -0.289, -13.063, -41.884, -17.524, 0.278, 8.887],
    "Pr(>|t|)": ["<2e-16", "0.050997", "0.000756", "0.772435", "<2e-16", "<2e-16", "<2e-16", "0.780683", "<2e-16"]
}

# Criar DataFrame
df = pd.DataFrame(data)

# Converter Pr(>|t|) para valores numéricos, preservando a notação original para valores muito pequenos
def convert_p_value(val):
    if val.startswith("<"):
        return 0  # Considerando esses valores extremamente pequenos como 0
    else:
        return float(val)

df['Pr(>|t|)'] = df['Pr(>|t|)'].apply(convert_p_value)

# Filtrar apenas as linhas com Pr(>|t|) <= 0.05, mas sempre incluir o setor de comércio e serviços
significant_df = df[(df['Pr(>|t|)'] <= 0.05) | (df['Variable'] == "Setor.CNAECOMERCIO E SERVICOS")].copy()

# Reverter os valores 0 para "<2e-16"
significant_df.loc[significant_df['Pr(>|t|)'] == 0, 'Pr(>|t|)'] = "<2e-16"

# Calcular o expoente dos coeficientes
significant_df.loc[:, "Exponentiated Estimate"] = np.exp(significant_df["Estimate"])

# Calcular a mudança percentual
significant_df.loc[:, "Percentual Change"] = (significant_df["Exponentiated Estimate"] - 1) * 100

# Formatando as colunas Exponentiated Estimate e Percentual Change para melhor legibilidade
significant_df.loc[:, "Exponentiated Estimate"] = significant_df["Exponentiated Estimate"].apply(lambda x: f"{x:,.2f}")
significant_df.loc[:, "Percentual Change"] = significant_df["Percentual Change"].apply(lambda x: f"{x:,.2f}%")

significant_df = significant_df.reset_index(drop=True)

# Salvar o DataFrame filtrado e formatado em Excel
significant_df.to_excel("regressao_significativa_semUF.xlsx", index=False)
significant_df

  significant_df.loc[significant_df['Pr(>|t|)'] == 0, 'Pr(>|t|)'] = "<2e-16"
  significant_df.loc[:, "Exponentiated Estimate"] = significant_df["Exponentiated Estimate"].apply(lambda x: f"{x:,.2f}")
 '232.08%']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  significant_df.loc[:, "Percentual Change"] = significant_df["Percentual Change"].apply(lambda x: f"{x:,.2f}%")


Unnamed: 0,Variable,Estimate,Std. Error,t value,Pr(>|t|),Exponentiated Estimate,Percentual Change
0,(Intercept),15.35381,0.09671,158.753,<2e-16,4656664.61,"465,666,360.58%"
1,Setor.CNAECOMERCIO E SERVICOS,-0.24586,0.12596,-1.952,0.050997,0.78,-21.80%
2,Setor.CNAEINDUSTRIA DE TRANSFORMACAO,0.33142,0.09834,3.37,0.000756,1.39,39.29%
3,Porte.do.clienteMICRO,-2.46802,0.18893,-13.063,<2e-16,0.08,-91.52%
4,Porte.do.clienteMEDIA,-1.76636,0.04211,-41.884,<2e-16,0.17,-82.90%
5,Porte.do.clientePEQUENA,-2.41011,0.13701,-17.524,<2e-16,0.09,-91.02%
6,Contribuição.do.UF.ao.PIB.Nacional,1.2002,0.13506,8.887,<2e-16,3.32,232.08%


In [20]:
import pandas as pd
import numpy as np

# Dados dos coeficientes para a primeira parte
data_part1 = {
    "Variable": [
        "(Intercept)", "UFAlagoas", "UFAmazonas", "UFBahia", "UFCeará", "UFEspírito Santo", 
        "UFGoiás", "UFMato Grosso", "UFMato Grosso do Sul", "UFMinas Gerais", "UFPará", 
        "UFParaíba", "UFParaná", "UFPernambuco", "UFPiauí", "UFRio de Janeiro", "UFRio Grande do Norte", 
        "UFRio Grande do Sul", "UFRondônia", "UFSanta Catarina", "UFSão Paulo", "UFSergipe", 
        "UFTocantins", "Setor.CNAECOMERCIO E SERVICOS", "Setor.CNAEINDUSTRIA DE TRANSFORMACAO", 
        "Setor.CNAEINDUSTRIA EXTRATIVA", "Porte.do.clienteMEDIA", "Porte.do.clienteMICRO", "Porte.do.clientePEQUENA"
    ],
    "Estimate": [
        12.18308, 3.03169, 4.07721, 3.61986, 3.07468, 3.29770, 2.71205, 2.50728, 2.18979, 
        3.95481, 2.19161, 3.33676, 3.19997, 2.69673, 2.57284, 3.81982, 2.69302, 3.24589, 
        2.16039, 3.19108, 3.73108, 2.11954, 1.17385, -0.43803, 0.14410, -0.25537, -1.71633, 
        -2.53041, -2.39111
    ],
    "Std. Error": [
        1.59296, 1.65248, 1.56321, 1.59362, 1.57483, 1.57421, 1.60882, 1.58122, 1.72379, 
        1.60511, 1.60056, 1.58398, 1.60008, 1.58236, 1.54476, 1.61160, 1.55596, 1.59963, 
        1.54016, 1.59016, 1.61283, 1.58416, 1.62548, 0.13514, 0.10808, 0.38722, 0.04186, 
        0.18761, 0.13587
    ],
    "t value": [
        7.648, 1.835, 2.608, 2.271, 1.952, 2.095, 1.686, 1.586, 1.270, 2.464, 1.369, 2.107, 
        2.000, 1.704, 1.666, 2.370, 1.731, 2.029, 1.403, 2.007, 2.313, 1.338, 0.722, -3.241, 
        1.333, -0.660, -41.000, -13.488, -17.598
    ],
    "Pr(>|t|)": [
        2.31e-14, 0.06660, 0.00912, 0.02315, 0.05093, 0.03622, 0.09189, 0.11286, 0.20401, 
        0.01377, 0.17096, 0.03519, 0.04555, 0.08837, 0.09585, 0.01780, 0.08353, 0.04248, 
        0.16075, 0.04481, 0.02073, 0.18095, 0.47022, 0.00120, 0.18248, 0.50959, 2e-16, 
        2e-16, 2e-16
    ]
}

# Criar DataFrame para a primeira parte
df = pd.DataFrame(data_part1)

# Remover linhas onde Pr(>|t|) > 0.06
df = df[df["Pr(>|t|)"] <= 0.06]

# Calcular o expoente dos coeficientes
df["Exponentiated Estimate"] = np.exp(df["Estimate"])

# Calcular a mudança percentual
df["Percentual Change"] = (df["Exponentiated Estimate"] - 1) * 100

# Formatando as colunas Exponentiated Estimate e Percentual Change para melhor legibilidade
df["Exponentiated Estimate"] = df["Exponentiated Estimate"].apply(lambda x: f"{x:,.2f}".replace('.', ','))
df["Percentual Change"] = df["Percentual Change"].apply(lambda x: f"{x:,.2f}%".replace('.', ','))
df["Estimate"] = df["Estimate"].apply(lambda x: f"{x:,.2f}%".replace('.', ','))
df["Std. Error"] = df["Std. Error"].apply(lambda x: f"{x:,.2f}%".replace('.', ','))
df["t value"] = df["t value"].apply(lambda x: f"{x:,.2f}%".replace('.', ','))
df["Pr(>|t|)"] = df["Pr(>|t|)"].apply(lambda x: f"{x:,.2f}%".replace('.', ','))



# Informações de resumo do modelo
model_summary = {
    "Residual standard error": "1.284 on 7049 degrees of freedom",
    "Observations deleted due to missingness": 1,
    "Multiple R-squared": 0.2805,
    "Adjusted R-squared": 0.2775,
    "F-statistic": "94.75 on 29 and 7049 DF",
    "p-value": "< 2.2e-16"
}

# Função para imprimir as informações de maneira formatada
def print_model_summary(summary):
    print("Model Summary Statistics:")
    print("----------------------------")
    for key, value in summary.items():
        print(f"{key}: {value}")

# Chamando a função para imprimir o resumo do modelo
print_model_summary(model_summary)

# Exibir o DataFrame final após filtragem
df = df.reset_index(drop=True)
df


Model Summary Statistics:
----------------------------
Residual standard error: 1.284 on 7049 degrees of freedom
Observations deleted due to missingness: 1
Multiple R-squared: 0.2805
Adjusted R-squared: 0.2775
F-statistic: 94.75 on 29 and 7049 DF
p-value: < 2.2e-16


Unnamed: 0,Variable,Estimate,Std. Error,t value,Pr(>|t|),Exponentiated Estimate,Percentual Change
0,(Intercept),"12,18%","1,59%","7,65%","0,00%",19545393,"19,545,293,42%"
1,UFAmazonas,"4,08%","1,56%","2,61%","0,01%",5898,"5,798,07%"
2,UFBahia,"3,62%","1,59%","2,27%","0,02%",3733,"3,633,23%"
3,UFCeará,"3,07%","1,57%","1,95%","0,05%",2164,"2,064,30%"
4,UFEspírito Santo,"3,30%","1,57%","2,10%","0,04%",2705,"2,605,04%"
5,UFMinas Gerais,"3,95%","1,61%","2,46%","0,01%",5219,"5,118,58%"
6,UFParaíba,"3,34%","1,58%","2,11%","0,04%",2813,"2,712,78%"
7,UFParaná,"3,20%","1,60%","2,00%","0,05%",2453,"2,353,18%"
8,UFRio de Janeiro,"3,82%","1,61%","2,37%","0,02%",4560,"4,459,60%"
9,UFRio Grande do Sul,"3,25%","1,60%","2,03%","0,04%",2568,"2,468,46%"


In [22]:
df.to_excel("regressao_significativa_comUF.xlsx")