In [1]:
import pandas as pd
import statsmodels.api as sm
import numpy as np

# Load the raw data
tcc_data = pd.read_csv("dadosTCC_final.csv")

# Dependent (response) column of the regression
target_col = 'Valor Desembolsado em Reais'

# Columns left out of the regressor matrix (the dependent column is one of them)
excluded_cols = [
    'Proporção do Valor Setorial ao PIB do UF',
    'Contribuição do UF ao PIB Nacional (%)',
    'Ano',
    'Cliente',
    target_col,
    'Subsetor CNAE Agrupado',
    'Instituição Financeira Credenciada',
    'Valor da Operação em R$',
]

# Build the regressor frame without mutating anything in place:
# remove the excluded columns, then discard the last row of the file.
# NOTE(review): the reason for discarding the last row is not visible here
# (presumably a totals/footer row) - confirm against the CSV.
df_dummy = tcc_data.drop(columns=excluded_cols).iloc[:-1]

# One-hot encode the categorical columns, dropping the first level of each
df_dummy = pd.get_dummies(df_dummy, drop_first=True)

# Drop rows with missing regressors and make every column float
df_dummy = df_dummy.dropna().astype(float)

# Also drop rows whose dependent value is missing: the dropna() above only
# looks at the regressors, so a NaN target would otherwise reach the model.
has_target = tcc_data.loc[df_dummy.index, target_col].notna()
df_dummy = df_dummy.loc[has_target]

# Keep tcc_data restricted to the rows that survived the filtering
tcc_data = tcc_data.loc[df_dummy.index]

# Reset both indexes so the dependent variable and regressors align positionally
dependent_var = tcc_data[target_col].reset_index(drop=True)
df_dummy = df_dummy.reset_index(drop=True)

# Sanity check: one dependent value per regressor row
assert len(dependent_var) == len(df_dummy)

In [2]:
# Build the OLS regression model.
# NOTE(review): sm.OLS does not add an intercept on its own, and none is added
# to df_dummy here, which is why the summary reports an *uncentered* R-squared.
# Because the dummies were created with drop_first=True, the omitted reference
# categories are effectively constrained to a zero mean. If an intercept is
# intended, pass sm.add_constant(df_dummy) instead - confirm the specification.
model = sm.OLS(dependent_var, df_dummy)

# Fit the model
results = model.fit()

# Build the summary once and reuse it for both the export and the display
summary = results.summary()

# Convert every summary table to a DataFrame (first row is used as the header)
tables = summary.tables
dataframes = [pd.DataFrame(table[1:], columns=table[0]) for table in tables]

csv_path = 'regTCC_sem_contribuicao_com_UF_.csv'

# newline='' is what pandas asks for when to_csv receives an open text handle;
# the explicit encoding keeps accented labels independent of the OS locale.
with open(csv_path, 'w', newline='', encoding='utf-8') as f:
    for i, (table, df) in enumerate(zip(tables, dataframes)):
        # Summary.extra_txt is a single string, so indexing it by table number
        # only yields one character; label each table with its own title.
        title = (getattr(table, 'title', None) or '').strip()
        label = f"Table {i + 1}: {title}" if title else f"Table {i + 1}"
        f.write(f"\n{label}\n")

        df.to_csv(f, index=False)

        f.write('\n')

    # Write the summary notes (extra_txt) once, after all tables
    if summary.extra_txt:
        f.write(f"\n{summary.extra_txt.strip()}\n")

# Display the regression summary as the cell output
summary


0,1,2,3
Dep. Variable:,Valor Desembolsado em Reais,R-squared (uncentered):,0.164
Model:,OLS,Adj. R-squared (uncentered):,0.161
Method:,Least Squares,F-statistic:,49.46
Date:,"Wed, 29 May 2024",Prob (F-statistic):,3.63e-249
Time:,13:19:30,Log-Likelihood:,-134550.0
No. Observations:,7079,AIC:,269200.0
Df Residuals:,7051,BIC:,269300.0
Df Model:,28,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
UF_Alagoas,3.93e+06,1.98e+07,0.199,0.842,-3.48e+07,4.27e+07
UF_Amazonas,3.456e+07,7.99e+06,4.325,0.000,1.89e+07,5.02e+07
UF_Bahia,1.582e+07,4.99e+06,3.169,0.002,6.03e+06,2.56e+07
UF_Ceará,7.406e+06,4.48e+06,1.653,0.098,-1.38e+06,1.62e+07
UF_Espírito Santo,1.926e+07,5.44e+06,3.540,0.000,8.59e+06,2.99e+07
UF_Goiás,2.907e+06,1.18e+07,0.247,0.805,-2.02e+07,2.6e+07
UF_Mato Grosso,1.108e+07,9.7e+06,1.142,0.253,-7.93e+06,3.01e+07
UF_Mato Grosso do Sul,-1.775e+06,2.54e+07,-0.070,0.944,-5.15e+07,4.8e+07
UF_Minas Gerais,2.01e+07,3.91e+06,5.142,0.000,1.24e+07,2.78e+07

0,1,2,3
Omnibus:,16307.531,Durbin-Watson:,1.738
Prob(Omnibus):,0.0,Jarque-Bera (JB):,210508100.256
Skew:,22.073,Prob(JB):,0.0
Kurtosis:,846.646,Cond. No.,55.4
