In [24]:
# Importing the required library
import pandas as pd

# Reading the Excel file into a Pandas DataFrame
df = pd.read_excel('Data/Data_BA_modified.xlsx')

# Displaying the first 5 rows of the DataFrame to verify that the data has been read correctly
df

Unnamed: 0,Team,Indsatslængde,PrisIAlt,alder,alder_ved_hændelse,status,personID_serial,caseworker_serial,sag_varighed,Indsats_Familiebehandling,...,BaggrundBarnet_BaggrundBarnet_Anden form for omsorgssvigt over for barn/ung,BaggrundBarnet_BaggrundBarnet_Betydelig eller varigt nedsat fysisk eller psykisk funktionsevne hos barn/ung,BaggrundBarnet_BaggrundBarnet_Overgreb mod barn/ung f.eks. seksuelt eller voldeligt,BaggrundHjemmet_BaggrundHjemmet_Anden bekymrende adfærd hos forældre,BaggrundHjemmet_BaggrundHjemmet_Betydelig eller varigt nedsat fysisk eller psykisk funktionsevne hos forældre,BaggrundHjemmet_BaggrundHjemmet_Højt konfliktniveau eller vold i hjemmet mellem voksne,BaggrundHjemmet_BaggrundHjemmet_Misbrug hos forældre,Underretter_Anden_Kommune,Underretter_Sundhedsvæsen,Underretter_Socialrådgiver
0,BBU City-Østerbro - MY Børnegruppen,90,28091,6,3,lukket,629,1,507,1,...,0,0,0,0,0,0,0,0,0,0
1,BBU City-Østerbro - MY Børnegruppen,161,47375,2,0,lukket,604,1,811,1,...,0,0,0,0,0,0,0,1,0,0
2,BBU City-Østerbro - MY Børnegruppen,0,39,1,0,lukket,34,1,848,1,...,1,0,0,1,0,0,0,0,1,0
3,BBU Valby-Vesterbro-Kgs.Enghave - Styrket modt...,239,141911,2,0,lukket,167,2,593,1,...,0,0,0,0,0,0,1,0,0,0
4,,123,49000,5,3,lukket,671,3,756,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
646,BBU Valby-Vesterbro-Kgs.Enghave - Børneliv,134,14880,7,4,lukket,420,296,456,1,...,0,0,0,0,0,0,0,0,0,0
647,BBU Valby-Vesterbro-Kgs.Enghave - Børneliv,182,61639,5,5,lukket,316,296,140,1,...,0,0,0,0,0,0,0,0,0,0
648,BBU Valby-Vesterbro-Kgs.Enghave - Børneliv,112,36903,6,2,lukket,446,296,1141,1,...,0,0,0,0,0,0,0,0,0,0
649,BBU Valby-Vesterbro-Kgs.Enghave - Børneliv,899,101450,7,3,lukket,254,296,1195,1,...,0,0,0,0,0,0,0,0,0,0


In [25]:
import statsmodels.api as sm

# Prepare the results dictionary to store regression summaries
results = {}

# 1. Simple regression with sag_varighed as Y and PrisIAlt as X
X1 = sm.add_constant(df['PrisIAlt'])
y = df['sag_varighed']
model1 = sm.OLS(y, X1).fit()
results['model1'] = model1.summary()

# 2. Simple regression with sag_varighed as Y and alder_ved_hændelse as X
X2 = sm.add_constant(df['alder_ved_hændelse'])
model2 = sm.OLS(y, X2).fit()
results['model2'] = model2.summary()

# 3. sag_varighed as Y, PrisIAlt as beta1 and alder_ved_hændelse as control
X3 = sm.add_constant(df[['PrisIAlt', 'alder_ved_hændelse']])
model3 = sm.OLS(y, X3).fit()
results['model3'] = model3.summary()

prefixes = ['Underretter', 'BaggrundBarnet', 'BaggrundHjemmet', 'Område']
prefix_columns = {prefix: [col for col in df.columns if col.startswith(prefix)] for prefix in prefixes}


# 4. sag_varighed as Y, PrisIAlt as beta1, and the following controls: alder_ved_hændelse, and columns with prefixes
control_vars = ['alder_ved_hændelse'] + ['Indsatslængde'] + [col for prefix in prefixes for col in prefix_columns[prefix]]
X4 = sm.add_constant(df[['PrisIAlt'] + control_vars])
model4 = sm.OLS(y, X4).fit()
results['model4'] = model4.summary()

# Display the summary statistics for each model
results


{'model1': <class 'statsmodels.iolib.summary.Summary'>
 """
                             OLS Regression Results                            
 Dep. Variable:           sag_varighed   R-squared:                       0.000
 Model:                            OLS   Adj. R-squared:                 -0.001
 Method:                 Least Squares   F-statistic:                    0.2877
 Date:                Mon, 23 Oct 2023   Prob (F-statistic):              0.592
 Time:                        19:12:17   Log-Likelihood:                -4906.4
 No. Observations:                 651   AIC:                             9817.
 Df Residuals:                     649   BIC:                             9826.
 Df Model:                           1                                         
 Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
 -----------------------------------------------------------

In [30]:
# Initialize LaTeX table
latex_table = "\\begin{table}\n"
latex_table += "\\caption{OLS modified}\n"
latex_table += "\\label{}\n"
latex_table += "\\begin{center}\n"
latex_table += "\\begin{tabular}{lllll}\n"
latex_table += "\\hline\n"
latex_table += " & Model 1 & Model 2 & Model 3 & Model 4 \\\\\n"
latex_table += "\\hline\n"

# Define variables and statistics of interest
variables = ['const', 'PrisIAlt', 'alder_ved_hændelse', 'Indsatslængde']
statistics = ['R-squared', 'R-squared Adj.', 'F-statistic']

# Add coefficients and standard errors for each variable
for var in variables:
    latex_table += f"{var} & "
    for model_name in ['model1', 'model2', 'model3', 'model4']:
        model = results[model_name]
        if var in model.params.index:
            coef = model.params[var]
            latex_table += f"{coef:.4f} & "
        else:
            latex_table += " & "
    latex_table = latex_table.rstrip(" & ")
    latex_table += " \\\\\n"
    latex_table += " & "
    for model_name in ['model1', 'model2', 'model3', 'model4']:
        model = results[model_name]
        if var in model.bse.index:
            std_err = model.bse[var]
            latex_table += f"({std_err:.4f}) & "
        else:
            latex_table += " & "
    latex_table = latex_table.rstrip(" & ")
    latex_table += " \\\\\n"

# Add statistics
for stat in statistics:
    latex_table += f"{stat} & "
    for model_name in ['model1', 'model2', 'model3', 'model4']:
        model = results[model_name]
        if stat == 'R-squared':
            value = model.rsquared
        elif stat == 'R-squared Adj.':
            value = model.rsquared_adj
        elif stat == 'F-statistic':
            value = model.fvalue
        latex_table += f"{value:.4f} & "
    latex_table = latex_table.rstrip(" & ")
    latex_table += " \\\\\n"

# Close LaTeX table
latex_table += "\\hline\n"
latex_table += "\\end{tabular}\n"
latex_table += "\\end{center}\n"
latex_table += "\\end{table}"

# Print the LaTeX table
print(latex_table)


\begin{table}
\caption{OLS modified}
\label{}
\begin{center}
\begin{tabular}{lllll}
\hline
 & Model 1 & Model 2 & Model 3 & Model 4 \\
\hline
const & 546.6001 & 491.6555 & 486.1726 & 539.4914 \\
 & (21.9826) & (29.8438) & (32.2008) & (87.0744) \\
PrisIAlt & 0.0001 &  & 0.0001 & -0.0001 \\
 & (0.0002) &  & (0.0002) & (0.0002) \\
alder_ved_hændelse &  & 24.4923 & 24.3514 & 19.1466 \\
 &  & (9.5063) & (9.5172) & (11.1270) \\
Indsatslængde &  &  &  & 0.3112 \\
 &  &  &  & (0.1060) \\
R-squared & 0.0004 & 0.0101 & 0.0104 & 0.1613 \\
R-squared Adj. & -0.0011 & 0.0086 & 0.0074 & 0.0853 \\
F-statistic & 0.2877 & 6.6379 & 3.4185 & 2.1226 \\
\hline
\end{tabular}
\end{center}
\end{table}
