## Data

In [None]:
import pandas as pd

# Load the test dataset. The path is relative, so it is resolved against the
# current working directory of the notebook kernel.
data = pd.read_csv("src/difference_in_difference_with_deep_learning/data/testdata.csv")

In [None]:
import yaml

# Read the column-role configuration. Later cells look up the keys "outcome",
# "causal_effect", "categorical_columns", "control_columns" and "time" in it.
# The encoding is pinned so that decoding does not depend on the platform locale.
with open(
    "src/difference_in_difference_with_deep_learning/data_management/data_info.yaml",
    encoding="utf-8",
) as file:
    data_info = yaml.safe_load(file)

In [None]:
# Pull out the first two columns listed under "categorical_columns"; their
# element-wise product is stored as the interaction term in the next cell.
data_select_1 = data[data_info["categorical_columns"][0]]
data_select_2 = data[data_info["categorical_columns"][1]]

In [None]:
# Write the product of the two selected columns into the column whose name is
# configured under "causal_effect".
data.loc[:, data_info["causal_effect"]] = data_select_1 * data_select_2

## Summary

In [None]:
import statsmodels.formula.api as smf

In [None]:
def did_regression(data, data_info):
    """Fit one OLS regression of the outcome on the causal-effect column.

    The right-hand side consists of the causal-effect column, the categorical
    columns and the control columns named in ``data_info``.

    Parameters:
        data (DataFrame): Dataset containing every column named in ``data_info``.
        data_info (dict): Configuration with the keys ``outcome``,
            ``causal_effect``, ``categorical_columns`` and ``control_columns``.

    Returns:
        The object returned by ``summary2()`` of the fitted statsmodels OLS model.
    """
    outcome = data_info["outcome"]
    effect = data_info["causal_effect"]
    group_terms = " + ".join(data_info["categorical_columns"])
    control_terms = " + ".join(data_info["control_columns"])

    # Patsy-style formula "<outcome> ~ <effect> + <groups>  + <controls>"; the
    # spacing matches the string the regression has always been built from.
    formula = f"{outcome} ~ {effect} + {group_terms}  + {control_terms}"

    fitted = smf.ols(formula=formula, data=data).fit()
    return fitted.summary2()

## How to adapt the summary output to my needs (postponed)

In [None]:
# Fit the regression on the full dataset and print its summary.
summary = did_regression(data, data_info)
print(summary)

In [None]:
# First table of the summary; the cells below search it for the
# "Adj. R-squared:" and "No. Observations:" labels.
data0 = summary.tables[0]

In [None]:
# Display the first summary table.
data0

In [None]:
# Select the rows that carry the two statistics of interest: the label is looked
# up in column 0 for the adjusted R-squared and in column 3 for the number of
# observations.
adj_r_squared_row = data0.loc[data0[0] == "Adj. R-squared:"]
no_observations_row = data0.loc[data0[3] == "No. Observations:"]

In [None]:
# Second table of the regression summary.
data1 = summary.tables[1]
index = ["Intercept", "interaction", "FQ", "Reform", "Age", "WagePartner"]
# Build the frame from the summary table ``data1``. The original passed the raw
# dataset ``data``: it was read with the default integer row index, which shares
# no labels with ``index``, so every row came out as NaN (and ``data1`` was never
# used).
# NOTE(review): assumes the row labels of ``data1`` match ``index`` - confirm.
summary_df = pd.DataFrame(data1, index=index)
summary_df = summary_df.round(3)
print(summary_df)

In [None]:
# Stack the two statistic rows underneath the coefficient rows.
# ``ignore_index=True`` replaces all row labels with a fresh integer index.
# NOTE(review): the frames come from different summary tables, so their columns
# presumably do not line up - confirm the resulting layout is the intended one.
summary_df = pd.concat(
    [summary_df, adj_r_squared_row, no_observations_row],
    ignore_index=True,
)

In [None]:
# Display the combined summary frame.
summary_df

In [None]:
# Render the combined summary frame as a LaTeX table and print the source.
latex_table = summary_df.to_latex()

print(latex_table)

In [None]:
# Print each table of the regression summary via its ``as_latex_tabular()`` method.
for table in summary.tables:
    print(table.as_latex_tabular())

# Do the groupby properly

In [None]:
def estimate_regression(data, data_info):
    """Estimate one OLS regression per time period and summarize the results.

    For every distinct value of the time column the outcome is regressed on the
    causal-effect column, the categorical columns and the control columns. The
    coefficient, standard error and p-value of the causal-effect column are
    collected together with some descriptive statistics of that period.

    Parameters:
        data (DataFrame): The dataset containing all variables.
        data_info (dict): Dictionary containing data configuration information.
            The keys ``time``, ``outcome``, ``causal_effect``,
            ``categorical_columns`` and ``control_columns`` are read.

    Returns:
        DataFrame: One row per time period with the columns "Time Period",
        "Control Mean", "Difference T-C", "Difference T-C with Controls",
        "Coefficient", "Std. Error" and "P-value".
    """
    time_col = data_info["time"]
    outcome_col = data_info["outcome"]
    effect_col = data_info["causal_effect"]
    control_cols = data_info["control_columns"]

    # The formula is the same for every period, so it is built once.
    group_terms = " + ".join(data_info["categorical_columns"])
    control_terms = " + ".join(control_cols)
    formula = f"{outcome_col} ~ {effect_col} + {group_terms} + {control_terms}"

    results_list = []

    for time_period in data[time_col].unique():
        # Restrict the data to the current time period
        data_time_period = data[data[time_col] == time_period]

        # Fit the regression model
        results = smf.ols(formula=formula, data=data_time_period).fit()

        # Extract coefficient, std. error, and p-value of the causal-effect term
        coefficient = results.params[effect_col]
        std_error = results.bse[effect_col]
        p_value = results.pvalues[effect_col]

        # NOTE(review): this is the outcome mean over ALL observations of the
        # period, not only over a control group - confirm this is intended for
        # a column labelled "Control Mean".
        control_mean = data_time_period[outcome_col].mean()

        # Mean outcome of the rows with causal-effect column == 1, minus the
        # mean computed above
        treated_rows = data_time_period[data_time_period[effect_col] == 1]
        difference_tc = treated_rows[outcome_col].mean() - control_mean

        # NOTE(review): the controls are demeaned and then averaged, so the
        # adjustment term is zero up to floating-point error and this value
        # equals "Difference T-C" - confirm the intended adjustment.
        controls = data_time_period[control_cols]
        centred_controls_total = (controls - controls.mean()).mean().sum()
        difference_tc_controls = difference_tc - coefficient * centred_controls_total

        results_list.append(
            {
                "Time Period": time_period,
                "Control Mean": control_mean,
                "Difference T-C": difference_tc,
                "Difference T-C with Controls": difference_tc_controls,
                "Coefficient": coefficient,
                "Std. Error": std_error,
                "P-value": p_value,
            },
        )

    # Convert the list of dictionaries to a DataFrame
    return pd.DataFrame(results_list)

In [None]:
# Run the per-period regressions on the full dataset.
result_df = estimate_regression(data, data_info)

In [None]:
# Display the per-period results.
result_df

In [None]:
import matplotlib.pyplot as plt

# Work on an independent copy. ``pd.DataFrame(data)`` does not copy a DataFrame
# input by default, so the categorical conversion of "time" below could leak
# back into ``data``, which other cells keep using.
df = data.copy()

time_order = ["t-2", "t-1", "t+1", "t+2", "t+3"]

# Convert the time column to an ordered categorical so that the periods are
# sorted chronologically rather than alphabetically. Values that are not listed
# in ``time_order`` become missing.
df["time"] = pd.Categorical(df["time"], categories=time_order, ordered=True)


# Calculate average wage for each (FQ, Reform, time) cell
grouped = df.groupby(["FQ", "Reform", "time"])["wage_year"].mean().reset_index()

# Separate data for the two groups that are plotted
group0 = grouped[(grouped["FQ"] == 0) & (grouped["Reform"] == 0)]
group1 = grouped[(grouped["FQ"] == 1) & (grouped["Reform"] == 1)]

# Counterfactual: the path of group 0 shifted by the difference between the
# overall average wages of group 1 and group 0
counterfactual = group0.copy()
counterfactual["wage_year"] += group1["wage_year"].mean() - group0["wage_year"].mean()


# Plotting
plt.figure(figsize=(10, 5))
plt.plot(group0["time"], group0["wage_year"], label="Group 0", marker="o")
plt.plot(group1["time"], group1["wage_year"], label="Group 1", marker="o")
plt.plot(
    counterfactual["time"],
    counterfactual["wage_year"],
    label="Counterfactual",
    linestyle="--",
    marker="o",
)


# Add labels and legend
plt.xlabel("Time")
plt.ylabel("Average Wage")
plt.title("Difference-in-Differences Plot")
plt.legend()
plt.grid(True)

# Show plot
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# new approach

# deep learning estimation

In [None]:
import sys
import time

import matplotlib.pyplot as plt

# Stopping Tensorflow from printing info messages and warnings.
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.compat.v1 as tf

# Run configuration, declared as TensorFlow flags with their default values.
# NOTE(review): flag definitions are global state; re-running this cell in the
# same kernel presumably fails because the flags already exist - confirm.
FLAGS = tf.flags.FLAGS

tf.flags.DEFINE_boolean("update", False, """Record the simulation results.""")
tf.flags.DEFINE_boolean("plot_true", False, """Show plots.""")
tf.flags.DEFINE_boolean("verbose", True, """Show detailed messages.""")
tf.flags.DEFINE_integer("nsimulations", 1, """How many simulations to run.""")
tf.flags.DEFINE_integer(
    "nconsumer_characteristics",
    100,
    """Number of consumer characteristics.""",
)
tf.flags.DEFINE_string(
    "treatment",
    "not_random",
    """Are customers treated at random or not.""",
)
tf.flags.DEFINE_string(
    "model",
    "quadratic",
    """Is the mapping from consumer characteristics
                       to their preferences linear or quadratic.""",
)
tf.flags.DEFINE_string(
    "architecture",
    "architecture_1_",
    """Which NN architecture to use.""",
)
tf.flags.DEFINE_integer("data_seed", None, """Seed to use to create fake data.""")

# Drop every command-line argument except the program name, so that the flag
# parser below only sees the defaults declared above (presumably to keep the
# notebook kernel's own arguments away from the parser).
sys.argv = [sys.argv[0]]

# Parse flags
FLAGS(sys.argv)

# Sanity check that no "--" arguments are left over. ``sys.argv`` was reduced to
# a single entry above, so this list is always empty at this point.
remaining_args = [arg for arg in sys.argv[1:] if arg.startswith("--")]
assert remaining_args == []

In [None]:
# Different architectures for the first NN
# Hidden-layer widths of the first NN for every supported architecture flag.
# All architectures share the same pattern: ReLU on every hidden layer, no
# activation on the output layer and no dropout during training.
_hidden_sizes_by_architecture = {
    "architecture_1_": [20, 10, 5],
    "architecture_2_": [60, 30, 20],
    "architecture_3_": [80, 80, 80],
    "architecture_4_": [20, 15, 10, 5],
    "architecture_5_": [60, 30, 20, 10],
    "architecture_6_": [80, 80, 80, 80],
    "architecture_7_": [20, 15, 15, 10, 10, 5],
    "architecture_8_": [60, 30, 20, 20, 10, 5],
    "architecture_9_": [80, 80, 80, 80, 80, 80],
}

if FLAGS.architecture not in _hidden_sizes_by_architecture:
    msg = "Architecture not found! Check the spelling."
    raise ValueError(msg)

hidden_layer_sizes = _hidden_sizes_by_architecture[FLAGS.architecture]
# One entry per layer, i.e. one more than the number of hidden layers.
dropout_rates_train = [0] * (len(hidden_layer_sizes) + 1)
activation_functions = ["relu"] * len(hidden_layer_sizes) + ["none"]

if FLAGS.nconsumer_characteristics < 20:
    raise ValueError(
        "Number of consumer characteristics " + "should not be less than 20.",
    )

In [None]:
# No dropout at evaluation time: one zero per training dropout rate.
dropout_rates_test = [0] * len(dropout_rates_train)

# Second NN, which estimates the propensity scores
hidden_layer_sizes_treatment = [50, 30]
activation_functions_treatment = ["relu", "relu", "none"]
dropout_rates_train_treatment = [0, 0, 0]
dropout_rates_test_treatment = [0] * len(dropout_rates_train_treatment)

# Size of the generated fake dataset
nconsumers = 10000

# Training run settings
train_proportion = 0.9
max_nepochs = 5000
max_epochs_without_change = 30

# Early stopping is switched on whenever part of the data is held out.
early_stopping = train_proportion != 1

optimizer = "Adam"
learning_rate = 0.009
batch_size = 128
batch_size_t = None

# Regularization strength and mixing ratio
alpha = 0.0
r = 0.2

# Reject misspelled flag values early
if FLAGS.model not in ("quadratic", "simple"):
    msg = "Check whether model type is spelled correctly!"
    raise ValueError(msg)
if FLAGS.treatment not in ("random", "not_random"):
    msg = "Check whether treatment type is spelled correctly!"
    raise ValueError(msg)

start_time = time.time()

# Data holders, filled in by later cells
X_train = None
T_train = None
Y_train = None
X_valid = None
T_valid = None
Y_valid = None
X = None
T_real = None
Y = None

In [None]:
def get_train_test_inds(t):
    """Draw a random train/validation split separately for each treatment value.

    Within every distinct value of ``t`` the share ``train_proportion`` (read
    from the module-level variable) of the observations, rounded down, is
    assigned to the training set and the rest to the validation set.

    Inputs:
    -------
        t: array-like, shape=(N, 1)
            Treatment array.
    Outputs:
    -------
        train_inds: array of bools
            Mask of the observations in the training set.
        valid_inds: array of bools
            Mask of the observations in the validation set.
    """
    treatment = np.array(t)
    n_obs = len(treatment)
    train_inds = np.zeros(n_obs, dtype=bool)
    valid_inds = np.zeros(n_obs, dtype=bool)

    for level in np.unique(treatment):
        # Row positions of this treatment value, in random order
        members = np.nonzero(treatment == level)[0]
        np.random.shuffle(members)

        cutoff = int(train_proportion * len(members))
        train_part, valid_part = members[:cutoff], members[cutoff:]
        train_inds[train_part] = True
        valid_inds[valid_part] = True

    return train_inds, valid_inds

# chatgpt approach

In [None]:
import pandas as pd

# Load the real data
# Load the real data. This rebinds ``data``, replacing the test dataset loaded
# at the top of the notebook.
data = pd.read_csv(
    "bld/difference_in_difference_with_deep_learning/data/cleaned_data.csv",
)  # Change the filename to your actual data file

# 'wage_year' is taken as the outcome and 'Reform' as the treatment assignment.
X_train = data[["Individual", "Age", "WagePartner", "interaction"]]  # Features
T_train = data["Reform"]  # Treatment assignment
Y_train = data["wage_year"]  # Outcome

# No separate validation sample: the validation names point at the training
# data, so any "validation" loss computed from them is an in-sample loss.
X_valid = X_train
T_valid = T_train
Y_valid = Y_train

In [None]:
# Network architecture. ``activation_functions`` and ``dropout_rates_train`` need
# one entry per layer, i.e. ``len(hidden_layer_sizes) + 1`` entries (the hidden
# layers plus the output layer). The original lists had only three entries for
# three hidden layers, which runs out of entries when the network is built.
# NOTE(review): the added entries follow the pattern of the predefined
# architectures (ReLU on hidden layers, no activation and no dropout on the
# output layer) - confirm this is the intended design.
hidden_layer_sizes = [100, 50, 20]  # Adjust based on the complexity of your problem
activation_functions = ["relu", "relu", "relu", "none"]
dropout_rates_train = [0.1, 0.2, 0.0, 0.0]

# Adjust run parameters
train_proportion = 0.8  # Adjust based on your dataset size and computational resources
max_nepochs = 1000
max_epochs_without_change = 20
optimizer = "Adam"
learning_rate = 0.001
batch_size = 64

# Prepare real data (preprocessing steps)
# You might need to normalize numerical features, encode categorical variables, handle missing values, etc.
# For example, using sklearn.preprocessing.StandardScaler for normalization

In [None]:
class RealData:
    """Hold an observed dataset (features, treatment, outcome) for the NN method.

    Inputs:
    -------
        X: pandas DataFrame
            Features.
        T: pandas Series
            Treatment assignment.
        Y: pandas Series
            Outcome.
        treatment: {'random', 'not_random'}
            If 'random' consumers are being treated at random.
            Otherwise, probability of being treated is a function
            of consumer characteristics.
        nconsumer_characteristics: int
            Number of consumer characteristics.
    """

    def __init__(
        self,
        X,
        T,
        Y,
        treatment="not_random",
        nconsumer_characteristics=FLAGS.nconsumer_characteristics,
    ):
        # Observed data
        self.X, self.T, self.Y = X, T, Y

        # Configuration
        self.treatment = treatment
        self.nconsumer_characteristics = nconsumer_characteristics

        # Not available for observed data; kept as empty placeholders
        self.seed = self.prob_of_T = self.tau_true_mean = None

    def prepare_real_data(self):
        """Return the stored data in an eight-element tuple.

        Only the outcome, the features and the treatment are filled in; every
        other position is ``None``.

        Returns:
        -------
            Tuple laid out as
            (Y, X, mu0, tau, T, seed, prob_of_T, tau_true_mean)
        """
        # Further preprocessing (encoding categorical variables, handling
        # missing values, ...) would go here.
        return (
            self.Y,
            self.X,
            None,  # mu0
            None,  # tau
            self.T,
            None,  # seed
            None,  # prob_of_T (propensity scores)
            None,  # tau_true_mean (true average treatment effect)
        )


# Usage with real data
# ``X_train_scaled`` was used here although none of the preceding cells defines
# it, which raises a NameError. The features are therefore standardized to zero
# mean and unit variance first (the normalization step the hyper-parameter cell
# asks for); constant columns are divided by 1 to avoid a division by zero.
# NOTE(review): assumes all feature columns are numeric - confirm.
_feature_std = X_train.std(ddof=0).replace(0, 1.0)
X_train_scaled = (X_train - X_train.mean()) / _feature_std
real_data = RealData(X_train_scaled, T_train, Y_train)
Y, X, mu0, tau, T, seed, prob_of_T, tau_true_mean = real_data.prepare_real_data()

In [None]:
def get_train_test_inds(t, train_proportion):
    """Draw a random train/validation split separately for each treatment value.

    Within every distinct value of ``t`` the share ``train_proportion`` of the
    observations, rounded down, is assigned to the training set and the rest
    to the validation set.

    Inputs:
    -------
        t: array-like, shape=(N, 1)
            Treatment array.
        train_proportion: float
            Proportion of the dataset to be used for training.

    Outputs:
    -------
        train_inds: array of bools
            Mask of the observations in the training set.
        valid_inds: array of bools
            Mask of the observations in the validation set.
    """
    treatment = np.array(t)
    n_obs = len(treatment)
    train_inds = np.zeros(n_obs, dtype=bool)
    valid_inds = np.zeros(n_obs, dtype=bool)

    for level in np.unique(treatment):
        # Row positions of this treatment value, in random order
        members = np.nonzero(treatment == level)[0]
        np.random.shuffle(members)

        cutoff = int(train_proportion * len(members))
        train_part, valid_part = members[:cutoff], members[cutoff:]
        train_inds[train_part] = True
        valid_inds[valid_part] = True

    return train_inds, valid_inds


def calculate_batch_size(batch_size, X_train):
    """Resolve the batch size to use for training.

    An explicit batch size is returned unchanged. ``None`` means full-batch
    training: the batch size becomes the length of the training dataset.

    Inputs:
    -------
        batch_size: int or None
            Batch size.
        X_train: ndarray
            Array of consumer characteristics on which to
            perform training.
    Outputs:
    -------
        batch_size: int
            Batch size.
    """
    return len(X_train) if batch_size is None else batch_size


def plotting_loss_functions(loss1, loss2=None, add_title=""):
    """Plot the recorded losses against the epoch number.

    The training loss is always drawn. The second curve is drawn only when the
    module-level ``early_stopping`` switch is on.

    Inputs:
    -------
        loss1: list of floats
            First list of recorded losses through epochs.
        loss2: list of floats
            Second list of recorded losses through epochs.
        add_title: string
            Addition to the title of the graph.
    """
    validation_losses = [] if loss2 is None else loss2

    plt.figure(figsize=(12, 5))
    plt.clf()
    plt.plot(range(len(loss1)), loss1, "r-", lw=3)

    if early_stopping:
        plt.plot(range(len(validation_losses)), validation_losses, "b-", lw=3)
        legend_entries = ["loss on training set", "loss on validation set"]
        title = "Loss on training and validation set" + add_title
    else:
        legend_entries = ["loss on training set"]
        title = "Loss on training set" + add_title

    plt.legend(legend_entries)
    plt.title(title, fontsize=14)
    plt.xlabel("Epoch number", fontsize=14)
    plt.ylabel("Loss", fontsize=14)

#### Errors start from here

In [None]:
class NeuralNetwork:
    """Create a neural network with specified properties.

    Inputs:
    -------
        hidden_layer_sizes: list of ints
            Length of the list defines the number of hidden layers.
            Entries of the list define the number of hidden units in
            each hidden layer.
        activation_functions: list of {'relu', 'lrelu', 'prelu',
                                       'srelu', 'plu', 'elu', 'none'}
            Activation function for each layer.
            Has to be of length len(hidden_layer_sizes) + 1.
        dropout_rates_train:  list of floats
            Dropout rate to be used during training for each layer.
            Has to be of length len(hidden_layer_sizes) + 1.
        batch_size: int
            Batch size.
        size_of_the_output: int
            Number of units in the output layer.
        nconsumer_characteristics: int
            Number of consumer characteristics.
        alpha: float
            Regularization strength parameter.
        r_par: float
            Mixing ratio of Ridge and Lasso regression.
            Has to be between 0 and 1.
        max_epochs_without_change: int
            Number of epochs with no improvement on the validation loss
            to wait before stopping the training.
        max_nepochs: int
            Maximum number of epochs for which NNs will be trained.
        optimizer: string
            Optimizer
        learning_rate: scalar
            Learning rate.
    """

    def __init__(
        self,
        hidden_layer_sizes,
        activation_functions,
        dropout_rates_train,
        batch_size,
        size_of_the_output,
        nconsumer_characteristics,
        alpha,
        r_par,
        max_epochs_without_change,
        max_nepochs,
        optimizer,
        learning_rate,
    ):
        self.hidden_layer_sizes = hidden_layer_sizes
        self.activation_functions = activation_functions
        self.dropout_rates_train = dropout_rates_train
        self.dropout_rates_test = [0 for i in dropout_rates_train]
        self.batch_size = batch_size
        self.size_of_the_output = size_of_the_output
        self.nconsumer_characteristics = nconsumer_characteristics
        self.alpha = alpha
        self.r_par = r_par
        self.max_epochs_without_change = max_epochs_without_change
        self.max_nepochs = max_nepochs
        self.optimizer = optimizer
        self.learning_rate = learning_rate

    def _fully_connected_layer_builder(
        self,
        input_data,
        hidden_layer_size,
        total_num_features,
        scope_name,
        activation,
        dropout_rate,
    ):
        """Build a fully connected layer within the NN.

        Dropout is applied to the layer input, then the affine map
        ``input @ weights + biases`` is computed and finally the requested
        activation function is applied.

        Inputs:
        -------
            input_data: Tensor
                Output from the previous layer.
            hidden_layer_size: int
                Size of the current layer.
            total_num_features: int
                Number of units from the previous layer.
            scope_name: string
                Scope name.
            activation: {'relu', 'lrelu', 'prelu', 'srelu',
                        'plu', 'elu', 'none'}
                Activation function.
            dropout_rate: scalar
                Dropout rate. Has to be between 0 and 1.

        Outputs:
        -------
            hid_layer_activation: Tensor
                The hidden layer output.

        Raises:
        -------
            ValueError
                If ``activation`` is not one of the names listed above.
        """
        # Dropout on the layer input.
        # NOTE(review): the layer is called without a ``training`` argument -
        # confirm that dropout is actually active while training.
        input_data = tf.keras.layers.Dropout(rate=dropout_rate)(input_data)

        # Creating weights and bias terms for our fully connected layer.
        # The weight variable is initialised from a standard normal; ``weights``
        # is that variable multiplied by sqrt(2), i.e. a derived tensor.
        with tf.name_scope(scope_name):
            weights = np.sqrt(2) * tf.Variable(
                initial_value=tf.random.normal(
                    [total_num_features, hidden_layer_size],
                    mean=0.0,
                    stddev=1.0,
                ),
                name="weights",
            )
            b = tf.Variable(tf.zeros([hidden_layer_size]), name="biases")

        # Defining the fully connected neural network layer
        hid_layer_activation = tf.matmul(input_data, weights) + b

        if activation == "relu":
            hid_layer_activation = tf.nn.relu(hid_layer_activation)
        elif activation == "lrelu":
            hid_layer_activation = tf.nn.leaky_relu(
                hid_layer_activation,
                alpha=0.2,
                name="lrelu",
            )
        elif activation == "prelu":
            hid_layer_activation = tf.keras.layers.PReLU()(hid_layer_activation)
        elif activation == "srelu":
            # NOTE(review): ``srelu_activation`` is not defined in the visible
            # part of the notebook - confirm it exists before using 'srelu'.
            hid_layer_activation = srelu_activation(hid_layer_activation, scope_name)
        elif activation == "plu":
            # NOTE(review): ``plu_activation`` is not defined in the visible
            # part of the notebook - confirm it exists before using 'plu'.
            hid_layer_activation = plu_activation(hid_layer_activation)
        elif activation == "elu":
            hid_layer_activation = tf.nn.elu(hid_layer_activation)
        elif activation == "none":
            # Linear layer: the affine output is returned as is.
            pass
        else:
            raise ValueError(
                "Activation function not recognized! " + "Check the spelling.",
            )
        return hid_layer_activation

    def _building_the_network(self, layer_input, dropout_rates):
        """Build the whole fully connected NN.

        Inputs:
        -------
            layer_input: Tensor
                Input layer.
            dropout_rates: list of floats
                Dropout rate for each layer. Each entry has to
                be between 0 and 1. Has to be of length
                len(hidden_layer_sizes) + 1.

        Outputs:
        -------
            output_fc_layer: Tensor
                Output layer.
        """
        hidden_layer_sizes_expand = [
            *self.hidden_layer_sizes,
            self.size_of_the_output,
            self.nconsumer_characteristics,
        ]

        for i in range(len(self.hidden_layer_sizes) + 1):
            output_fc_layer = self._fully_connected_layer_builder(
                input_data=layer_input,
                hidden_layer_size=hidden_layer_sizes_expand[i],
                total_num_features=hidden_layer_sizes_expand[i - 1],
                scope_name="l" + str(i + 1),
                activation=self.activation_functions[i],
                dropout_rate=dropout_rates[i],
            )
            layer_input = output_fc_layer
        return output_fc_layer

    def _building_the_network_estimates_TE(
        self,
        input_data,
        t_var,
        y_var,
        dropout_rates,
    ):
        """Build the neural network that estimates treatment
        coefficients.

        The first output column of the network is used as the treatment
        coefficient ``tau`` and the second as the baseline ``mu0``; the
        prediction is ``t_var * tau + mu0``.

        Inputs:
        -------
            input_data: Tensor
                Input layer.
            t_var: Tensor
                Treatment
            y_var: Tensor
                Target variable
            dropout_rates: list of floats
                Dropout rate for each layer. Each entry has to
                be between 0 and 1. Has to be of length
                len(hidden_layer_sizes) + 1.

        Outputs:
        -------
            output: Tensor
                Treatment coefficients.
            loss: scalar
                Loss without regularization.
        """
        output = self._building_the_network(input_data, dropout_rates)
        tau = output[:, 0:1]
        mu0 = output[:, 1:2]
        Y_predicted = tf.multiply(t_var, tau) + mu0

        # Mean squared error loss. The Keras loss function averages over the
        # last axis only and so returns one value per sample; it is reduced
        # over the batch here because the training loop compares and records
        # the loss as a single number.
        per_sample_loss = tf.keras.losses.mean_squared_error(y_var, Y_predicted)
        loss = tf.reduce_mean(per_sample_loss)
        return output, loss

    def _building_the_network_estimates_PS(self, input_data, t_var, dropout_rates):
        """Build the neural network that estimates propensity
        scores.

        The network output is treated as logits of the treatment
        probability and scored with the binary cross-entropy.

        Inputs:
        -------
            input_data: Tensor
                Input layer.
            t_var: Tensor
                Treatment
            dropout_rates: list of floats
                Dropout rate for each layer. Each entry has to
                be between 0 and 1. Has to be of length
                len(hidden_layer_sizes) + 1.

        Outputs:
        -------
            output: Tensor
                Output of the NN.
            loss: scalar
                Loss without regularization.
        """
        output = self._building_the_network(input_data, dropout_rates)

        # Calculating binary crossentropy loss.
        # Labels and logits are passed with matching shapes: the original
        # squeezed the logits to rank 1 while the treatment tensor keeps its
        # trailing axis of length 1, so the two did not line up element-wise.
        # NOTE(review): assumes the network output has a single column, like
        # t_var - confirm against how this network is constructed.
        per_sample_loss = tf.keras.losses.binary_crossentropy(
            t_var,
            output,
            from_logits=True,
        )
        # Reduce the per-sample values to the single number documented above.
        loss = tf.reduce_mean(per_sample_loss)
        return output, loss

    def _calc_the_loss_with_reg(self, loss_before_regularization):
        """Calculate loss with regularization.

        An elastic-net penalty is added: the L1 weight is ``alpha * r_par``
        and the L2 weight is ``alpha * (1 - r_par)``. The penalty covers every
        trainable variable of the current graph.

        Inputs:
        -------
            loss_before_regularization: scalar
                Loss without regularization.

        Outputs:
        -------
            total_loss: float
                Loss with regularization.
        """
        l1_l2_regularizer = tf.keras.regularizers.L1L2(
            l1=self.alpha * self.r_par,
            l2=self.alpha * (1 - self.r_par),
        )
        # A Keras regularizer scores one weight tensor per call, so it is
        # applied to each variable separately and the penalties are added up.
        # The original passed the whole list of (differently shaped) variables
        # in a single call.
        regularization_term = sum(
            l1_l2_regularizer(variable) for variable in tf.trainable_variables()
        )

        return loss_before_regularization + regularization_term

    def _optimize_the_loss_function(self, loss_with_regularization):
        """Update the weights after one training step.

        Inputs:
        -------
            loss_with_regularization: scalar
                Loss with regularization.

        Outputs:
        -------
            train_step: Operation that updates the weights
        """
        if self.optimizer == "RMSProp":
            optimizer = tf.keras.optimizers.RMSprop(learning_rate=self.learning_rate)
        elif self.optimizer == "GradientDescent":
            optimizer = tf.keras.optimizers.SGD(learning_rate=self.learning_rate)
        elif self.optimizer == "Adam":
            optimizer = tf.keras.optimizers.Adam(learning_rate=self.learning_rate)
        else:
            raise ValueError(
                'Optimizer not recognized! Please choose from "RMSProp", "GradientDescent", or "Adam".',
            )

        return optimizer.minimize(loss_with_regularization)

    def _create_minibatches(self, X, T, Y, shuffle=False):
        """Create mini-batches generator. Yields a mini-batch of batch_size
        length of consumer characteristics, X, treatments, T, target
        values, Y.

        Inputs:
        -------
            X: ndarray, shape=(len(X_train), nconsumer_characteristics)
                Array of consumer characteristics.
            T: ndarray, shape=(len(X_train), 1)
                Treatment array.
            Y: ndarray, shape=(len(X_train), 1)
                Target value array.
            shuffle: bool
                If True, shuffle the array.

        Outputs:
        -------
            X[excerpt]: ndarray, shape=(batch_size,
                                        nconsumer_characteristics)
                Mini batch of consumer characteristics.
            T[excerpt]: ndarray, shape=(batch_size, 1)
                Mini batch of treatment values.
            Y[excerpt]: ndarray, shape=(batch_size, 1)
                Mini batch of target values.
        """
        num_samples = X.shape[0]
        indices = np.arange(num_samples)
        if shuffle:
            np.random.shuffle(indices)

        for start_idx in range(0, num_samples, self.batch_size):
            excerpt = indices[start_idx : start_idx + self.batch_size]
            yield X[excerpt], T[excerpt], Y[excerpt]

    def _training_the_NN(
        self,
        X_train,
        T_train,
        Y_train,
        X_valid=None,
        T_valid=None,
        Y_valid=None,
    ):
        """Train a NN for max_nepochs or until early stopping criterion
        is met.

        Inputs:
        -------
            X_train: ndarray, shape=(num_train_samples, num_features)
                Training input data.
            T_train: ndarray, shape=(num_train_samples, 1)
                Training treatment array.
            Y_train: ndarray, shape=(num_train_samples, 1)
                Training target value array.
            X_valid: ndarray, shape=(num_valid_samples, num_features), optional
                Validation input data.
            T_valid: ndarray, shape=(num_valid_samples, 1), optional
                Validation treatment array.
            Y_valid: ndarray, shape=(num_valid_samples, 1), optional
                Validation target value array.

        Outputs:
        -------
            best_loss: float
                Minimum value of loss achieved on the validation set if
                early stopping is enabled and validation data is provided.
                Otherwise, loss achieved on the training set during the last epoch.
            epoch_best: int
                Epoch at which the minimum loss on the validation set was
                achieved if early stopping is enabled and validation data is provided.
                Otherwise, equal to max_nepochs for which the NN is trained.
            output_best: ndarray
                Output of the NN at the epoch_best.
            total_nparameters: int
                Number of neural network parameters.
        """
        num_features = X_train.shape[1]
        X_train.shape[0]

        # Placeholders
        x = tf.placeholder(tf.float32, shape=[None, num_features])
        t = tf.placeholder(tf.float32, shape=[None, 1])
        y = tf.placeholder(tf.float32, shape=[None, 1])
        dropout_rates = tf.placeholder(tf.float32, shape=[None])

        output, loss = self._building_the_network(x, dropout_rates)

        total_loss = self._calc_the_loss_with_reg(loss)

        train_step = self._optimize_the_loss_function(total_loss)

        sess = tf.InteractiveSession()
        # Initializing all variables
        sess.run(tf.global_variables_initializer())
        epoch_without_change = 0
        break_cond = False

        loss_train_list = []

        if (
            self.early_stopping
            and X_valid is not None
            and T_valid is not None
            and Y_valid is not None
        ):
            loss_validation_list = []
            validation_loss_min = float("inf")
            feed_dict_valid = {
                x: X_valid,
                t: T_valid,
                y: Y_valid,
                dropout_rates: self.dropout_rates_test,
            }

        for i in range(self.max_nepochs):
            if (
                self.early_stopping
                and X_valid is not None
                and T_valid is not None
                and Y_valid is not None
            ):
                loss_valid = total_loss.eval(feed_dict=feed_dict_valid)
                loss_validation_list.append(loss_valid)

                if validation_loss_min > loss_valid:
                    validation_loss_min = loss_valid
                    output_best = output.eval(
                        feed_dict={x: X_train, dropout_rates: self.dropout_rates_test},
                    )
                    epoch_best = i
                    epoch_without_change = 0
                else:
                    epoch_without_change += 1

            s = 0
            for mini_batch in self._create_minibatches(
                X_train,
                T_train,
                Y_train,
                shuffle=True,
            ):
                x_batch, t_batch, y_batch = mini_batch
                feed_dict_train = {
                    x: x_batch,
                    t: t_batch,
                    y: y_batch,
                    dropout_rates: self.dropout_rates_train,
                }
                loss_train = sess.run(total_loss, feed_dict=feed_dict_train)
                if s == 0:
                    loss_train_list.append(loss_train)

                if epoch_without_change > self.max_epochs_without_change:
                    break_cond = True
                    break
                sess.run(train_step, feed_dict=feed_dict_train)
                s += 1

            if self.verbose and i % 25 == 0:
                if (
                    self.early_stopping
                    and X_valid is not None
                    and T_valid is not None
                    and Y_valid is not None
                ):
                    print("%d epoch:" % i, "loss on validation set:", loss_valid)
                else:
                    print("%d epoch:" % i, "loss on training set:", loss_train)

            # Check the stopping condition
            if break_cond:
                if self.verbose:
                    print("Training is finished! ", end="")
                    if (
                        self.early_stopping
                        and X_valid is not None
                        and T_valid is not None
                        and Y_valid is not None
                    ):
                        print("Best validation loss achieved at %d epoch" % epoch_best)
                break

        if (
            not self.early_stopping
            or X_valid is None
            or T_valid is None
            or Y_valid is None
        ):
            output_best = output.eval(
                feed_dict={x: X_train, dropout_rates: self.dropout_rates_test},
            )
            epoch_best = i + 1
            best_loss = loss_train
            loss_list = loss_train_list
        else:
            best_loss = validation_loss_min
            loss_list = loss_validation_list

        # Num of N parameters
        total_nparameters = np.sum(
            [
                np.product([xi.value for xi in x.get_shape()])
                for x in tf.trainable_variables()
            ],
        )

        # Plotting loss functions
        if self.plot_true:
            plotting_loss_functions(loss_train_list, loss_list)

        # Close tf.InteractiveSession
        sess.close()

        return best_loss, epoch_best, output_best, total_nparameters

    def training_the_NN_estimates_TE(
        self,
        X_train,
        T_train,
        Y_train,
        X_valid=None,
        T_valid=None,
        Y_valid=None,
    ):
        """Fit the network that estimates the treatment coefficients.

        Public entry point for the treatment-effect network. All six arrays
        are forwarded positionally, in the order listed below, to
        ``self._training_the_NN`` and its result is returned unchanged.
        Training runs for max_nepochs or until the early stopping
        criterion is met.

        Inputs:
        -------
            X_train: ndarray, shape=(num_train_samples, num_features)
                Features used for training.
            T_train: ndarray, shape=(num_train_samples, 1)
                Treatment indicator of the training samples.
            Y_train: ndarray, shape=(num_train_samples, 1)
                Target values of the training samples.
            X_valid: ndarray, shape=(num_valid_samples, num_features), optional
                Features of the validation samples.
            T_valid: ndarray, shape=(num_valid_samples, 1), optional
                Treatment indicator of the validation samples.
            Y_valid: ndarray, shape=(num_valid_samples, 1), optional
                Target values of the validation samples.

        Outputs:
        -------
            best_loss: float
                Smallest validation loss when early stopping is enabled and
                validation data is supplied; otherwise the training loss
                from the last epoch.
            epoch_best: int
                Epoch at which that smallest validation loss occurred when
                early stopping is enabled and validation data is supplied;
                otherwise max_nepochs, the number of epochs trained.
            output_best: ndarray
                Network output at epoch_best.
            total_nparameters: int
                Number of neural network parameters.
        """
        # Order matters: the private trainer receives these positionally.
        training_args = (X_train, T_train, Y_train, X_valid, T_valid, Y_valid)
        return self._training_the_NN(*training_args)

    def training_the_NN_estimates_PS(
        self,
        X_train,
        T_train,
        X_valid=None,
        T_valid=None,
    ):
        """Fit the network that estimates the propensity scores.

        Public entry point for the propensity-score network. The training
        arrays are forwarded positionally and the validation arrays by
        keyword to ``self._training_the_NN``; no target (Y) arrays are
        passed on. Training runs for max_nepochs or until the early
        stopping criterion is met.

        Inputs:
        -------
            X_train: ndarray, shape=(num_train_samples, num_features)
                Features used for training.
            T_train: ndarray, shape=(num_train_samples, 1)
                Treatment indicator of the training samples.
            X_valid: ndarray, shape=(num_valid_samples, num_features), optional
                Features of the validation samples.
            T_valid: ndarray, shape=(num_valid_samples, 1), optional
                Treatment indicator of the validation samples.

        Outputs:
        -------
            best_loss: float
                Smallest validation loss when early stopping is enabled and
                validation data is supplied; otherwise the training loss
                from the last epoch.
            epoch_best: int
                Epoch at which that smallest validation loss occurred when
                early stopping is enabled and validation data is supplied;
                otherwise max_nepochs, the number of epochs trained.
            output_best: ndarray
                Network output at epoch_best.
            total_nparameters: int
                Number of neural network parameters.
        """
        # NOTE(review): this relies on _training_the_NN accepting a call
        # without Y_train / Y_valid -- confirm against its signature.
        validation_kwargs = {"X_valid": X_valid, "T_valid": T_valid}
        return self._training_the_NN(X_train, T_train, **validation_kwargs)

In [None]:
import numpy as np


def influence_functions(mu0_pred, tau_pred, Y, T, prob_t_pred, treatment="not_random"):
    """Evaluate the influence functions psi_0 and psi_1 for every observation.

    With p denoting the treatment probability, the returned values are

        psi_0 = (1 - T) * (Y - mu0_pred) / (1 - p) + mu0_pred
        psi_1 = T * (Y - mu0_pred - tau_pred) / p + mu0_pred + tau_pred

    where p is ``prob_t_pred`` clipped to [0.0001, 0.9999] when
    ``treatment == "not_random"`` and the sample mean of ``T`` for any other
    value of ``treatment``.

    All arrays are combined elementwise under NumPy broadcasting, so they
    should share one shape; mixing (N,) and (N, 1) arrays broadcasts to
    (N, N).

    Inputs:
    -------
        mu0_pred: ndarray, shape=(N, 1)
            Predicted baseline term: it is subtracted from Y in both
            residuals and added back in both influence functions.
        tau_pred: ndarray, shape=(N, 1)
            Estimated conditional average treatment effect.
        Y: ndarray, shape=(N,)
            Target value array.
        T: ndarray, shape=(N,)
            Treatment array.
        prob_t_pred: ndarray, shape=(N,)
            Estimated propensity scores. Only read when treatment is
            'not_random'.
        treatment: str, default='not_random'
            Type of treatment. Options: 'not_random', 'random'.

    Outputs:
    -------
        psi_0: ndarray, shape=(N, 1)
            Influence function for given x in case of no treatment.
        psi_1: ndarray, shape=(N, 1)
            Influence function for given x in case of treatment.
    """
    control_residual = (1 - T) * (Y - mu0_pred)
    treated_residual = T * (Y - mu0_pred - tau_pred)

    if treatment == "not_random":
        # Clipping keeps both p and 1 - p away from zero in the divisions.
        p_treated = np.clip(prob_t_pred, 0.0001, 0.9999)
    else:
        # Under random assignment the treated share stands in for p.
        p_treated = np.mean(T)
    p_control = 1 - p_treated

    psi_0 = control_residual / p_control + mu0_pred
    psi_1 = treated_residual / p_treated + mu0_pred + tau_pred
    return psi_0, psi_1

In [None]:
import os

import pandas as pd


def update_model_comparison_file(name, model_info, cols):
    """Append the results of the current run to a .csv file of model results.

    Inputs:
    -------
        name: string
            File name. If the file does not already exist a new file is
            created. Otherwise the new model results are appended to the
            existing file.
        model_info: list
            Results of the current run, given as a list of rows. The first
            entry of the first row is overwritten in place with the running
            model index (the number of rows already stored in the file).
        cols: list
            Names of columns within the .csv file.
            Has to be of the same length as each row of model_info and must
            contain "Model number", which is used to count stored runs.
    """
    if os.path.isfile(name):
        print("File already exists. Appending model run!")
        catalog = pd.read_csv(name)
    else:
        print("File does not exist. Creating new file!")
        catalog = pd.DataFrame(columns=cols)

    # The index of this run equals the number of runs stored so far.
    model_index = len(catalog["Model number"])
    model_info[0][0] = model_index
    new_rows = pd.DataFrame(model_info, columns=cols)

    # DataFrame.append was removed in pandas 2.0; pd.concat replaces it.
    # An empty catalogue is skipped so that its placeholder (object) dtypes
    # do not influence the dtypes of the first real rows.
    if catalog.empty:
        combined = new_rows
    else:
        combined = pd.concat([catalog, new_rows], ignore_index=True)
    combined.to_csv(name, index=False)

In [None]:
def main():
    """Train the networks on the real data and report a 95% CI for the ATE.

    Steps performed:
      1. Print the configured architectures of both networks.
      2. Split the module-level X, T_real and Y into training and
         validation parts (80/20, fixed random_state).
      3. Train the first network via training_the_NN_estimates_TE and, when
         FLAGS.treatment == "not_random", a second network via
         training_the_NN_estimates_PS.
      4. Build the influence functions on the training rows and derive a
         normal-approximation 95% confidence interval for the mean of
         psi_1 - psi_0.
      5. Report whether tau_true_mean falls inside that interval.

    The function takes no arguments and returns nothing; it reads its
    configuration from module-level names (hidden_layer_sizes,
    activation_functions, dropout_rates_train, their *_treatment
    counterparts, batch_size, batch_size_t, X, T_real, Y, FLAGS,
    tau_true_mean) and uses train_test_split, calculate_batch_size,
    NeuralNetwork and influence_functions, all of which must be in scope.
    """
    # Kept from the original: the split below rebinds these module-level
    # names to the training subsets.
    # NOTE(review): confirm whether other cells depend on this side effect.
    global X_train, T_train, Y_train

    print("-------------------------------------------------------")
    print("Running Monte Carlo simulations for the following case:")
    print("* Real data")
    print("* Using the following NN architectures:")
    print("First NN hidden layer sizes: ", hidden_layer_sizes)
    print("First NN hidden activations: ", activation_functions)
    print("First NN dropout rates: ", dropout_rates_train, "\n")

    print("Second NN hidden layer sizes: ", hidden_layer_sizes_treatment)
    print("Second NN hidden activations: ", activation_functions_treatment)
    print("Second NN dropout rates: ", dropout_rates_train_treatment)
    print("-------------------------------------------------------\n")

    count_in_interval = 0

    # --------------------- Training the model ---------------------
    print("Shapes of X, T_real, Y:", X.shape, T_real.shape, Y.shape)
    X_train, X_valid, T_train, T_valid, Y_train, Y_valid = train_test_split(
        X,
        T_real,
        Y,
        test_size=0.2,
        random_state=42,
    )

    print(
        "Shapes of X_train, X_valid, T_train, T_valid, Y_train, Y_valid:",
        X_train.shape,
        X_valid.shape,
        T_train.shape,
        T_valid.shape,
        Y_train.shape,
        Y_valid.shape,
    )

    batch_size_ = calculate_batch_size(batch_size, X_train)
    batch_size_t_ = calculate_batch_size(batch_size_t, X_train)

    # NOTE(review): the trailing 2 is presumably the number of network
    # outputs (tau and mu0, split below) -- confirm against NeuralNetwork.
    first_NN = NeuralNetwork(
        hidden_layer_sizes,
        activation_functions,
        dropout_rates_train,
        batch_size_,
        2,
    )
    # The training method requires the data; calling it without arguments
    # raises TypeError.
    _, _, betas_pred_best, _ = first_NN.training_the_NN_estimates_TE(
        X_train,
        T_train,
        Y_train,
        X_valid,
        T_valid,
        Y_valid,
    )

    uses_propensity = FLAGS.treatment == "not_random"
    if uses_propensity:
        second_NN = NeuralNetwork(
            hidden_layer_sizes_treatment,
            activation_functions_treatment,
            dropout_rates_train_treatment,
            batch_size_t_,
            1,
        )
        _, _, treat_best, _ = second_NN.training_the_NN_estimates_PS(
            X_train,
            T_train,
            X_valid,
            T_valid,
        )
        # Logistic transform of the second network's raw output.
        prob_of_t_pred = 1 / (1 + np.exp(-treat_best))
    else:
        prob_of_t_pred = None

    # -------------------- Looking at the results ----------------
    # The network output is evaluated on X_train, so it has one row per
    # training sample: column 0 is tau, the remaining column is mu0.
    tau_pred = betas_pred_best[:, 0:1]
    mu0_pred = betas_pred_best[:, 1:]

    # Calculating confidence interval for average treatment effect.
    # Predictions exist for the training rows only, so they are paired with
    # Y_train / T_train rather than with the full-sample Y / T_real.
    # Forwarding FLAGS.treatment keeps the random-assignment case away from
    # the propensity branch, where prob_of_t_pred is None.
    psi_0, psi_1 = influence_functions(
        mu0_pred,
        tau_pred,
        Y_train,
        T_train,
        prob_of_t_pred,
        treatment=FLAGS.treatment,
    )
    psi_diff = psi_1 - psi_0
    ate_estimate = np.mean(psi_diff)
    half_width = 1.96 * np.std(psi_diff) / np.sqrt(len(Y_train))
    CI_lower_bound = ate_estimate - half_width
    CI_upper_bound = ate_estimate + half_width

    # NOTE(review): tau_true_mean must be defined at module level; confirm
    # what it should be for real (non-simulated) data.
    in_95_conf_int = CI_lower_bound < tau_true_mean < CI_upper_bound

    print("is tau_true_mean in interval:", in_95_conf_int)
    print(
        f"CI lower and upper bound are: ({CI_lower_bound:0.3f}, {CI_upper_bound:0.3f})",
    )
    if in_95_conf_int:
        count_in_interval += 1

    # ----------------- Saving the results! ----------------------
    # Results are only printed here; nothing is written to disk.
    # Only a single run is performed above, so the count is 0 or 1.
    print(
        "%d out of %d simulations contain tau_true_mean in the CI"
        % (count_in_interval, FLAGS.nsimulations),
    )


# Run the training and evaluation pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()