Load all the necessary libraries

In [4]:
!pip install networkx matplotlib seaborn scipy numpy

Collecting networkx
  Downloading networkx-3.5-py3-none-any.whl.metadata (6.3 kB)
Downloading networkx-3.5-py3-none-any.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m6.9 MB/s[0m  [33m0:00:00[0m eta [36m0:00:01[0m
[?25hInstalling collected packages: networkx
Successfully installed networkx-3.5


In [1]:
# Install required packages for Colab/Jupyter
!pip install pandas numpy scikit-learn tensorflow econml matplotlib seaborn fredapi

Collecting pandas
  Downloading pandas-2.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (91 kB)
Collecting numpy
  Downloading numpy-2.3.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (62 kB)
Collecting scikit-learn
  Downloading scikit_learn-1.7.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)
Collecting tensorflow
  Downloading tensorflow-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.5 kB)
Collecting econml
  Downloading econml-0.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (37 kB)
Collecting matplotlib
  Downloading matplotlib-3.10.6-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)
Collecting seaborn
  Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting fredapi
  Downloading fredapi-0.5.2-py3-none-any.whl.metadata (5.0 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-a

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from econml.dml import LinearDML
import matplotlib.pyplot as plt
import seaborn as sns
from fredapi import Fred

# Set random seed for reproducibility
# NOTE(review): this seeds NumPy's legacy global RNG only. The TensorFlow/Keras
# classes imported in this cell are not seeded by this call -- confirm whether
# a TensorFlow seed is set wherever the models are built.
np.random.seed(42)

2025-09-14 21:48:05.688377: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-09-14 21:48:07.248004: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-09-14 21:48:10.289494: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
  from .autonotebook import tqdm as notebook_tqdm


In [5]:

import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
from scipy import stats
from scipy.stats import pearsonr
import itertools
from typing import Dict, List, Tuple, Set
import warnings
import os
import json
from datetime import datetime
# NOTE(review): with no category or module filter this ignores every warning
# raised from here on, which can hide deprecation and numerical warnings.
warnings.filterwarnings('ignore')

# Set random seed for reproducibility
# (same call as in the earlier import cell; running it again resets NumPy's
# global RNG stream to the start of the seed-42 sequence)
np.random.seed(42)

**Load and Preprocess Macroeconomic Data**

In [7]:
# Load and preprocess macroeconomic data
def load_macro_data(data_dir='/workspaces/primary-thesis/database', fred_api_key=None):
    """Assemble a monthly macroeconomic DataFrame from CSV files and the FRED API.

    Reads ``GDPC1.csv``, ``CPIAUCSL.csv`` and ``UNRATE.csv`` from ``data_dir``,
    downloads the ``FEDFUNDS`` series from FRED, forward-fills GDP onto
    month-start dates, and inner-joins the four series on ``observation_date``.

    Parameters
    ----------
    data_dir : str or os.PathLike, optional
        Directory containing the three CSV files. Each file must have an
        ``observation_date`` column plus a value column named after the file
        (``GDPC1``, ``CPIAUCSL``, ``UNRATE``).
    fred_api_key : str, optional
        FRED API key. When omitted, the ``FRED_API_KEY`` environment variable
        is used. The key is deliberately not stored in the notebook.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``observation_date`` with columns ``GDP``, ``CPI``,
        ``Unemployment`` and ``interest_rate``; rows containing missing
        values are dropped.

    Raises
    ------
    ValueError
        If no API key is passed and ``FRED_API_KEY`` is not set.
    """
    # Resolve the credential first so a missing key fails before any file or
    # network I/O is attempted.
    api_key = fred_api_key or os.environ.get('FRED_API_KEY')
    if not api_key:
        raise ValueError(
            "No FRED API key available: pass fred_api_key=... or set the "
            "FRED_API_KEY environment variable."
        )

    def _read_series(filename, source_col, target_col):
        # Load one CSV export, rename its value column, index it by date.
        frame = pd.read_csv(
            os.path.join(data_dir, filename),
            parse_dates=['observation_date'],
        )
        return frame.rename(columns={source_col: target_col}).set_index('observation_date')

    gdp = _read_series('GDPC1.csv', 'GDPC1', 'GDP')
    cpi = _read_series('CPIAUCSL.csv', 'CPIAUCSL', 'CPI')
    unemp = _read_series('UNRATE.csv', 'UNRATE', 'Unemployment')

    # Effective federal funds rate, fetched live from FRED.
    fred = Fred(api_key=api_key)
    interest_rate = fred.get_series('FEDFUNDS').to_frame(name='interest_rate')
    interest_rate.index = pd.to_datetime(interest_rate.index)
    interest_rate.index.name = 'observation_date'

    # Put GDP on a month-start grid, carrying each observation forward until
    # the next one. The other series are joined at their native dates.
    # NOTE(review): the forward-fill ends at the last GDP observation date, so
    # later months present in the other series are removed by the inner join.
    gdp = gdp.resample('MS').ffill()

    df = (
        gdp.join(cpi, how='inner')
        .join(unemp, how='inner')
        .join(interest_rate, how='inner')
        .dropna()
    )

    print("Macroeconomic data shape:", df.shape)
    print(df.head())
    return df

macro_df = load_macro_data()

Macroeconomic data shape: (847, 4)
                       GDP    CPI  Unemployment  interest_rate
observation_date                                              
1954-07-01        2880.482  26.86           5.8           0.80
1954-08-01        2880.482  26.85           6.0           1.22
1954-09-01        2880.482  26.81           6.1           1.07
1954-10-01        2936.852  26.72           5.7           0.85
1954-11-01        2936.852  26.78           5.3           0.83


**Load Firm Data**

In [6]:
# Load and preprocess firm data
def load_firm_data(path='/workspaces/primary-thesis/database/bds2022.csv'):
    """Load the firm CSV and derive a per-row firm survival rate.

    ``survival_rate`` is computed as ``1 - firmdeath_firms / firms`` for every
    row of the file.

    Parameters
    ----------
    path : str or os.PathLike, optional
        Location of the CSV file. It must contain the columns ``year``,
        ``firms`` and ``firmdeath_firms``.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``year`` and ``survival_rate``, in the file's row order.

    Raises
    ------
    KeyError
        If any of the required columns is missing from the file.
    """
    raw = pd.read_csv(path)

    required = ('year', 'firms', 'firmdeath_firms')
    missing = [col for col in required if col not in raw.columns]
    if missing:
        raise KeyError(f"{path} is missing required column(s): {missing}")

    # Share of firms that did not die in the row's period.
    # NOTE(review): rows with firms == 0 produce inf/NaN here, as before.
    firm_data = raw.assign(
        survival_rate=1 - (raw['firmdeath_firms'] / raw['firms'])
    )[['year', 'survival_rate']]

    print("Firm data shape:", firm_data.shape)
    print(firm_data.head())
    return firm_data

firm_data = load_firm_data()

Firm data shape: (45, 2)
   year  survival_rate
0  1978       0.908297
1  1979       0.910581
2  1980       0.901265
3  1981       0.899917
4  1982       0.891412


**Load Microeconomic Data**