In [2]:
import pandas as pd
from linearmodels.panel import PanelOLS, RandomEffects
from linearmodels.panel import compare

def load_and_merge_data(file_list):
    """ Load and merge multiple CSV files into a single DataFrame """
    data_frames = []
    for file_name in file_list:
        file_name = './../data/final_vars/' + file_name
        df = pd.read_csv(file_name, index_col=['date', 'cb privatbank', 'credit agricole bank', 
                                               'fuib', 'kredobank', 'oschadbank', 'otp bank', 
                                               'pivdennyi bank', 'raiffeisen bank',
                                               'sense bank', 'ukrsibbank', 'universal bank'])
        df = df.stack().reset_index()
        df.columns = ['date', 'bank', 'variable', file_name[:-4]]
        data_frames.append(df.pivot(index=['date', 'bank'], columns='variable', values=file_name[:-4]))
    
    merged_df = pd.concat(data_frames, axis=1)
    return merged_df

def panel_analysis(df):
    df = df.dropna()  # ensure there are no NaNs, you might want to handle this differently
    df = df.astype(float)  # convert all columns to float for analysis

    # Fixed Effects Model
    fe_model = PanelOLS.from_formula('NIM ~ INF + LAS + NIA + OE + PR + RA + ROA + SCTA + SIZE + EntityEffects + TimeEffects', data=df)
    fe_results = fe_model.fit()
    
    # Random Effects Model
    re_model = RandomEffects.from_formula('NIM ~ INF + LAS + NIA + OE + PR + RA + ROA + SCTA + SIZE', data=df)
    re_results = re_model.fit()

    # Comparing Models and Conducting Hausman Test
    comparison = compare({'FE': fe_results, 'RE': re_results}, precision='pvalues')
    
    return fe_results, re_results, comparison

# Your CSV files
files = ['INF.csv', 'LAS.csv', 'NIA.csv', 'NIM.csv', 'OE.csv', 'PR.csv', 'RA.csv', 'ROA.csv', 'SCTA.csv', 'SIZE.csv']

# Load and preprocess the data
df = load_and_merge_data(files)

# Run panel data analysis
fixed_effects_results, random_effects_results, hausman_results = panel_analysis(df)

# Output the results
print("Fixed Effects Results:", fixed_effects_results)
print("Random Effects Results:", random_effects_results)
print("Hausman Test Results:", hausman_results)


ValueError: Index cb privatbank invalid

In [6]:
import pandas as pd
from linearmodels.panel import PanelOLS, RandomEffects
from linearmodels.panel import compare

def load_and_merge_data(file_list):
    """ Load and merge multiple CSV files into a single DataFrame """
    data_frames = []
    
    for file_name in file_list:
        file_name = './../data/final_vars/' + file_name
        df = pd.read_csv(file_name)
        df_long = pd.melt(df, id_vars=['date'], var_name='bank', value_name=file_name[:-4])
        data_frames.append(df_long)
    
    all_data = pd.DataFrame(data_frames[0])
    
    for df in data_frames[1:]:
        all_data = pd.merge(all_data, df, on=['date', 'bank'], how='outer')
    
    all_data.set_index(['date', 'bank'], inplace=True)
    return all_data

def panel_analysis(df):
    df = df.dropna()  # Drop missing values.
    df['NIM'] = df['NIM'].astype(float)  # Ensuring that dependent variable is float.
    for col in df.columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')  # Convert all columns to numeric values.

    # Fixed Effects Model
    fe_model = PanelOLS(df['NIM'], df.drop(columns=['NIM']).assign(const=1), entity_effects=True, time_effects=True)
    fe_results = fe_model.fit(cov_type='clustered', cluster_entity=True)
    
    # Random Effects Model
    re_model = RandomEffects(df['NIM'], df.drop(columns=['NIM']).assign(const=1))
    re_results = re_model.fit()

    # Comparing Models and Conducting Hausman Test
    hausman_test = compare({'FE': fe_results, 'RE': re_results}, precision='pvalues', stars=False)
    
    return fe_results, re_results, hausman_test

# List of your CSV files
files = ['INF.csv', 'LAS.csv', 'NIA.csv', 'NIM.csv', 'OE.csv', 'PR.csv', 'RA.csv', 'ROA.csv', 'SCTA.csv', 'SIZE.csv']

# Load and preprocess the data
df = load_and_merge_data(files)
print(df.head())
# # Run panel data analysis
# fixed_effects_results, random_effects_results, hausman_results = panel_analysis(df)
# 
# # Output the results
# print("Fixed Effects Results:\n", fixed_effects_results)
# print("\nRandom Effects Results:\n", random_effects_results)
# print("\nHausman Test Results:\n", hausman_results)

              ./../data/final_vars/INF  ./../data/final_vars/LAS  \
date    bank                                                       
01/2020 inf                        2.4                       NaN   
01/2021 inf                        7.5                       NaN   
01/2022 inf                       10.7                       NaN   
01/2023 inf                       24.9                       NaN   
01/2024 inf                        4.3                       NaN   

              ./../data/final_vars/NIA  ./../data/final_vars/NIM  \
date    bank                                                       
01/2020 inf                        NaN                       NaN   
01/2021 inf                        NaN                       NaN   
01/2022 inf                        NaN                       NaN   
01/2023 inf                        NaN                       NaN   
01/2024 inf                        NaN                       NaN   

              ./../data/final_vars/OE  ./../da

In [13]:
import pandas as pd
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col

# Load all the CSV files into separate DataFrames
#inf = pd.read_csv('./../data/final_vars/INF.csv')
las = pd.read_csv('./../data/final_vars/LAS.csv')
nia = pd.read_csv('./../data/final_vars/NIA.csv')
nim = pd.read_csv('./../data/final_vars/NIM.csv')
oe = pd.read_csv('./../data/final_vars/OE.csv')
#pr = pd.read_csv('./../data/final_vars/PR.csv')
ra = pd.read_csv('./../data/final_vars/RA.csv')
roa = pd.read_csv('./../data/final_vars/ROA.csv')
scta = pd.read_csv('./../data/final_vars/SCTA.csv')
size = pd.read_csv('./../data/final_vars/SIZE.csv')

# Merge all the DataFrames into a single DataFrame
#data = nim.merge(inf, on=['date', 'cb privatbank', 'credit agricole bank', 'fuib', 'kredobank', 'oschadbank', 'otp bank', 'pivdennyi bank', 'raiffeisen bank', 'sense bank', 'ukrsibbank', 'universal bank'], how='left')
data = nim.merge(las, on=['date', 'cb privatbank', 'credit agricole bank', 'fuib', 'kredobank', 'oschadbank', 'otp bank', 'pivdennyi bank', 'raiffeisen bank', 'sense bank', 'ukrsibbank', 'universal bank'], how='left')
data = data.merge(nia, on=['date', 'cb privatbank', 'credit agricole bank', 'fuib', 'kredobank', 'oschadbank', 'otp bank', 'pivdennyi bank', 'raiffeisen bank', 'sense bank', 'ukrsibbank', 'universal bank'], how='left')
data = data.merge(oe, on=['date', 'cb privatbank', 'credit agricole bank', 'fuib', 'kredobank', 'oschadbank', 'otp bank', 'pivdennyi bank', 'raiffeisen bank', 'sense bank', 'ukrsibbank', 'universal bank'], how='left')
#data = data.merge(pr, on=['date', 'cb privatbank', 'credit agricole bank', 'fuib', 'kredobank', 'oschadbank', 'otp bank', 'pivdennyi bank', 'raiffeisen bank', 'sense bank', 'ukrsibbank', 'universal bank'], how='left')
data = data.merge(ra, on=['date', 'cb privatbank', 'credit agricole bank', 'fuib', 'kredobank', 'oschadbank', 'otp bank', 'pivdennyi bank', 'raiffeisen bank', 'sense bank', 'ukrsibbank', 'universal bank'], how='left')
data = data.merge(roa, on=['date', 'cb privatbank', 'credit agricole bank', 'fuib', 'kredobank', 'oschadbank', 'otp bank', 'pivdennyi bank', 'raiffeisen bank', 'sense bank', 'ukrsibbank', 'universal bank'], how='left')
data = data.merge(scta, on=['date', 'cb privatbank', 'credit agricole bank', 'fuib', 'kredobank', 'oschadbank', 'otp bank', 'pivdennyi bank', 'raiffeisen bank', 'sense bank', 'ukrsibbank', 'universal bank'], how='left')
data = data.merge(size, on=['date', 'cb privatbank', 'credit agricole bank', 'fuib', 'kredobank', 'oschadbank', 'otp bank', 'pivdennyi bank', 'raiffeisen bank', 'sense bank', 'ukrsibbank', 'universal bank'], how='left')

# Set the date column as the index
data = data.set_index('date')

print(data.head())
# Create the fixed effect model
fixed_effects = smf.ols('NIM ~ LAS + NIA + OE + RA + ROA + SCTA + SIZE', data=data).fit()
print(fixed_effects.summary())

# Create the random effect model
random_effects = smf.mixedlm('NIM ~ LAS + NIA + OE + RA + ROA + SCTA + SIZE', data=data, groups=data.index).fit()
print(random_effects.summary())

# Run the Hausman test
from statsmodels.stats.panel import compare_nested_pm
hausman_test = compare_nested_pm(fixed_effects, random_effects)
print(hausman_test)

            cb privatbank  credit agricole bank      fuib  kredobank  \
date                                                                   
2019-02-01       0.002756              0.005225  0.007254   0.006358   
2019-03-01       0.003304              0.005382  0.007281   0.007150   
2019-04-01       0.003044              0.005423  0.007341   0.006603   
2019-05-01       0.003282              0.005254  0.007146   0.006164   
2019-06-01       0.003109              0.004842  0.007220   0.006821   

            oschadbank  otp bank  pivdennyi bank  raiffeisen bank  sense bank  \
date                                                                            
2019-02-01    0.006125  0.007255        0.004385         0.007056    0.004610   
2019-03-01   -0.003152  0.007444        0.003774         0.007565    0.004528   
2019-04-01    0.001788  0.007224        0.003401         0.007481    0.004516   
2019-05-01    0.001534  0.007762        0.003999         0.007299    0.004788   
2019-06-0

PatsyError: Error evaluating factor: NameError: name 'OE' is not defined
    NIM ~ LAS + NIA + OE + RA + ROA + SCTA + SIZE
                      ^^