In [1]:
import semopy
import pandas as pd
import numpy as np
import os
from joblib import Parallel, delayed
import pygsheets

In [2]:
# Authorise against Google Sheets using the service-account file, then open
# the three result workbooks by title (estimates, fit scores, factor scores).
google_client = pygsheets.authorize(service_file='idea-frm-500603810b1a.json')

(
    SEM_ModelEstimates_ConsolidatedRC_google_sheet,
    SEM_Modelfit_Scores_ConsolidatedRC_google_sheet,
    SEM_FactorScores_ConsolidatedRC_google_sheet,
) = map(
    google_client.open,
    (
        'Model_Estimates_SEM_Consolidated_RC',
        'ModelFit_Scores_SEM_Consolidated_RC',
        'Factor_Scores_SEM_Consolidated_RC',
    ),
)

In [3]:
# Load the consolidated SEM input table from CSV (path is relative to the
# notebook's working directory).
consolidated_sem_input_rc = pd.read_csv(
    filepath_or_buffer='SEM_InputData/SEM_Input_Consolidated_5Years.csv',
)

In [5]:
# Min-max scale every feature column to the [0, 1] range; the identifier and
# revenue columns listed below are copied through unchanged.
passthrough_columns = ['object_id', 'district', 'revenue_cr']

normalized_columns = {}
for column in consolidated_sem_input_rc.columns:
    x = consolidated_sem_input_rc[column]
    if column in passthrough_columns:
        normalized_columns[column] = x
        continue
    # Series.min()/max() skip NaN. The builtin min()/max() compare element by
    # element, so their result depends on where a NaN sits in the column (a
    # leading NaN would turn the whole scaled column into NaN).
    col_min = x.min()
    col_max = x.max()
    # A constant column has col_max == col_min; the resulting 0/0 division
    # leaves NaN here, which the later .fillna(0) calls replace with 0.
    normalized_columns[column] = (x - col_min) / (col_max - col_min)

# Build the frame in one step (dict order keeps the input column order)
# instead of inserting columns one at a time into an empty DataFrame.
consolidated_sem_input_rc_norm = pd.DataFrame(
    normalized_columns,
    index=consolidated_sem_input_rc.index,
)

In [6]:
# Model description string passed to semopy.Model / semopy.ModelEffects.
# Measurement part (`=~`): five latent variables -- flood_proneness, demography,
# infra_access, flood_impact, preparedness -- each defined by a list of
# observed indicator columns.
# Structural part (`~`): flood_impact is regressed on the three exogenous
# latents, and preparedness on those three plus flood_impact.
# NOTE(review): indicator names such as `strm_filled_slope_degrees` and
# `proximity_hosptial_rd` look misspelled; presumably they match the CSV
# column headers exactly -- confirm before renaming anything.
structural_model_spec = """
# measurement model
flood_proneness =~ Inundation + assam_dist_from_major_rivers_updated_3857 + sum + GCN250_ARCIII_average + strm_filled_slope_degrees + ndvi + srtm_filled_dem + assam_soil_silt + assam_soil_loamysand + assam_lith_neogene + assam_lith_paleogene + assam_lith_paleozoic + assam_lith_quaternary + assam_lith_tertiary + assam_lith_undevelopedprecambrian + landuse_rangeland + landuse_vegetation
demography =~ ind_ppp_UNadj + aged + young + percaay + deprived + nophone + noSanitation + nodrinkingWater + totLivestock
infra_access =~ ndbi + proximity_hosptial_rd + proximity_embankment_rd + proximity_rail_rd + proximity_local_rd + proximity_arterial_rd
flood_impact =~ population_affected + human_lives_lost + roads_dam + bridges_dam + embankment_total
preparedness =~ Count_Total + Count_SDRF + Count_SOPD + Count_RIDF + Sum_Total + Sum_RIDF + Sum_Erosion

# regressions
flood_impact ~ flood_proneness + demography + infra_access
preparedness ~ flood_proneness + demography + infra_access + flood_impact
"""

In [7]:
def _replace_worksheet_contents(spreadsheet, tab_title, frame):
    """Overwrite the worksheet titled ``tab_title`` in ``spreadsheet`` with ``frame``.

    The worksheet is looked up by title, cleared, and the DataFrame is written
    starting at cell (1, 1). Returns the worksheet handle. Any exception raised
    by pygsheets propagates to the caller.
    """
    worksheet = spreadsheet.worksheet('title', tab_title)
    worksheet.clear()
    worksheet.set_dataframe(frame, (1, 1))
    return worksheet


# Fit the same specification once per objective function and publish, for each
# fit, the parameter estimates, fit statistics and factor scores to the
# worksheet named after that objective in the corresponding spreadsheet.
for optimiser in ['MLW', 'ULS', 'DWLS', 'WLS', 'FIML', 'GLS']:
    model = semopy.Model(structural_model_spec)
    model.fit(
        consolidated_sem_input_rc_norm.fillna(0),
        obj=optimiser,
        solver='SLSQP',
    )

    # Each publishing step is best-effort so one failure does not stop the
    # remaining steps or objectives. Catch Exception (not a bare except) so
    # KeyboardInterrupt/SystemExit still stop the run, and report which step
    # failed and why instead of discarding the error.
    try:
        coeff_df = model.inspect()
        model_estimates_sheet = _replace_worksheet_contents(
            SEM_ModelEstimates_ConsolidatedRC_google_sheet, optimiser, coeff_df)
    except Exception as exc:
        print('Error for: ', optimiser, '- model estimates -', repr(exc))

    try:
        stats = semopy.calc_stats(model)
        modelfit_scores_sheet = _replace_worksheet_contents(
            SEM_Modelfit_Scores_ConsolidatedRC_google_sheet, optimiser, stats)
    except Exception as exc:
        print('Error for: ', optimiser, '- model fit scores -', repr(exc))

    try:
        factorScores = model.predict_factors(consolidated_sem_input_rc_norm.fillna(0))
        factorscores_sheet = _replace_worksheet_contents(
            SEM_FactorScores_ConsolidatedRC_google_sheet, optimiser, factorScores)
    except Exception as exc:
        print('Error for: ', optimiser, '- factor scores -', repr(exc))

Error for:  MLW




Error for:  GLS


In [8]:
# RANDOM EFFECTS
# Fit the same specification with semopy.ModelEffects, grouping rows by
# 'object_id', then overwrite the 'RandomEffects' tab of the estimates and
# factor-score spreadsheets with the results.
random_effects_tab = 'RandomEffects'

model_randomeffects = semopy.ModelEffects(structural_model_spec)
model_randomeffects.fit(
    consolidated_sem_input_rc_norm.fillna(0),
    group='object_id',
)

# Parameter estimates
coeff_df = model_randomeffects.inspect()
model_estimates_sheet = SEM_ModelEstimates_ConsolidatedRC_google_sheet.worksheet(
    'title', random_effects_tab)
model_estimates_sheet.clear()
model_estimates_sheet.set_dataframe(coeff_df, (1, 1))

# Factor scores predicted from the same NaN-filled input
factorScores = model_randomeffects.predict_factors(
    consolidated_sem_input_rc_norm.fillna(0),
)
factorscores_sheet = SEM_FactorScores_ConsolidatedRC_google_sheet.worksheet(
    'title', random_effects_tab)
factorscores_sheet.clear()
factorscores_sheet.set_dataframe(factorScores, (1, 1))