In [3]:
import semopy
import pandas as pd
import numpy as np

In [20]:
# Load the imputed SEM input table from CSV.
imputed_csv_path = 'D:/Projects/IDEA-FRM/InputData/SEM_X_Imputed.csv'
X_imputed = pd.read_csv(imputed_csv_path)

In [21]:
# Set the 'x', 'y' and 'yr' columns aside in `points`, then remove them
# (together with the 'index' column) from the table and show its shape.
id_cols = ['x', 'y', 'yr']
points = X_imputed[id_cols]
X_imputed = X_imputed.drop(columns=id_cols + ['index'])
X_imputed.shape

(392445, 88)

In [22]:
# Pick the categorical columns by position: columns 48 through 61 of X_imputed.
cat_positions = np.arange(48, 62)
cat_variables = X_imputed.columns[cat_positions]
cat_variables

Index(['assam_soil_loam, silt loam, silt, sandy loam',
       'assam_soil_loamy sand, sand',
       'assam_soil_rocky, other non-soil categories',
       'assam_lith_neogene sedimentary rock',
       'assam_lith_paleogene sedementary rock', 'assam_lith_paleozoic rock',
       'assam_lith_quaternary sediments',
       'assam_lith_tertiary sedimentary rocks',
       'assam_lith_undeveloped precambrian rock', 'assam_lith_water',
       'land use_built', 'land use_range land', 'land use_vegetation',
       'land use_water'],
      dtype='object')

In [None]:
# Normalise the measured variables.
# Only the non-categorical columns go through StandardScaler; the columns
# named in `cat_variables` are carried over unchanged.
from sklearn.preprocessing import StandardScaler

X_cat = X_imputed[cat_variables]
X_con = X_imputed.drop(columns=cat_variables)

standardized_data = StandardScaler().fit_transform(X_con)

# fit_transform returns a bare array, so the original index has to be restored
# explicitly; otherwise the column-wise concat below aligns rows on a fresh
# 0..n-1 index and silently misaligns whenever X_imputed is not range-indexed.
X_con = pd.DataFrame(standardized_data, columns=X_con.columns, index=X_con.index)
X_std_imputed = pd.concat([X_con, X_cat], axis=1)

In [None]:
# Re-attach the 'x', 'y', 'yr' columns held in `points`.
# Copies already present in X_std_imputed are dropped first, so re-running this
# cell does not append duplicate columns.
X_std_imputed = pd.concat(
    [X_std_imputed.drop(columns=points.columns, errors='ignore'), points],
    axis=1,
)

In [None]:
# Remove anomalies through Isolation Forests
from sklearn.ensemble import IsolationForest

# Build the feature matrix once. 'scores' and 'anomaly_score' are excluded too
# (ignored if absent) so that re-running this cell does not fit the forest on
# the helper columns written by a previous run.
non_feature_cols = ['x', 'y', 'yr', 'scores', 'anomaly_score']
features = X_std_imputed.drop(columns=non_feature_cols, errors='ignore')

clf = IsolationForest(max_samples='auto', random_state=np.random.RandomState(42))
clf.fit(features)

# decision_function gives the continuous score; predict gives the label
# (the filters below treat 1 as kept and -1 as anomaly).
X_std_imputed['scores'] = clf.decision_function(features)
X_std_imputed['anomaly_score'] = clf.predict(features)

X_std_imputed_removedanomalies = X_std_imputed[X_std_imputed['anomaly_score']==1]
anomalies = X_std_imputed[X_std_imputed['anomaly_score']==-1]

# Write both subsets without the helper columns.
X_std_imputed_removedanomalies.drop(['scores','anomaly_score'],axis=1).to_csv('D:/Projects/IDEA-FRM/InputData/SEM_X_Imputed_RemovedAnomalies.csv', index=False)
anomalies.drop(['scores','anomaly_score'],axis=1).to_csv('D:/Projects/IDEA-FRM/InputData/Anomalies.csv', index=False)

# SEM

In [18]:
# Load the prepared measured variables (Xs): Random Forest imputation of nulls,
# standardisation and anomaly removal have already been applied to this file.
# The 'x' and 'y' columns are dropped on load; 'yr' is kept.
prepared_csv_path = 'D:/Projects/IDEA-FRM/InputData/SEM_X_Imputed_RemovedAnomalies.csv'
X_std_imputed = pd.read_csv(prepared_csv_path).drop(columns=['x', 'y'])

In [23]:
# Categorical-variable column names, written out explicitly (one per line).
cat_variables = [
    'assam_soil_loam, silt loam, silt, sandy loam',
    'assam_soil_loamy sand, sand',
    'assam_soil_rocky, other non-soil categories',
    'assam_lith_neogene sedimentary rock',
    'assam_lith_paleogene sedementary rock',
    'assam_lith_paleozoic rock',
    'assam_lith_quaternary sediments',
    'assam_lith_tertiary sedimentary rocks',
    'assam_lith_undeveloped precambrian rock',
    'assam_lith_water',
    'land use_built',
    'land use_range land',
    'land use_vegetation',
    'land use_water',
]

In [24]:
# Split the prepared table into the categorical columns and everything else.
X_std_imputed_con = X_std_imputed.drop(columns=cat_variables)
X_std_imputed_cat = X_std_imputed[cat_variables]

In [25]:
# Pick the fiscal columns by position: columns 62 through 87 of X_imputed.
# NOTE(review): this relies on X_imputed from the preprocessing cells still being in the kernel.
fiscal_positions = np.arange(62, 88)
fiscal_variables = X_imputed.columns[fiscal_positions]
fiscal_variables

Index(['Count_Road', 'Sum_Roads', 'Count_Erosion', 'Sum_Erosion', 'Count_IM',
       'Sum_IM', 'Count_new', 'Sum_new', 'Count_repair', 'Sum_repair',
       'Count_relief', 'Sum_relief', 'Count_goods', 'Sum_goods', 'Count_Total',
       'Sum_Total', 'Count_SDRF', 'Sum_SDRF', 'Count_SOPD', 'Sum_SOPD',
       'Count_CIDF', 'Sum_CIDF', 'Count_LTIF', 'Sum_LTIF', 'Count_RIDF',
       'Sum_RIDF'],
      dtype='object')

In [None]:
# Pairwise correlations of the non-categorical columns, exported to CSV as a
# reference for choosing the residual correlations in the model specification.
correlation_matrix = X_std_imputed_con.corr()
correlation_matrix.to_csv('Results/CorrelationMatrix.csv', index=True)

In [27]:
# SEM model specification - only flood impact.
# Four latent factors (flood_intensity, demography, infra_access, flood_impact)
# are measured by the listed indicators; flood_impact is regressed on the other
# three; the '~~' lines declare residual covariances between indicators.
# The covariance between damage_animalsaffectedtotal and
# damage_animalsaffectedpoultry is declared once only: the original spec listed
# it a second time with the operands swapped.
# NOTE(review): the removed line may have been meant as
# 'damage_animalsaffectedpoultry ~~ damage_animalsaffectedbig' — confirm.
model_spec1 = """
# measurement model
flood_intensity =~ assam_dist_from_major_rivers_updated_3857 + sum + GCN250_ARCIII_average + strm_filled_slope_degrees + ndvi + srtm_filled_dem + gmted_drainage_density_without_1
demography =~ ind_ppp_UNadj + aged + young + sexratio + percaay + deprived + nophone + noSanitation + nodrinkingWater + totLivestock
infra_access =~ ndbi + proximity_hosptial_rd + proximity_embankment_rd + proximity_rail_rd + proximity_local_rd + proximity_arterial_rd
flood_impact =~ damage_POPULATION_AFFECTED + damage_humanliveslost + damage_animalsaffectedtotal + damage_animalsaffectedpoultry + damage_animalsaffectedbig + damage_animalsaffectedsmall + damage_animals_washed_total + damage_animals_washed_poultry + damage_animals_washed_big + damage_animals_washed_small + damage_Houses_damaged_fully + damage_Houses_damaged_partially + damage_croparea_AFFECTED + Embankment + Other + Road + Bridge

# regressions
flood_impact ~ flood_intensity + demography + infra_access

# residual correlations
ind_ppp_UNadj ~~ aged
ind_ppp_UNadj ~~ young
ind_ppp_UNadj ~~ percaay
ind_ppp_UNadj ~~ deprived
ind_ppp_UNadj ~~ nophone
ind_ppp_UNadj ~~ noSanitation
ind_ppp_UNadj ~~ totLivestock
ind_ppp_UNadj ~~ nodrinkingWater
aged ~~ young
aged ~~ percaay
aged ~~ deprived
aged ~~ nophone
aged ~~ noSanitation
aged ~~ totLivestock
young ~~ percaay
young ~~ deprived
young ~~ nophone
young ~~ noSanitation
young ~~ totLivestock
percaay ~~ deprived
percaay ~~ nophone
percaay ~~ noSanitation
percaay ~~ totLivestock
deprived ~~ nophone
deprived ~~ noSanitation
deprived ~~ totLivestock
nophone ~~ noSanitation
nophone ~~ totLivestock
noSanitation ~~ nodrinkingWater
proximity_hosptial_rd ~~ proximity_local_rd
proximity_hosptial_rd ~~ proximity_arterial_rd
proximity_hosptial_rd ~~ gmted_drainage_density_without_1
proximity_hosptial_rd ~~ srtm_filled_dem
proximity_local_rd ~~ proximity_arterial_rd
ndbi ~~ ndvi
gmted_drainage_density_without_1 ~~ srtm_filled_dem
gmted_drainage_density_without_1 ~~ GCN250_ARCIII_average
GCN250_ARCIII_average ~~ ndvi
damage_animalsaffectedtotal ~~ damage_animalsaffectedpoultry
damage_animalsaffectedtotal ~~ damage_animalsaffectedbig
damage_animalsaffectedtotal ~~ damage_animalsaffectedsmall
damage_animalsaffectedpoultry ~~ damage_animalsaffectedsmall
damage_animalsaffectedbig ~~ damage_animalsaffectedsmall
damage_animals_washed_total ~~ damage_animals_washed_poultry
damage_animals_washed_total ~~ damage_animals_washed_big
damage_animals_washed_total ~~ damage_animals_washed_small
damage_animals_washed_small ~~ damage_animals_washed_big
damage_Houses_damaged_fully ~~ damage_Houses_damaged_partially
"""

In [28]:
# Build the semopy model from specification 1 (model_spec1, flood impact only).
model = semopy.Model(model_spec1)

In [17]:
# Fit the model on the prepared data using the MLW objective and the SLSQP solver.
# Solver names noted by the author: SLSQP BFGS TNC SUMSL HUMSL Adam
# NOTE(review): the output saved with this cell reports success=False and
# fun=inf, i.e. this fit did not converge; estimates read from `model`
# afterwards should be treated with caution.
model.fit(X_std_imputed, obj='MLW', solver='SLSQP')



SolverResult(fun=inf, success=False, n_it=60, x=array([ 1.66314162e+00,  5.49133220e+00, -2.13551957e+00, -5.16947666e+00,
       -4.70355865e+00,  5.61518058e+00, -5.13189490e+00, -5.24352915e+00,
        3.42856281e+00, -3.72510205e-02, -5.54755401e-01,  1.55261386e-01,
       -1.09055844e+01, -1.25811611e+00, -7.14272820e+00,  3.24032252e+00,
       -2.13000532e+00,  4.83168020e+00,  3.95543930e+00,  3.39909081e+00,
       -1.89062031e-01, -6.99176581e-01,  9.85120845e-01,  6.27095582e+00,
       -1.54144522e+01, -2.61590803e-01, -1.50957311e+00, -7.81217134e-01,
        1.13737317e+00, -6.28781500e+00,  3.28021829e+00, -3.87965455e+00,
       -1.91755984e+00, -2.21031493e-01, -2.51987567e+00, -1.76463684e+00,
        3.91408071e+00,  1.00661628e+00, -5.16037814e-01, -2.37829414e+00,
       -7.06491567e+00, -8.24347774e+00, -6.31834842e+00, -1.18965947e+01,
       -3.23929876e+00, -2.66247170e+00, -2.49473670e+00,  4.02696586e-01,
       -4.08561148e+00, -6.40263704e-01,  1.99388860

In [None]:
# Save the coefficient estimates reported by model.inspect() in coeff_df.
coeff_df = model.inspect()

In [None]:
# Display the table returned by model.inspect().
coeff_df

In [None]:
# Show only the rows whose left-hand variable (lval) is flood_impact.
is_flood_impact = coeff_df['lval'] == 'flood_impact'
coeff_df[is_flood_impact]

In [None]:
# Compute semopy's statistics for the fitted model and print them transposed.
stats = semopy.calc_stats(model)
stats_transposed = stats.T
print(stats_transposed)

## Other model specifications

In [None]:
# SEM model specification with flood impact and preparedness.
# Compared with the original text of this spec:
#   * the dangling 'frims_damages =~' line (no indicators on the right-hand
#     side) is removed; it was an unfinished statement;
#   * the covariance between damage_animalsaffectedtotal and
#     damage_animalsaffectedpoultry is declared once only (it was listed a
#     second time with the operands swapped).
# NOTE(review): the latent names Sum_Total and Count_Total are also column
# names in the input table (see the fiscal_variables listing) — confirm that
# reusing them as latent factors is intended.
model_spec = """
# measurement model
Intensity =~ Inundation + assam_dist_from_major_rivers_updated_3857 + sum + GCN250_ARCIII_average + strm_filled_slope_degrees + ndvi + srtm_filled_dem + gmted_drainage_density_without_1
demography =~ ind_ppp_UNadj + aged + young + sexratio + percaay + deprived + nophone + noSanitation + nodrinkingWater + totLivestock
infra_access =~ ndbi + proximity_hosptial_rd + proximity_embankment_rd + proximity_rail_rd + proximity_local_rd + proximity_arterial_rd

frims_response =~ response_inmatesinReliefCamps + Relief_cam + Relief_dis + Rice + Salt + Oil + Dal
Sum_Total =~ Sum_SDRF + Sum_relief + Sum_new + Sum_Erosion + Sum_Roads + Sum_repair + Sum_IM
Count_Total =~ Count_SDRF + Count_relief + Count_new + Count_Erosion + Count_Road + Count_repair + Count_IM
Preparedness =~ response_inmatesinReliefCamps + Relief_cam + Relief_dis + Rice + Salt + Oil + Dal +  Count_SDRF + Count_relief + Count_new + Count_Erosion + Count_Road + Count_repair + Count_IM + Sum_SDRF + Sum_relief + Sum_new + Sum_Erosion + Sum_Roads + Sum_repair + Sum_IM

flood_impact =~ damage_POPULATION_AFFECTED + damage_humanliveslost + damage_animalsaffectedtotal + damage_animalsaffectedpoultry + damage_animalsaffectedbig + damage_animalsaffectedsmall + damage_animals_washed_total + damage_animals_washed_poultry + damage_animals_washed_big + damage_animals_washed_small + damage_Houses_damaged_fully + damage_Houses_damaged_partially + damage_croparea_AFFECTED + Embankment + Other + Road + Bridge
DEFINE(ordinal) Count_SDRF Count_relief Count_new Count_Erosion Count_Road Count_repair Count_IM Sum_SDRF Sum_relief Sum_new Sum_Erosion Sum_Roads Sum_repair Sum_IM

# regressions
flood_impact ~ Intensity + demography + infra_access
Preparedness ~ Intensity + demography + infra_access + flood_impact

# residual correlations
ind_ppp_UNadj ~~ aged
ind_ppp_UNadj ~~ young
ind_ppp_UNadj ~~ percaay
ind_ppp_UNadj ~~ deprived
ind_ppp_UNadj ~~ nophone
ind_ppp_UNadj ~~ noSanitation
ind_ppp_UNadj ~~ totLivestock
ind_ppp_UNadj ~~ nodrinkingWater
aged ~~ young
aged ~~ percaay
aged ~~ deprived
aged ~~ nophone
aged ~~ noSanitation
aged ~~ totLivestock
young ~~ percaay
young ~~ deprived
young ~~ nophone
young ~~ noSanitation
young ~~ totLivestock
percaay ~~ deprived
percaay ~~ nophone
percaay ~~ noSanitation
percaay ~~ totLivestock
deprived ~~ nophone
deprived ~~ noSanitation
deprived ~~ totLivestock
nophone ~~ noSanitation
nophone ~~ totLivestock
noSanitation ~~ nodrinkingWater
proximity_hosptial_rd ~~ proximity_local_rd
proximity_hosptial_rd ~~ proximity_arterial_rd
proximity_hosptial_rd ~~ gmted_drainage_density_without_1
proximity_hosptial_rd ~~ srtm_filled_dem
proximity_local_rd ~~ proximity_arterial_rd
ndbi ~~ ndvi
gmted_drainage_density_without_1 ~~ srtm_filled_dem
gmted_drainage_density_without_1 ~~ GCN250_ARCIII_average
GCN250_ARCIII_average ~~ ndvi
damage_animalsaffectedtotal ~~ damage_animalsaffectedpoultry
damage_animalsaffectedtotal ~~ damage_animalsaffectedbig
damage_animalsaffectedtotal ~~ damage_animalsaffectedsmall
damage_animalsaffectedpoultry ~~ damage_animalsaffectedsmall
damage_animalsaffectedbig ~~ damage_animalsaffectedsmall
damage_animals_washed_total ~~ damage_animals_washed_poultry
damage_animals_washed_total ~~ damage_animals_washed_big
damage_animals_washed_total ~~ damage_animals_washed_small
damage_animals_washed_small ~~ damage_animals_washed_big
damage_Houses_damaged_fully ~~ damage_Houses_damaged_partially

damage_animalsaffectedsmall ~~ Rice
damage_animalsaffectedsmall ~~ Dal
Rice ~~ Dal
Rice ~~ Salt
damage_animalsaffectedpoultry ~~ Rice
damage_animalsaffectedpoultry ~~ Dal
damage_animalsaffectedtotal ~~ Rice
damage_animalsaffectedtotal ~~ Dal
damage_animalsaffectedbig ~~ Rice
damage_animalsaffectedbig ~~ Dal
Relief_cam ~~ response_inmatesinReliefCamps
response_inmatesinReliefCamps ~~ damage_Houses_damaged_fully
response_inmatesinReliefCamps ~~ damage_Houses_damaged_partially

Count_Road ~~ Count_IM
Count_Road ~~ Count_new
Count_Road ~~ Count_repair


Sum_Roads ~~ Sum_new
Sum_Roads ~~ Sum_repair

Count_Erosion ~~ Sum_Erosion
"""

In [None]:
# Check specification: Preparedness is measured by four latent factors
# (Sum_Total, Count_Total, frims_response, frims_damages) and regressed on
# Inundation, demography and infra_access; residual covariances are declared
# between the four sub-factors only.
model_spec_check = """
# measurement model
Inundation =~ assam_dist_from_major_rivers_updated_3857 + sum + GCN250_ARCIII_average + strm_filled_slope_degrees + ndvi + srtm_filled_dem + gmted_drainage_density_without_1
demography =~ ind_ppp_UNadj + aged + young + sexratio + percaay + deprived + nophone + noSanitation + nodrinkingWater + totLivestock
infra_access =~ ndbi + proximity_hosptial_rd + proximity_embankment_rd + proximity_rail_rd + proximity_local_rd + proximity_arterial_rd

frims_response =~ response_inmatesinReliefCamps + Relief_cam + Relief_dis + Rice + Salt + Oil + Dal
Sum_Total =~ Sum_SDRF + Sum_relief + Sum_new + Sum_Erosion + Sum_Roads + Sum_repair + Sum_IM
Count_Total =~ Count_SDRF + Count_relief + Count_new + Count_Erosion + Count_Road + Count_repair + Count_IM
frims_damages =~ damage_POPULATION_AFFECTED + damage_humanliveslost + damage_animalsaffectedtotal + damage_animalsaffectedpoultry + damage_animalsaffectedbig + damage_animalsaffectedsmall + damage_animals_washed_total + damage_animals_washed_poultry + damage_animals_washed_big + damage_animals_washed_small + damage_Houses_damaged_fully + damage_Houses_damaged_partially + damage_croparea_AFFECTED + Embankment + Other + Road + Bridge
Preparedness =~ Sum_Total + Count_Total + frims_response + frims_damages

# regressions
Preparedness ~ Inundation + demography + infra_access

# residual correlations
frims_response ~~ frims_damages
Sum_Total ~~ frims_damages
Count_Total ~~ frims_damages
Count_Total ~~ Sum_Total
"""

In [None]:
# Build the semopy model from the check specification (model_spec_check).
model_check = semopy.Model(model_spec_check)

In [None]:
# Fit the check model on a 100-row random sample using the WLS objective.
# random_state pins the sample so the fit can be reproduced across runs.
# NOTE(review): X_std_imputed_till2021 is not defined in any cell shown in
# this notebook — confirm where it is created before running.
model_check.fit(X_std_imputed_till2021.sample(100, random_state=42),
         obj='WLS')

In [None]:
# Save the check model's estimates, as reported by inspect(), in coeff_df_check.
coeff_df_check = model_check.inspect()

In [None]:
# Display the table returned by model_check.inspect().
coeff_df_check

In [None]:
from semopy import ModelEffects

In [None]:
# Build a semopy ModelEffects model from the same specification as `model` (model_spec1).
model_re = semopy.ModelEffects(model_spec1)

In [None]:
# Fit the ModelEffects model, passing 'revenue_ci' as the grouping column.
# NOTE(review): semopy's ModelEffects examples pass the grouping column as
# `group='<column>'`; confirm that `groups=[...]` is accepted with the intended
# meaning, and that X_std_imputed actually contains a 'revenue_ci' column.
model_re.fit(X_std_imputed, groups=['revenue_ci'])

In [None]:
# Display the prepared data table.
X_std_imputed

In [166]:
# Display `data`.
# NOTE(review): `data` is not defined in any cell shown in this notebook; the
# saved output has columns x1, x2, y1..y4, cluster, group — confirm its origin.
data

Unnamed: 0,x1,x2,y1,y2,y3,y4,cluster,group
0,-0.792079,-1.508534,-28.22713,-3.086247,-9.848543,1.642276,0,0
1,3.977028,5.40074,-15.323466,-4.774907,0.723464,-2.95508,1,1
2,-4.060853,-3.671972,-8.328913,-4.322402,-2.02566,-1.457362,2,2
3,5.839099,1.040212,-14.749171,-6.101894,-10.478908,5.428051,3,3
4,1.185336,5.49326,-3.237661,-3.388142,-11.853252,-7.117686,4,4
5,8.400328,0.560546,-4.506895,4.478732,-7.314287,-4.117323,5,5
6,-0.801675,-8.032487,-4.928389,2.029057,-4.220494,0.074508,6,6
7,-1.62471,-5.471683,-9.826293,-1.324649,-0.37659,0.584255,7,7
8,-6.925354,-5.320993,-12.851574,-1.926781,-10.261392,-7.99016,8,8
9,0.20123,-4.351921,-19.301696,-3.958672,7.93664,0.852558,9,9
