In [7]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

In [9]:
df_train = pd.read_csv('Inputdata.csv')
dataset.head()

Unnamed: 0,FID,OBJECTID,Id,POINT_X,POINT_Y,Elevation,Slope,Aspect,TWI,RiverDista,RiverDensi,Rainfall,Lithology,Landuse,SPI,FlowDirect,Curvature1
0,0,1,0,396353.443,4137844.591,1335,17.448,7,7,0.0,1.07952,692.544006,5,4,0.000475,32,0.520449
1,1,2,0,399363.1496,4136873.347,590,13.6558,6,7,100.0,1.4213,682.109009,5,4,0.60889,4,-0.049309
2,2,3,0,402320.3307,4137711.189,824,21.322399,5,0,360.554993,1.24148,648.546021,5,4,-1.85275,1,2.69709
3,3,4,0,405750.182,4137560.292,692,40.234901,7,0,100.0,1.58877,614.223022,5,4,-5.16362,16,1.79592
4,4,5,0,380207.4744,4140377.675,942,24.434099,6,7,447.213989,1.35956,560.447022,5,4,0.000247,16,0.900259


In [11]:
factor_cols = [
    "Elevation",
    "Slope",
    "Aspect",
    "TWI",
    "RiverDista",
    "RiverDensi",
    "Rainfall",
    "Lithology",
    "Landuse",
    "SPI",
    "FlowDirect",
    "Curvature1"
]

X = df_train[factor_cols].copy()

In [13]:
def compute_vif(X_df: pd.DataFrame) -> pd.DataFrame:
    X_vals = X_df.values
    k = X_vals.shape[1]
    vif_list = []

    for j in range(k):
        y = X_vals[:, j]
        X_others = np.delete(X_vals, j, axis=1)

        model = LinearRegression()
        model.fit(X_others, y)
        r2 = model.score(X_others, y)

        vif = np.inf if r2 >= 1 else 1.0 / (1.0 - r2)
        vif_list.append(vif)

    return pd.DataFrame({"Factor": X_df.columns, "VIF": vif_list})

vif_df = compute_vif(X)
print("\n=== VIF ===")
print(vif_df.round(3))
vif_df.to_csv("vif_train.csv", index=False)



=== VIF ===
        Factor    VIF
0    Elevation  2.854
1        Slope  1.322
2       Aspect  1.073
3          TWI  1.028
4   RiverDista  2.678
5   RiverDensi  2.818
6     Rainfall  1.846
7    Lithology  1.307
8      Landuse  1.349
9          SPI  1.447
10  FlowDirect  1.088
11  Curvature1  1.484


In [15]:
def compute_redundancy_index(X_df: pd.DataFrame) -> pd.DataFrame:
    X_vals = X_df.values
    k = X_vals.shape[1]
    redundancy_list = []

    for j in range(k):
        y = X_vals[:, j]
        X_others = np.delete(X_vals, j, axis=1)

        model = LinearRegression()
        model.fit(X_others, y)
        r2 = model.score(X_others, y)

        redundancy_list.append(r2)

    return pd.DataFrame({"Factor": X_df.columns,
                         "Redundancy_Index_R2": redundancy_list})

ri_df = compute_redundancy_index(X)
print("\n=== Redundancy Index (R²) ===")
print(ri_df.round(3))
ri_df.to_csv("redundancy_index_train.csv", index=False)



=== Redundancy Index (R²) ===
        Factor  Redundancy_Index_R2
0    Elevation                0.650
1        Slope                0.244
2       Aspect                0.068
3          TWI                0.028
4   RiverDista                0.627
5   RiverDensi                0.645
6     Rainfall                0.458
7    Lithology                0.235
8      Landuse                0.259
9          SPI                0.309
10  FlowDirect                0.081
11  Curvature1                0.326


In [17]:
def compute_condition_index(X_df: pd.DataFrame) -> pd.DataFrame:
    scaler = StandardScaler()
    X_std = scaler.fit_transform(X_df.values)

    cov_mat = np.cov(X_std, rowvar=False)
    eigenvalues, _ = np.linalg.eig(cov_mat)

    idx = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[idx]

    max_eig = np.max(eigenvalues)
    cond_index = np.sqrt(max_eig / eigenvalues)

    return pd.DataFrame({
        "Dimension": np.arange(1, len(eigenvalues) + 1),
        "Eigenvalue": eigenvalues.real,
        "Condition_Index": cond_index.real
    })

cond_df = compute_condition_index(X)
print("\n=== Condition Index ===")
print(cond_df.round(3))
cond_df.to_csv("condition_index_train.csv", index=False)



=== Condition Index ===
    Dimension  Eigenvalue  Condition_Index
0           1       2.819            1.000
1           2       1.688            1.292
2           3       1.292            1.477
3           4       1.185            1.542
4           5       1.080            1.616
5           6       0.978            1.698
6           7       0.870            1.800
7           8       0.658            2.070
8           9       0.572            2.220
9          10       0.447            2.512
10         11       0.228            3.520
11         12       0.214            3.631
