In [10]:
import pandas as pd

# Read the plot-price table and print its column labels as a quick schema check.
PLOT_PRICE_PATH = "plot_price_DS.csv"
df = pd.read_csv(PLOT_PRICE_PATH)

print(df.columns)


Index(['NAME', 'Garden land without Road Access',
       'Garden land with Road Access', 'Wet Land', 'Rocky Land',
       'Residential Plot with Private Road Access',
       'Residential Plot with NH/PWD Road Acess',
       'Commercially Important Plot'],
      dtype='object')


In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Read the observations used for both training and evaluation.
df = pd.read_csv("sorted_data.csv")  # Update with your dataset path

# Columns the model predicts.
targets = [
    'PRECIPITATION RATE',
    'SOIL MOISTURE',
    'WIND SPEED',
    'SURFACE TEMPERATURE',
    'DEEP SOIL TEMPERATURE',
]

# Columns the model receives as input.
features = [
    'YEAR', 'MONTH', 'NAME',
    'AVRG ELEVATION', 'MIN ELEVATION', 'MAX ELEVATION',
]

# Two-stage shuffled split: 30% of the rows are held out first, then two
# thirds of that hold-out become the test set (20% of all rows) and the
# remaining third the validation set (10%), leaving 70% for training.
train_data, temp_data = train_test_split(
    df, test_size=0.3, shuffle=True, random_state=42
)
val_data, test_data = train_test_split(
    temp_data, test_size=2/3, shuffle=True, random_state=42
)

# Separate inputs (X) from targets (y) for each partition.
X_train = train_data[features]
y_train = train_data[targets]
X_val = val_data[features]
y_val = val_data[targets]
X_test = test_data[features]
y_test = test_data[targets]

# Base learners for the stack: a random forest and a gradient-boosted model,
# both with a fixed seed.
rf = RandomForestRegressor(n_estimators=150, random_state=42)
xgb = XGBRegressor(n_estimators=150, learning_rate=0.1, random_state=42)

# Combine the base learners under a linear meta-learner, then wrap the stack
# in MultiOutputRegressor so it can be fitted against all target columns.
base_estimators = [('rf', rf), ('xgb', xgb)]
stacking_regressor = StackingRegressor(
    estimators=base_estimators,
    final_estimator=LinearRegression(),
)
stack_model = MultiOutputRegressor(stacking_regressor)

# Fit on the training partition only.
stack_model.fit(X_train, y_train)

# Predict the two held-out partitions.
y_val_pred = stack_model.predict(X_val)
y_test_pred = stack_model.predict(X_test)

# Label the raw prediction arrays with the target names and the original row
# index so they line up with y_val / y_test.
y_val_pred_df = pd.DataFrame(y_val_pred, index=val_data.index, columns=targets)
y_test_pred_df = pd.DataFrame(y_test_pred, index=test_data.index, columns=targets)

# Write actual vs. predicted values for every validation row, identified by
# year, month and location, so accuracy can be inspected offline.
validation_results = val_data[['YEAR', 'MONTH', 'NAME']].copy()
for col in targets:
    validation_results[f'Actual_{col}'] = y_val[col].to_numpy()
    validation_results[f'Predicted_{col}'] = y_val_pred_df[col].to_numpy()

validation_results.to_csv("validation_results.csv", index=False)

# Per-target MAE, RMSE and R² for the validation set, then for the test set.
evaluation_sets = (
    ("\nValidation Set Performance:", y_val, y_val_pred_df),
    ("\nTest Set Performance:", y_test, y_test_pred_df),
)
for heading, y_true, y_pred in evaluation_sets:
    print(heading)
    for col in targets:
        mae = mean_absolute_error(y_true[col], y_pred[col])
        rmse = np.sqrt(mean_squared_error(y_true[col], y_pred[col]))
        r2 = r2_score(y_true[col], y_pred[col])
        print(f"{col}: MAE = {mae:.4f}, RMSE = {rmse:.4f}, R² = {r2:.4f}")

# Future Forecasting (Next 10 Years)
# Build one input row per (year, month, location) for the ten years that
# follow the last year present in the data.
last_year = df['YEAR'].max()
future_years = list(range(last_year + 1, last_year + 11))

# Each location's elevation inputs are its mean over the historical rows.
# They do not depend on year or month, so they are computed once per location
# here instead of re-filtering df inside the year/month loops.
elevation_columns = ['AVRG ELEVATION', 'MIN ELEVATION', 'MAX ELEVATION']
village_elevations = []
for village in df['NAME'].unique():
    village_rows = df[df['NAME'] == village]
    village_elevations.append(
        (village, *(village_rows[column].mean() for column in elevation_columns))
    )

# Row order: year, then month, then location in order of first appearance.
future_data = [
    [year, month, village, avg_elevation, min_elevation, max_elevation]
    for year in future_years
    for month in range(1, 13)
    for village, avg_elevation, min_elevation, max_elevation in village_elevations
]

future_df = pd.DataFrame(future_data, columns=features)

# Predict future values
# NOTE(review): every YEAR in future_df lies beyond the years the model was
# fitted on, and both base learners are tree ensembles — confirm that the
# forecasts actually vary across future years before relying on them.
future_predictions = stack_model.predict(future_df)

# Attach one prediction column per target to a copy of the input rows.
future_pred_df = future_df.copy()
for i, col in enumerate(targets):
    future_pred_df[col] = future_predictions[:, i]

# Save future forecasts
future_pred_df.to_csv("future_forecast.csv", index=False)

print("\n✅ Future forecasts saved to 'future_forecast.csv'.")



Validation Set Performance:
PRECIPITATION RATE: MAE = 0.0154, RMSE = 0.0220, R² = 0.9898
SOIL MOISTURE: MAE = 0.0135, RMSE = 0.0173, R² = 0.9945
WIND SPEED: MAE = 0.0134, RMSE = 0.0176, R² = 0.9950
SURFACE TEMPERATURE: MAE = 0.0121, RMSE = 0.0168, R² = 0.9891
DEEP SOIL TEMPERATURE: MAE = 0.0098, RMSE = 0.0137, R² = 0.9935

Test Set Performance:
PRECIPITATION RATE: MAE = 0.0175, RMSE = 0.0260, R² = 0.9862
SOIL MOISTURE: MAE = 0.0131, RMSE = 0.0183, R² = 0.9942
WIND SPEED: MAE = 0.0139, RMSE = 0.0189, R² = 0.9947
SURFACE TEMPERATURE: MAE = 0.0120, RMSE = 0.0170, R² = 0.9891
DEEP SOIL TEMPERATURE: MAE = 0.0105, RMSE = 0.0151, R² = 0.9916

✅ Future forecasts saved to 'future_forecast.csv'.


In [None]:
"""
Since elevation affects the severity of floods, landslides, and storms, we now include:

    - Average Elevation (AE)
    - Minimum Elevation (ME)
    - Maximum Elevation (MAXE)

1️⃣ Flood Risk Probability 🌊
PFlood = α1⋅PR + α2⋅SM − α3⋅ST − α4⋅AE

   - Higher AE → Lower flood risk
   - Higher PR & SM → Higher flood risk
   - Higher ST → Lower flood risk

2️⃣ Landslide Risk Probability 🏔️
PLandslide = β1⋅PR + β2⋅SM + β3⋅DST + β4⋅AE

   - Higher AE → Higher landslide risk
   - Higher PR, SM, DST → Higher landslide risk
   - Low AE but high MAXE → Risky if slope is steep

✅ Slope Consideration:
Slope = (MAXE − ME) / Distance

   - If Slope > Threshold, landslide risk increases.

3️⃣ Storm Risk Probability 🌪️
PStorm = γ1⋅WS + γ2⋅PR − γ3⋅ST − γ4⋅AE

   - Higher AE → Lower storm risk
   - Higher WS & PR → Higher storm risk
   - Higher ST → Lower storm risk
"""


In [13]:
import numpy as np
import pandas as pd
import optuna
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from sklearn.linear_model import Ridge
from xgboost import XGBRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Read the observations used for hyperparameter tuning.
df = pd.read_csv("sorted_data.csv")

# Model inputs (the location column NAME is not among them here) and targets.
feature_columns = [
    'YEAR', 'MONTH',
    'AVRG ELEVATION', 'MIN ELEVATION', 'MAX ELEVATION',
]
target_columns = [
    'PRECIPITATION RATE', 'SOIL MOISTURE', 'WIND SPEED',
    'SURFACE TEMPERATURE', 'DEEP SOIL TEMPERATURE',
]

# y is multi-output: one column per entry in target_columns.
X, y = df[feature_columns], df[target_columns]

# Seeded random split: 80% of the rows for training, 20% for validation.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Define the Optuna objective function
def objective(trial):
    """Score one Optuna trial of the stacked RF + XGBoost ensemble.

    The trial supplies the XGBoost hyperparameters; the random forest and the
    Ridge meta-learner are fixed. The ensemble is fitted on the module-level
    ``X_train``/``y_train`` and evaluated on ``X_val``/``y_val``.

    Args:
        trial: Optuna trial used to sample the XGBoost hyperparameters.

    Returns:
        The mean, over all target columns, of the per-target validation MSE.
    """
    # Search space for the XGBoost base learner; the suggest_* calls run in
    # the order the keys are listed.
    xgb_params = dict(
        n_estimators=trial.suggest_int('n_estimators', 50, 300),
        max_depth=trial.suggest_int('max_depth', 3, 10),
        learning_rate=trial.suggest_float('learning_rate', 0.01, 0.3),
        subsample=trial.suggest_float('subsample', 0.5, 1.0),
        colsample_bytree=trial.suggest_float('colsample_bytree', 0.5, 1.0),
    )

    # Stack the fixed forest and the sampled booster under a Ridge meta-learner.
    forest = RandomForestRegressor(n_estimators=100, random_state=42)
    booster = XGBRegressor(**xgb_params)
    stacked = StackingRegressor(
        estimators=[('rf', forest), ('xgb', booster)],
        final_estimator=Ridge(),
    )

    # MultiOutputRegressor lets the stack be fitted against all target columns.
    ensemble = MultiOutputRegressor(stacked)
    ensemble.fit(X_train, y_train)

    # One MSE per target on the validation rows, averaged into a single score
    # for Optuna to minimise.
    per_target_mse = mean_squared_error(
        y_val, ensemble.predict(X_val), multioutput='raw_values'
    )
    return np.mean(per_target_mse)

# Run Optuna optimization
# The sampler is seeded so the sequence of sampled trials, and therefore the
# selected hyperparameters, is the same on every run of this cell.
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=15)

# Best hyperparameters
best_params = study.best_params
print("Best Hyperparameters:", best_params)

# Train final model with best parameters (same architecture as in objective()).
best_xgb = XGBRegressor(**best_params)
final_stack_model = MultiOutputRegressor(StackingRegressor(estimators=[
    ('rf', RandomForestRegressor(n_estimators=100, random_state=42)),
    ('xgb', best_xgb)
], final_estimator=Ridge()))

final_stack_model.fit(X_train, y_train)

# Predict and evaluate
# NOTE: despite its name, y_test_pred holds predictions for the validation
# rows (X_val). These are the same rows that guided the hyperparameter search,
# so the MSE below is an optimistic estimate, not an independent test score.
y_test_pred = final_stack_model.predict(X_val)
final_mse = mean_squared_error(y_val, y_test_pred, multioutput='raw_values')
print("Final MSE per target variable:", final_mse)
print("Average Final MSE:", np.mean(final_mse))


# Read the observations used for both training and evaluation.
# NOTE(review): from here to the end of the cell the code repeats the earlier
# train/evaluate/forecast pipeline. It rebinds X_train, X_val, y_train and
# y_val, which the Optuna section above created with a different feature list
# and a different split.
df = pd.read_csv("sorted_data.csv")  # Update with your dataset path

# Columns the model predicts.
targets = [
    'PRECIPITATION RATE',
    'SOIL MOISTURE',
    'WIND SPEED',
    'SURFACE TEMPERATURE',
    'DEEP SOIL TEMPERATURE',
]

# Columns the model receives as input.
features = [
    'YEAR', 'MONTH', 'NAME',
    'AVRG ELEVATION', 'MIN ELEVATION', 'MAX ELEVATION',
]

# Two-stage shuffled split: 30% of the rows are held out first, then two
# thirds of that hold-out become the test set (20% of all rows) and the
# remaining third the validation set (10%), leaving 70% for training.
train_data, temp_data = train_test_split(
    df, test_size=0.3, shuffle=True, random_state=42
)
val_data, test_data = train_test_split(
    temp_data, test_size=2/3, shuffle=True, random_state=42
)

# Separate inputs (X) from targets (y) for each partition.
X_train = train_data[features]
y_train = train_data[targets]
X_val = val_data[features]
y_val = val_data[targets]
X_test = test_data[features]
y_test = test_data[targets]

# Base learners for the stack: a random forest and a gradient-boosted model,
# both with a fixed seed.
rf = RandomForestRegressor(n_estimators=150, random_state=42)
xgb = XGBRegressor(n_estimators=150, learning_rate=0.1, random_state=42)

# Combine the base learners under a linear meta-learner, then wrap the stack
# in MultiOutputRegressor so it can be fitted against all target columns.
# NOTE: LinearRegression is not imported in this cell's import block; it must
# already be in the kernel from the earlier cell that imports it.
base_estimators = [('rf', rf), ('xgb', xgb)]
stacking_regressor = StackingRegressor(
    estimators=base_estimators,
    final_estimator=LinearRegression(),
)
stack_model = MultiOutputRegressor(stacking_regressor)

# Fit on the training partition only.
stack_model.fit(X_train, y_train)

# Predict the two held-out partitions.
y_val_pred = stack_model.predict(X_val)
y_test_pred = stack_model.predict(X_test)

# Label the raw prediction arrays with the target names and the original row
# index so they line up with y_val / y_test.
y_val_pred_df = pd.DataFrame(y_val_pred, index=val_data.index, columns=targets)
y_test_pred_df = pd.DataFrame(y_test_pred, index=test_data.index, columns=targets)

# Write actual vs. predicted values for every validation row, identified by
# year, month and location, so accuracy can be inspected offline.
validation_results = val_data[['YEAR', 'MONTH', 'NAME']].copy()
for col in targets:
    validation_results[f'Actual_{col}'] = y_val[col].to_numpy()
    validation_results[f'Predicted_{col}'] = y_val_pred_df[col].to_numpy()

validation_results.to_csv("validation_results.csv", index=False)

# Per-target MAE, RMSE and R² for the validation set, then for the test set.
# NOTE: mean_absolute_error and r2_score are not imported in this cell's
# import block; they must already be in the kernel from the earlier cell.
evaluation_sets = (
    ("\nValidation Set Performance:", y_val, y_val_pred_df),
    ("\nTest Set Performance:", y_test, y_test_pred_df),
)
for heading, y_true, y_pred in evaluation_sets:
    print(heading)
    for col in targets:
        mae = mean_absolute_error(y_true[col], y_pred[col])
        rmse = np.sqrt(mean_squared_error(y_true[col], y_pred[col]))
        r2 = r2_score(y_true[col], y_pred[col])
        print(f"{col}: MAE = {mae:.4f}, RMSE = {rmse:.4f}, R² = {r2:.4f}")

# Future Forecasting (Next 10 Years)
# Build one input row per (year, month, location) for the ten years that
# follow the last year present in the data.
last_year = df['YEAR'].max()
future_years = list(range(last_year + 1, last_year + 11))

# Each location's elevation inputs are its mean over the historical rows.
# They do not depend on year or month, so they are computed once per location
# here instead of re-filtering df inside the year/month loops.
elevation_columns = ['AVRG ELEVATION', 'MIN ELEVATION', 'MAX ELEVATION']
village_elevations = []
for village in df['NAME'].unique():
    village_rows = df[df['NAME'] == village]
    village_elevations.append(
        (village, *(village_rows[column].mean() for column in elevation_columns))
    )

# Row order: year, then month, then location in order of first appearance.
future_data = [
    [year, month, village, avg_elevation, min_elevation, max_elevation]
    for year in future_years
    for month in range(1, 13)
    for village, avg_elevation, min_elevation, max_elevation in village_elevations
]

future_df = pd.DataFrame(future_data, columns=features)

# Predict future values
# NOTE(review): every YEAR in future_df lies beyond the years the model was
# fitted on, and both base learners are tree ensembles — confirm that the
# forecasts actually vary across future years before relying on them.
future_predictions = stack_model.predict(future_df)

# Attach one prediction column per target to a copy of the input rows.
future_pred_df = future_df.copy()
for i, col in enumerate(targets):
    future_pred_df[col] = future_predictions[:, i]

# Save future forecasts
future_pred_df.to_csv("future_forecast.csv", index=False)

print("\n✅ Future forecasts saved to 'future_forecast.csv'.")

[I 2025-03-27 09:58:34,800] A new study created in memory with name: no-name-b6eaeb3d-b3f4-45d6-8acc-b0cac5050341
[I 2025-03-27 09:58:46,286] Trial 0 finished with value: 0.0002874434989635743 and parameters: {'n_estimators': 88, 'max_depth': 7, 'learning_rate': 0.22757013842328452, 'subsample': 0.8557415571618794, 'colsample_bytree': 0.8427391800923942}. Best is trial 0 with value: 0.0002874434989635743.
[I 2025-03-27 09:58:58,223] Trial 1 finished with value: 0.00029300075148795603 and parameters: {'n_estimators': 142, 'max_depth': 6, 'learning_rate': 0.28686104491817904, 'subsample': 0.7281976236353602, 'colsample_bytree': 0.7182528151012229}. Best is trial 0 with value: 0.0002874434989635743.
[I 2025-03-27 09:59:08,352] Trial 2 finished with value: 0.0008659217577103888 and parameters: {'n_estimators': 79, 'max_depth': 4, 'learning_rate': 0.12113330123394402, 'subsample': 0.9624738299034488, 'colsample_bytree': 0.982433697762769}. Best is trial 0 with value: 0.0002874434989635743.


Best Hyperparameters: {'n_estimators': 292, 'max_depth': 5, 'learning_rate': 0.1850965438234399, 'subsample': 0.9883350405726145, 'colsample_bytree': 0.9919930022568942}
Final MSE per target variable: [0.00029855 0.00014197 0.00019761 0.0001808  0.00016329]
Average Final MSE: 0.00019644045932612383

Validation Set Performance:
PRECIPITATION RATE: MAE = 0.0154, RMSE = 0.0220, R² = 0.9898
SOIL MOISTURE: MAE = 0.0135, RMSE = 0.0173, R² = 0.9945
WIND SPEED: MAE = 0.0134, RMSE = 0.0176, R² = 0.9950
SURFACE TEMPERATURE: MAE = 0.0121, RMSE = 0.0168, R² = 0.9891
DEEP SOIL TEMPERATURE: MAE = 0.0098, RMSE = 0.0137, R² = 0.9935

Test Set Performance:
PRECIPITATION RATE: MAE = 0.0175, RMSE = 0.0260, R² = 0.9862
SOIL MOISTURE: MAE = 0.0131, RMSE = 0.0183, R² = 0.9942
WIND SPEED: MAE = 0.0139, RMSE = 0.0189, R² = 0.9947
SURFACE TEMPERATURE: MAE = 0.0120, RMSE = 0.0170, R² = 0.9891
DEEP SOIL TEMPERATURE: MAE = 0.0105, RMSE = 0.0151, R² = 0.9916

✅ Future forecasts saved to 'future_forecast.csv'.


In [14]:

import pandas as pd
import numpy as np

# Load forecasted data: the table saved as "future_forecast.csv" by the
# forecasting step (input columns plus one predicted column per target).
forecast_df = pd.read_csv("future_forecast.csv")

# Coefficients for risk probabilities (these can be adjusted based on domain knowledge).
# Each list holds the four weights of one linear formula, in the order the
# terms are combined in calculate_risk.
alpha = [0.4, 0.3, 0.2, 0.1]  # Flood Risk:     +PR, +SM, -ST, -AE
beta = [0.3, 0.3, 0.2, 0.2]   # Landslide Risk: +PR, +SM, +DST, +AE
gamma = [0.4, 0.3, 0.2, 0.1]  # Storm Risk:     +WS, +PR, -ST, -AE

# Define threshold for steep slope
SLOPE_THRESHOLD = 0.2  # Adjust based on terrain severity

# Horizontal distance used to turn the elevation range into a slope.
SLOPE_DISTANCE = 1000  # Assuming distance ~1000m (adjust based on dataset)

# Weight of the extra landslide term applied on steep terrain.
SLOPE_RISK_WEIGHT = 0.1


def calculate_risk(row, distance=SLOPE_DISTANCE, slope_weight=SLOPE_RISK_WEIGHT):
    """Calculate flood, landslide, and storm risk scores for one forecast row.

    Each score is a weighted linear combination of the row's forecast values
    using the module-level ``alpha``, ``beta`` and ``gamma`` coefficients. The
    results are not clipped or rescaled here.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing
            'PRECIPITATION RATE', 'SOIL MOISTURE', 'WIND SPEED',
            'SURFACE TEMPERATURE', 'DEEP SOIL TEMPERATURE', 'AVRG ELEVATION',
            'MIN ELEVATION' and 'MAX ELEVATION'.
        distance: Horizontal distance by which the elevation range is divided
            to obtain the slope. Must be positive. Defaults to SLOPE_DISTANCE.
        slope_weight: Multiplier of the slope term added to the landslide
            score when the slope exceeds SLOPE_THRESHOLD.

    Returns:
        pandas.Series with entries 'Flood Risk', 'Landslide Risk' and
        'Storm Risk', in that order.

    Raises:
        ValueError: If ``distance`` is not positive.
    """
    if distance <= 0:
        raise ValueError(f"distance must be positive, got {distance!r}")

    PR = row['PRECIPITATION RATE']
    SM = row['SOIL MOISTURE']
    WS = row['WIND SPEED']
    ST = row['SURFACE TEMPERATURE']
    DST = row['DEEP SOIL TEMPERATURE']
    AE = row['AVRG ELEVATION']
    ME = row['MIN ELEVATION']
    MAXE = row['MAX ELEVATION']

    # Compute slope as elevation range over the assumed horizontal distance.
    # NOTE(review): if the elevation columns are normalised (e.g. to 0-1), this
    # ratio stays far below SLOPE_THRESHOLD with the default distance and the
    # steep-slope branch never runs — confirm the units of the elevation data.
    slope = (MAXE - ME) / distance
    landslide_risk = beta[0] * PR + beta[1] * SM + beta[2] * DST + beta[3] * AE

    # Increase landslide risk if slope is steep
    if slope > SLOPE_THRESHOLD:
        landslide_risk += slope_weight * slope  # Additional risk factor based on slope severity

    return pd.Series({
        'Flood Risk': alpha[0] * PR + alpha[1] * SM - alpha[2] * ST - alpha[3] * AE,
        'Landslide Risk': landslide_risk,
        'Storm Risk': gamma[0] * WS + gamma[1] * PR - gamma[2] * ST - gamma[3] * AE
    })

# Score every forecast row, then attach the three risk columns to a copy of
# the forecast table so forecast_df itself is left unchanged.
RISK_COLUMNS = ['Flood Risk', 'Landslide Risk', 'Storm Risk']
risk_scores = forecast_df.apply(calculate_risk, axis=1)

risk_df = forecast_df.copy()
risk_df[RISK_COLUMNS] = risk_scores

# Persist the forecast together with its risk scores.
risk_df.to_csv("risk_forecast.csv", index=False)
print("\n✅ Risk probabilities saved to 'risk_forecast.csv'.")




✅ Risk probabilities saved to 'risk_forecast.csv'.


In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Read the per-row risk scores saved as "risk_forecast.csv".
risk_df = pd.read_csv("risk_forecast.csv")

# Relative weight of each hazard in the combined index (adjustable).
# NOTE(review): these weights sum to 0.90 rather than 1.0, so the index
# cannot exceed 0.90 after scaling — confirm this is intended.
weights = {
    'Flood Risk': 0.35,
    'Landslide Risk': 0.30,
    'Storm Risk': 0.25
}

# Rescale each risk column to the 0-1 range. The scaler is fitted on the
# whole table, so the minimum and maximum are taken over all years, months
# and locations together, not per month or per location.
risk_columns = ['Flood Risk', 'Landslide Risk', 'Storm Risk']
scaler = MinMaxScaler()
risk_df[risk_columns] = scaler.fit_transform(risk_df[risk_columns])

# Disaster Susceptibility Index (DSI): weighted sum of the scaled risks.
weighted_terms = [risk_df[name] * weights[name] for name in risk_columns]
risk_df['Disaster Susceptibility Index'] = (
    weighted_terms[0] + weighted_terms[1] + weighted_terms[2]
)

# Average the DSI over rows sharing the same year, month and location.
group_keys = ['YEAR', 'MONTH', 'NAME']
dsi_by_location = risk_df.groupby(group_keys, as_index=False)
dsi_df = dsi_by_location['Disaster Susceptibility Index'].mean()

# Persist the index table.
dsi_df.to_csv("disaster_susceptibility_index.csv", index=False)
print("\n✅ Disaster Susceptibility Index saved to 'disaster_susceptibility_index.csv'.")



✅ Disaster Susceptibility Index saved to 'disaster_susceptibility_index.csv'.


In [4]:
import pandas as pd

# Read the risk scores and the Disaster Susceptibility Index table.
risk_df = pd.read_csv("risk_forecast.csv")
dsi_df = pd.read_csv("disaster_susceptibility_index.csv")

# Attach the DSI to each risk row by year, month and location. The left join
# keeps every risk row, including any whose key has no DSI entry.
merge_keys = ['YEAR', 'MONTH', 'NAME']
merged_df = risk_df.merge(dsi_df, how='left', on=merge_keys)

# Persist the combined table.
merged_df.to_csv("merged_risk_dsi.csv", index=False)
print("\n✅ Merged dataset saved as 'merged_risk_dsi.csv'.")



✅ Merged dataset saved as 'merged_risk_dsi.csv'.


In [6]:
import pandas as pd

# Read the merged risk/DSI table and the plot-price table.
risk_dsi_df = pd.read_csv("merged_risk_dsi.csv")
plot_price_df = pd.read_csv("plot_price_DS_2.csv")

# List each table's columns to see which carry 'YEAR', 'MONTH' and 'NAME'.
for label, frame in (("risk_dsi_df", risk_dsi_df), ("plot_price_df", plot_price_df)):
    print(f"Columns in {label}:", frame.columns)


Columns in risk_dsi_df: Index(['YEAR', 'MONTH', 'NAME', 'AVRG ELEVATION', 'MIN ELEVATION',
       'MAX ELEVATION', 'PRECIPITATION RATE', 'SOIL MOISTURE', 'WIND SPEED',
       'SURFACE TEMPERATURE', 'DEEP SOIL TEMPERATURE', 'Flood Risk',
       'Landslide Risk', 'Storm Risk', 'Disaster Susceptibility Index'],
      dtype='object')
Columns in plot_price_df: Index(['NAME', 'Garden land without Road Access',
       'Garden land with Road Access', 'Wet Land', 'Rocky Land',
       'Residential Plot with Private Road Access',
       'Residential Plot with NH/PWD Road Acess',
       'Commercially Important Plot'],
      dtype='object')


In [17]:
import pandas as pd
import numpy as np

# Read the inputs: DSI per year/month/location, and the initial plot prices.
risk_dsi_df = pd.read_csv('merged_risk_dsi.csv')
plot_price_df = pd.read_csv('plot_price_DS_2.csv')

# Locations are stored as numeric codes k/16 for k = 1..16; this maps each
# code back to its place name, in code order.
place_names = [
    "Alakode", "Arakkulam", "Elappally", "Kanjikuzhy",
    "Karimkunnam", "Karimannoor", "Kodikkulam", "Kudayathoor",
    "Kumaramangalam", "Manakkad", "Muttom", "Purapuzha",
    "Udumbannoor", "Vannappuram", "Velliyamattom", "Keerikode",
]
name_mapping = {
    (position + 1) / 16: place
    for position, place in enumerate(place_names)
}

# Make NAME a float in both tables (it may have been read as a string) so the
# codes compare equal across tables and against the name_mapping keys.
for frame in (risk_dsi_df, plot_price_df):
    frame['NAME'] = frame['NAME'].astype(float)

# Convert plot price columns to numeric. Unparseable entries and zeros both
# become NaN so they are treated as missing prices.
# Every column after the first (NAME) is treated as a price column.
price_columns = list(plot_price_df.columns[1:])
for col in price_columns:
    plot_price_df[col] = pd.to_numeric(plot_price_df[col], errors='coerce').replace(0, np.nan)

# Step 1: Fill NaNs using interpolation for each NAME
# transform() hands only the price columns to the function and returns a
# result aligned with the original rows. This replaces groupby(...).apply(...)
# on the whole frame, which also operated on the grouping column NAME (the
# recorded cell output echoes that statement as the source of a warning).
# Rows whose NAME is missing belong to no group and are left to step 2.
plot_price_df[price_columns] = plot_price_df.groupby('NAME')[price_columns].transform(
    lambda prices: prices.interpolate(method='linear', limit_direction='both')
)

# Step 2: Fill remaining NaNs using median values (per-column median).
plot_price_df = plot_price_df.fillna(plot_price_df.median())

# Yearly inflation multipliers for 2024-2034, one entry per year.
inflation_rates = {
    2024: 1.048,
    2025: 1.043,
    2026: 1.042,
    2027: 1.041,
    2028: 1.040,
    2029: 1.039,
    2030: 1.038,
    2031: 1.037,
    2032: 1.036,
    2033: 1.035,
    2034: 1.034,
}

# Store YEAR and MONTH as integers so they compare equal to the integer loop
# counters used when filtering by year and month.
for column in ('YEAR', 'MONTH'):
    risk_dsi_df[column] = risk_dsi_df[column].astype(int)

# Function to adjust plot prices based on DSI & inflation
def adjust_prices(name, year, month, base_prices, dsi_value, rates=None, base_year=2024):
    """Applies disaster susceptibility index (DSI) adjustment & inflation.

    Each base price is multiplied by ``1 / (1 + dsi_value)`` and by the
    product of the yearly inflation multipliers from ``base_year`` through
    ``year`` inclusive. A ``year`` earlier than ``base_year`` gets no
    inflation (factor 1).

    Args:
        name: Location identifier. Accepted for call compatibility; not used.
        year: Target year whose cumulative inflation is applied.
        month: Target month. Accepted for call compatibility; not used.
        base_prices: Mapping of price column name to base price. Missing
            values (NaN/None) are kept as NaN in the result.
        dsi_value: Disaster Susceptibility Index for the row; a larger value
            gives a lower price.
        rates: Optional mapping of year to inflation multiplier. Defaults to
            the module-level ``inflation_rates`` table.
        base_year: First year whose multiplier is applied. Defaults to 2024.

    Returns:
        dict with the same keys as ``base_prices`` and the adjusted prices.

    Raises:
        KeyError: If any year from ``base_year`` through ``year`` has no
            entry in the inflation table.
    """
    if rates is None:
        rates = inflation_rates

    years = range(base_year, year + 1)
    missing_years = [y for y in years if y not in rates]
    if missing_years:
        # Same exception type as a plain dict lookup, but naming the gap.
        raise KeyError(f"No inflation rate defined for year(s): {missing_years}")

    adjustment_factor = 1 / (1 + dsi_value)  # Inverse relation with DSI
    inflation_factor = np.prod([rates[y] for y in years])  # Inflation over time

    # NaN entries stay in the result so every row has the same set of columns.
    return {
        col: float(price_value) * adjustment_factor * inflation_factor
        if pd.notna(price_value) else np.nan
        for col, price_value in base_prices.items()
    }

# Create a single DataFrame to store all results
all_data = []

# Look up each location's base prices once instead of re-filtering
# plot_price_df for every DSI row. Every column after the first (NAME) is
# treated as a price column, matching the columns[1:] slicing used when the
# prices were cleaned. If a NAME has several rows, the first one is used.
price_columns = list(plot_price_df.columns[1:])
base_price_lookup = (
    plot_price_df.drop_duplicates(subset='NAME', keep='first')
    .set_index('NAME')[price_columns]
    .to_dict(orient='index')
)

# Generate dataset for each month from 2024-2034.
# Year/month combinations with no rows in risk_dsi_df contribute nothing.
for year in range(2024, 2035):
    for month in range(1, 13):  # Loop through all months

        # Filter DSI values for the current year and month
        dsi_filtered = risk_dsi_df[(risk_dsi_df['YEAR'] == year) & (risk_dsi_df['MONTH'] == month)]

        for name_num, dsi_value in zip(dsi_filtered['NAME'],
                                       dsi_filtered['Disaster Susceptibility Index']):
            # Get the corresponding place name
            name_str = name_mapping.get(name_num, f"Unknown_{name_num}")

            # Base plot prices for this NAME (None if the location has no prices)
            base_prices = base_price_lookup.get(name_num)

            if base_prices is None:
                print(f"⚠️ No base prices found for NAME: {name_str} ({name_num}) in {year}-{month}!")
                continue  # Skip missing data

            new_prices = adjust_prices(name_num, year, month, base_prices, dsi_value)

            new_row = {'NAME': name_str, 'YEAR': year, 'MONTH': month, 'DSI': dsi_value, **new_prices}
            all_data.append(new_row)

# Convert to a single DataFrame
final_df = pd.DataFrame(all_data)

# Save to a single CSV file
final_df.to_csv('future_plot_prices_with_DSI_2024_2034.csv', index=False)

print("✅ Future plot prices with DSI (with mapped names) generated successfully: future_plot_prices_with_DSI_2024_2034.csv")


  plot_price_df = plot_price_df.groupby('NAME', group_keys=False).apply(


✅ Future plot prices with DSI (with mapped names) generated successfully: future_plot_prices_with_DSI_2024_2034.csv
