In [None]:
# Show which columns the thresholds table provides.
# NOTE(review): thresholds_df is only loaded in the data-loading cell further down,
# so on a fresh kernel this cell has to be run after that one.
print("Columns in thresholds_df:", list(thresholds_df.columns))


Columns in thresholds_df: ['District Name', 'District Population', 'Residential Energy Consumption (MU/year)', 'Residential Energy Consumption (kWh/year)', 'Per Capita Energy Consumption (kWh/year)', 'Source of Energy', 'Carbon Emission Factor (kg CO2/kWh)', 'Carbon Emissions (kg CO2/year)', 'Carbon Emissions (tons CO2/year)', 'Population Density (per sq. km)', 'Category', 'Sub-District', 'Sub-District Population', 'Emission Limit (kg CO2/year)', 'Emission Limit (kg CO2/month)', 'Low_Threshold', 'Moderate_Threshold', 'High_Threshold']


In [None]:
import pandas as pd
import numpy as np

# Step 1: Read the per-appliance household table and the regional thresholds table
household_df = pd.read_csv('/content/10000_balanced_household_dataset.csv')
thresholds_df = pd.read_csv('/content/updated_REM_dataset_with_thresholds (1).csv')

# kg CO₂ emitted per kWh of electricity
conversion_factor = 0.75284

# Step 2: Per-appliance emissions = consumption x monthly hours x emission factor
# NOTE(review): multiplying a column labelled kWh by hours only yields energy if that
# column is really a per-hour rate — confirm the units of 'Energy Consumption (kWh)'.
household_df['Carbon_Emission_per_Appliance (kg CO₂)'] = (
    household_df['Energy Consumption (kWh)']
    .mul(household_df['Monthly Usage Hours'])
    .mul(conversion_factor)
)

# Step 3: Collapse appliance rows into one total per household
household_carbon_emissions = (
    household_df
    .groupby('Household ID')['Carbon_Emission_per_Appliance (kg CO₂)']
    .sum()
    .reset_index()
    .rename(columns={'Carbon_Emission_per_Appliance (kg CO₂)': 'Total_Carbon_Emission (kg CO₂)'})
)

# Step 4: Attach each household's Sub-District / District so thresholds can be looked up
household_info = household_df.loc[:, ['Household ID', 'Sub-District', 'District Name']].drop_duplicates()
household_carbon_emissions = pd.merge(
    household_carbon_emissions, household_info, on='Household ID', how='left'
)

# Fail early if the thresholds table lacks any column needed below
required_columns = ['Low_Threshold', 'Moderate_Threshold', 'High_Threshold', 'Sub-District', 'District Name']
if not set(required_columns).issubset(thresholds_df.columns):
    raise ValueError("Thresholds dataset is missing one or more required columns.")

# Step 5: Bring in the regional thresholds for every household
# NOTE(review): a left merge repeats a household row once per matching thresholds row;
# presumably (Sub-District, District Name) is unique in thresholds_df — verify.
combined_df = pd.merge(
    household_carbon_emissions, thresholds_df, on=['Sub-District', 'District Name'], how='left'
)

# Any null threshold here means a household's region had no match in thresholds_df
if combined_df[['Low_Threshold', 'Moderate_Threshold', 'High_Threshold']].isna().to_numpy().any():
    raise ValueError("Missing threshold values after merging. Ensure all sub-districts match between datasets.")

# Step 6: Categorize emissions based on thresholds
def categorize_emission(row):
    """Map one household row to an emission class code.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing
            'Total_Carbon_Emission (kg CO₂)', 'Low_Threshold' and 'High_Threshold'.

    Returns:
        0 (Low) when the total is at or below 'Low_Threshold',
        2 (High) when it is at or above 'High_Threshold',
        1 (Moderate) otherwise.

    The Low check is evaluated first, so it wins if both conditions hold.
    'Moderate_Threshold' is not consulted by this rule.
    """
    total = row['Total_Carbon_Emission (kg CO₂)']
    if total <= row['Low_Threshold']:
        return 0  # Low
    if total >= row['High_Threshold']:
        return 2  # High
    return 1  # Moderate

# Label every household with its class code (0 = Low, 1 = Moderate, 2 = High)
combined_df = combined_df.assign(
    Emission_Category=lambda frame: frame.apply(categorize_emission, axis=1)
)

# Preview the totals, the thresholds they were compared against, and the resulting label
print(combined_df.head()[['Household ID', 'Total_Carbon_Emission (kg CO₂)', 'Low_Threshold', 'Moderate_Threshold', 'High_Threshold', 'Emission_Category']])


   Household ID  Total_Carbon_Emission (kg CO₂)  Low_Threshold  \
0             1                      218.270901     808.390279   
1             2                      348.196028     755.559324   
2             3                      396.761737     464.211075   
3             4                      230.700290     541.384772   
4             5                      365.895297     269.657924   

   Moderate_Threshold  High_Threshold  Emission_Category  
0         1455.102502     1616.780558                  0  
1         1360.006782     1511.118647                  0  
2          835.579935      928.422150                  0  
3          974.492589     1082.769543                  0  
4          485.384263      539.315848                  1  


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Build the feature matrix (X) and target (y): drop identifiers, the target itself,
# the region keys and the raw threshold columns.
# NOTE(review): the label is a deterministic function of 'Total_Carbon_Emission (kg CO₂)'
# (kept as a feature) and the dropped thresholds. If other retained thresholds_df columns
# encode those thresholds, the models are mostly re-learning the rule — confirm intended.
X = combined_df.drop(['Household ID', 'Emission_Category', 'Sub-District', 'District Name', 'Low_Threshold', 'Moderate_Threshold', 'High_Threshold'], axis=1)
y = combined_df['Emission_Category']

# One-hot encode categorical variables
X = pd.get_dummies(X, drop_first=True)

# Split BEFORE scaling so that test rows never influence the scaler's mean/std.
# Same test_size / random_state as before, so the row partition is unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only, then apply it to both partitions
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full scaled matrix, kept for any code that still refers to X_scaled
X_scaled = scaler.transform(X)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout

# Local imports: only this cell needs them on top of Sequential / Dense / Dropout
from keras.layers import Input
from keras.utils import set_random_seed

# Seed the Python, NumPy and backend RNGs so weight init, dropout and shuffling are repeatable
set_random_seed(42)

# Build the MLP model: three hidden ReLU layers with dropout after the first two.
# An explicit Input layer replaces the deprecated `input_dim` argument on the first Dense.
mlp_model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(3, activation='softmax'),  # 3 classes: low, moderate, high
])

# Compile the model (integer class labels -> sparse categorical cross-entropy)
mlp_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model, holding out 20% of the training rows for validation
mlp_history = mlp_model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2)


Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7859 - loss: 0.5303 - val_accuracy: 0.9425 - val_loss: 0.1859
Epoch 2/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9354 - loss: 0.1910 - val_accuracy: 0.9619 - val_loss: 0.1077
Epoch 3/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9566 - loss: 0.1279 - val_accuracy: 0.9725 - val_loss: 0.0803
Epoch 4/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9622 - loss: 0.1050 - val_accuracy: 0.9756 - val_loss: 0.0680
Epoch 5/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9678 - loss: 0.0927 - val_accuracy: 0.9769 - val_loss: 0.0636
Epoch 6/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9714 - loss: 0.0849 - val_accuracy: 0.9725 - val_loss: 0.0635
Epoch 7/100
[1m200/200[0m [32m━

In [None]:
# Predicted class per row = index of the largest softmax output.
# These label vectors are what gets appended as an extra feature for XGBoost.
mlp_train_predictions = mlp_model.predict(X_train).argmax(axis=1)
mlp_test_predictions = mlp_model.predict(X_test).argmax(axis=1)

[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


In [None]:
# Append the MLP's predicted label as one extra trailing feature column
X_train_combined = np.concatenate((X_train, mlp_train_predictions[:, np.newaxis]), axis=1)
X_test_combined = np.concatenate((X_test, mlp_test_predictions[:, np.newaxis]), axis=1)

In [None]:
import xgboost as xgb
from sklearn.metrics import accuracy_score, classification_report

# Get the MLP's softmax outputs (one probability column per class).
# NOTE: this rebinds mlp_train_predictions / mlp_test_predictions to 2-D probability
# arrays, replacing the argmax label vectors computed in the earlier cell.
mlp_train_predictions = mlp_model.predict(X_train)
mlp_test_predictions = mlp_model.predict(X_test)

# Append the class probabilities (not argmax labels) to the original features
# NOTE(review): the training-side probabilities come from the same rows the MLP was
# fitted on; out-of-fold predictions would avoid optimistic stacking features.
mlp_train_features = np.hstack((X_train, mlp_train_predictions))
mlp_test_features = np.hstack((X_test, mlp_test_predictions))

# Train the XGBoost model.
# `use_label_encoder` is dropped: XGBoost reports it as unused and only emits a warning.
xgb_model = xgb.XGBClassifier(eval_metric='mlogloss')
xgb_model.fit(mlp_train_features, y_train)
# Make predictions with XGBoost
xgb_predictions = xgb_model.predict(mlp_test_features)

# Evaluate the combined model on the held-out test rows
combined_accuracy = accuracy_score(y_test, xgb_predictions)
print(f"Combined Model Accuracy: {combined_accuracy:.4f}")
print(classification_report(y_test, xgb_predictions))

[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


Parameters: { "use_label_encoder" } are not used.



Combined Model Accuracy: 0.9960
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       368
           1       0.97      0.96      0.96       112
           2       1.00      1.00      1.00      1520

    accuracy                           1.00      2000
   macro avg       0.99      0.98      0.99      2000
weighted avg       1.00      1.00      1.00      2000



In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
import numpy as np
import xgboost as xgb
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Local imports: only this cell needs them on top of the imports above
from keras.layers import Input
from keras.utils import set_random_seed

# Seed the Python, NumPy and backend RNGs so weight init, dropout and shuffling are repeatable
set_random_seed(42)

# Build the MLP model: three hidden ReLU layers with dropout after the first two.
# An explicit Input layer replaces the deprecated `input_dim` argument on the first Dense.
mlp_model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(3, activation='softmax'),  # 3 classes: low, moderate, high
])

# Compile the MLP model (integer class labels -> sparse categorical cross-entropy)
mlp_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the MLP model, holding out 20% of the training rows for validation
mlp_history = mlp_model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2)

# Get MLP predictions and convert the softmax probabilities to class labels
mlp_train_predictions = np.argmax(mlp_model.predict(X_train), axis=1)
mlp_test_predictions = np.argmax(mlp_model.predict(X_test), axis=1)

# Combine MLP predictions with the original features (for both train and test sets):
# the predicted label becomes one extra trailing column.
X_train_combined = np.hstack((X_train, mlp_train_predictions.reshape(-1, 1)))
X_test_combined = np.hstack((X_test, mlp_test_predictions.reshape(-1, 1)))


Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.7547 - loss: 0.5518 - val_accuracy: 0.9531 - val_loss: 0.1630
Epoch 2/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9393 - loss: 0.1793 - val_accuracy: 0.9681 - val_loss: 0.0854
Epoch 3/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9579 - loss: 0.1195 - val_accuracy: 0.9669 - val_loss: 0.0808
Epoch 4/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9674 - loss: 0.0979 - val_accuracy: 0.9737 - val_loss: 0.0693
Epoch 5/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9683 - loss: 0.0963 - val_accuracy: 0.9756 - val_loss: 0.0755
Epoch 6/100
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9679 - loss: 0.0914 - val_accuracy: 0.9725 - val_loss: 0.0702
Epoch 7/100
[1m200/200[0m [32m━

In [None]:
# Train the XGBoost model on the combined features (scaled inputs + MLP predicted label).
# `use_label_encoder` is dropped: XGBoost reports it as unused and only emits a warning.
xgb_model = xgb.XGBClassifier(eval_metric='mlogloss')
xgb_model.fit(X_train_combined, y_train)

# Make predictions with the XGBoost model on the combined test set
xgb_predictions = xgb_model.predict(X_test_combined)

Parameters: { "use_label_encoder" } are not used.



In [None]:
# Score the stacked (MLP + XGBoost) predictions against the held-out test labels
combined_accuracy = accuracy_score(y_true=y_test, y_pred=xgb_predictions)
print(f"Combined Model Accuracy: {combined_accuracy:.4f}")

# Per-class precision / recall / F1 and support
print(classification_report(y_true=y_test, y_pred=xgb_predictions))

Combined Model Accuracy: 0.9950
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       368
           1       0.96      0.96      0.96       112
           2       1.00      1.00      1.00      1520

    accuracy                           0.99      2000
   macro avg       0.98      0.98      0.98      2000
weighted avg       0.99      0.99      0.99      2000



In [None]:
# Save the MLP model
# NOTE(review): this pickles a Keras model through joblib; Keras' native format
# (mlp_model.save('....keras')) is the documented way to persist models — consider
# switching once consumers of mlp_model.pkl can be updated.
joblib.dump(mlp_model, '/content/mlp_model.pkl')

# Save the XGBoost model
joblib.dump(xgb_model, '/content/combined_xgb_model.pkl')

# Save the fitted scaler as well: both models were trained on standardized features,
# so new data cannot be prepared for them without the same scaler.
joblib.dump(scaler, '/content/scaler.pkl')

print("Models saved successfully!")

# Provide download links for the models (Colab-only helper)
from google.colab import files
files.download('/content/mlp_model.pkl')  # Download MLP model
files.download('/content/combined_xgb_model.pkl')  # Download XGBoost model
files.download('/content/scaler.pkl')  # Download the fitted StandardScaler

Models saved successfully!


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>