In [None]:
# Fuzzy Decision Tree Classifier for RECS 2020 Dataset
# ==================================================
# This script implements a custom Fuzzy Decision Tree classifier using scikit-fuzzy,
# predicts energy efficiency classes, and integrates fuzzy logic scores if available.
#
# Inputs:
# - Processed dataset (data/processed/merged_with_efficiency.csv or merged_cleaned.csv)
# Outputs:
# - Trained model, evaluation metrics, visualizations
#
# Dependencies: pandas, numpy, joblib, scikit-fuzzy, scikit-learn, seaborn, matplotlib

In [None]:
import pandas as pd
import numpy as np
import os
import joblib
import skfuzzy as fuzz
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Project layout. Every location is resolved relative to the parent of the
# current working directory, so the notebook must be started from a direct
# sub-folder of the project root.
BASE_DIR = os.path.abspath(os.path.join(os.getcwd(), ".."))
MODELS_DIR = os.path.join(BASE_DIR, 'models')
DATA_DIR = os.path.join(BASE_DIR, "data")
PROCESSED_DIR = os.path.join(DATA_DIR, "processed")
DATA_PATH = os.path.join(PROCESSED_DIR, 'merged_with_efficiency.csv')

# The model output folder is created up front (no error if it already exists).
os.makedirs(MODELS_DIR, exist_ok=True)

# Read the processed dataset into memory
data = pd.read_csv(DATA_PATH)

In [None]:
# Membership-function parameters: a (minimum, mean, maximum) triple for each
# column that gets fuzzified.
# NOTE(review): the statistics are taken over the whole of `data`, i.e. before
# the train/test split performed further down, so held-out rows contribute to
# them -- confirm this is intended.
energy_params = tuple(
    getattr(data['ENERGY_CONSUMPTION_PER_SQFT'], stat)()
    for stat in ('min', 'mean', 'max')
)
income_params = tuple(
    getattr(data['Pct_INCOME_MORE_THAN_150K'], stat)()
    for stat in ('min', 'mean', 'max')
)

# Fuzzy membership functions
def _triangular_memberships(val, min_val, mean_val, max_val):
    """Evaluate the low / medium / high triangular fuzzy sets at ``val``.

    The three sets are defined by the vertices
    ``low = (min, min, mean)``, ``medium = (min, mean, max)`` and
    ``high = (mean, max, max)``.

    The membership functions are evaluated directly at ``val`` instead of being
    sampled on a fixed grid and interpolated: a sampled grid generally does not
    contain ``mean_val``, which flattens the peak of ``medium`` and smears
    ``low``/``high`` across the kink at the mean. Direct evaluation also keeps
    the degenerate case ``min_val == max_val`` well defined.

    Parameters
    ----------
    val : float or array-like of float
        Value(s) to fuzzify.
    min_val, mean_val, max_val : float
        Vertices of the triangles; must satisfy ``min_val <= mean_val <= max_val``
        (enforced by ``fuzz.trimf``).

    Returns
    -------
    dict
        Keys ``'low'``, ``'medium'``, ``'high'``. Each entry is a scalar when
        ``val`` is a scalar, otherwise an array with the shape of ``val``.
        Values outside ``[min_val, max_val]`` get degree 0 in every set;
        NaN inputs yield NaN degrees.
    """
    values = np.atleast_1d(np.asarray(val, dtype=float))
    flat = values.ravel()  # trimf expects a 1-D universe
    missing = np.isnan(flat)
    scalar_input = np.ndim(val) == 0

    vertices = {
        'low': [min_val, min_val, mean_val],
        'medium': [min_val, mean_val, max_val],
        'high': [mean_val, max_val, max_val],
    }

    memberships = {}
    for label, abc in vertices.items():
        degree = fuzz.trimf(flat, abc)
        # trimf maps NaN to 0; keep missing values visible instead.
        degree[missing] = np.nan
        degree = degree.reshape(values.shape)
        memberships[label] = degree[0] if scalar_input else degree
    return memberships


def fuzz_energy(val, min_val, mean_val, max_val):
    """Fuzzify an energy-consumption value into low / medium / high degrees.

    See ``_triangular_memberships`` for the definition of the three sets.
    Accepts a scalar or an array-like ``val`` and returns a dict with keys
    ``'low'``, ``'medium'`` and ``'high'``.
    """
    return _triangular_memberships(val, min_val, mean_val, max_val)


def fuzz_income(val, min_val, mean_val, max_val):
    """Fuzzify an income-share value into low / medium / high degrees.

    Uses the same triangular sets as ``fuzz_energy``. Accepts a scalar or an
    array-like ``val`` and returns a dict with keys ``'low'``, ``'medium'``
    and ``'high'``.
    """
    return _triangular_memberships(val, min_val, mean_val, max_val)

In [None]:
# FuzzyDecisionTree class
class FuzzyDecisionTree:
    """Decision tree trained on fuzzified energy and income features.

    ``ENERGY_CONSUMPTION_PER_SQFT`` and ``Pct_INCOME_MORE_THAN_150K`` are each
    replaced by three membership degrees (low / medium / high) computed by
    ``fuzz_energy`` and ``fuzz_income``. A fixed set of other columns is passed
    through unchanged when present. The resulting frame is fed to a
    scikit-learn ``DecisionTreeClassifier`` stored in ``self.model``.
    """

    # Columns forwarded to the tree as-is when they exist in the input frame.
    _PASSTHROUGH_FEATURES = (
        'CLIMATE_Cold', 'Pct_MAIN_HEAT_AGE_OLDER_THAN_20',
        'CLIMATE_Hot-Humid', 'CLIMATE_Mixed-Humid', 'CLIMATE_Very-Cold',
        'Pct_HOUSING_SINGLE_FAMILY_HOME_DETACHED',
        'Pct_HOUSING_APT_MORE_THAN_5_UNITS', 'Pct_BUILT_BEFORE_1950',
        'Pct_MAIN_AC_AGE_OLDER_THAN_20', 'Pct_MAIN_WATER_HEAT_OLDER_THAN_20',
    )

    def __init__(self, max_depth=6, random_state=42):
        """Create the underlying tree; membership parameters are set by ``fit``."""
        self.model = DecisionTreeClassifier(max_depth=max_depth, random_state=random_state)
        self.feature_names = []
        self.energy_params = None
        self.income_params = None

    def fuzzify_features(self, data):
        """Build the model input frame from raw features.

        Parameters
        ----------
        data : pandas.DataFrame
            Must contain ``ENERGY_CONSUMPTION_PER_SQFT`` and
            ``Pct_INCOME_MORE_THAN_150K``; pass-through columns are optional.

        Returns
        -------
        pandas.DataFrame
            Six membership columns (``energy_*``, ``income_*``) followed by the
            pass-through columns found in ``data``, indexed like ``data``.

        Raises
        ------
        RuntimeError
            If the membership parameters have not been set by ``fit`` yet.
        """
        if self.energy_params is None or self.income_params is None:
            raise RuntimeError(
                "FuzzyDecisionTree is not fitted: call fit() before "
                "fuzzify_features() or predict()."
            )

        # One vectorised call per column instead of one Python call per row.
        energy = fuzz_energy(
            data['ENERGY_CONSUMPTION_PER_SQFT'].to_numpy(dtype=float),
            *self.energy_params
        )
        income = fuzz_income(
            data['Pct_INCOME_MORE_THAN_150K'].to_numpy(dtype=float),
            *self.income_params
        )

        # Re-attach the caller's index so the concat below aligns row by row.
        fuzzified_data = pd.DataFrame(
            {
                'energy_low': energy['low'],
                'energy_medium': energy['medium'],
                'energy_high': energy['high'],
                'income_low': income['low'],
                'income_medium': income['medium'],
                'income_high': income['high'],
            },
            index=data.index,
        )

        # Include raw/binary features that are available in this frame
        other_features = [f for f in self._PASSTHROUGH_FEATURES if f in data.columns]
        return pd.concat([fuzzified_data, data[other_features]], axis=1)

    def fit(self, X, y, energy_params, income_params):
        """Store the membership parameters and train the tree.

        Parameters
        ----------
        X : pandas.DataFrame
            Raw feature frame (see ``fuzzify_features``).
        y : array-like
            Target labels, passed to the underlying tree.
        energy_params, income_params : tuple
            ``(min, mean, max)`` triples forwarded to ``fuzz_energy`` and
            ``fuzz_income`` respectively.

        Returns
        -------
        FuzzyDecisionTree
            ``self``, to allow chaining.
        """
        self.energy_params = energy_params
        self.income_params = income_params
        X_fuzz = self.fuzzify_features(X)
        self.feature_names = X_fuzz.columns.tolist()
        self.model.fit(X_fuzz, y)
        return self

    def predict(self, X):
        """Predict class labels for raw feature rows.

        The fuzzified frame is reduced to (and ordered like) the columns seen
        during ``fit`` so the tree always receives the layout it was trained on.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        ValueError
            If a column used during ``fit`` is missing from ``X``.
        """
        X_fuzz = self.fuzzify_features(X)
        missing = [name for name in self.feature_names if name not in X_fuzz.columns]
        if missing:
            raise ValueError(f"Input is missing features used during fit: {missing}")
        return self.model.predict(X_fuzz[self.feature_names])

    @property
    def feature_importances_(self):
        """Feature importances of the underlying tree, ordered like ``feature_names_in_``."""
        return self.model.feature_importances_

    @property
    def feature_names_in_(self):
        """Names of the fuzzified/pass-through columns seen during ``fit``, as an array."""
        return np.array(self.feature_names)

In [None]:
# Candidate model inputs; only those actually present in the dataset are kept,
# in the order listed here.
features = [
    column
    for column in (
        'ENERGY_CONSUMPTION_PER_SQFT', 'Pct_INCOME_MORE_THAN_150K', 'CLIMATE_Cold',
        'Pct_MAIN_HEAT_AGE_OLDER_THAN_20', 'CLIMATE_Hot-Humid', 'CLIMATE_Mixed-Humid',
        'CLIMATE_Very-Cold', 'Pct_HOUSING_SINGLE_FAMILY_HOME_DETACHED',
        'Pct_HOUSING_APT_MORE_THAN_5_UNITS', 'Pct_BUILT_BEFORE_1950',
        'Pct_MAIN_AC_AGE_OLDER_THAN_20', 'Pct_MAIN_WATER_HEAT_OLDER_THAN_20',
    )
    if column in data.columns
]

# Feature matrix / target, then an 80/20 hold-out split with a fixed seed
X = data[features]
y = data['Efficiency_Class']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the fuzzy decision tree on the training partition
fdt = FuzzyDecisionTree(max_depth=6, random_state=42)
fdt.fit(
    X_train,
    y_train,
    energy_params=energy_params,
    income_params=income_params,
)

# Score the held-out partition and print the per-class report
y_pred = fdt.predict(X_test)
print('Classification Report:', classification_report(y_test, y_pred), sep='\n')

In [None]:
# Pair each fuzzified / pass-through column with the importance the fitted
# tree assigned to it, and show only the columns the tree actually used.
feature_importance = pd.DataFrame(
    {'Feature': fdt.feature_names_in_, 'Importance': fdt.feature_importances_}
)
used_mask = feature_importance['Importance'] > 0
print('Feature Importance:')
print(feature_importance.loc[used_mask])

# Persist the fitted wrapper (tree + membership parameters) to the models folder.
# NOTE(review): the dump presumably references FuzzyDecisionTree by module path,
# so the class would need to be defined the same way wherever it is loaded --
# verify in the consuming code.
output_path = os.path.join(MODELS_DIR, 'fuzzy_decision_tree_model.pkl')
joblib.dump(fdt, output_path)
print(f'Saved model: {output_path}')