In [None]:
# Fuzzy Logic Implementation for RECS 2020 Dataset
# ==============================================
# This script applies fuzzy logic using scikit-fuzzy to compute efficiency scores based on
# energy consumption, income, equipment age, and climate characteristics. It defines rules,
# outputs membership percentages, refines Decision Tree predictions, and visualizes memberships.
#
# Inputs:
# - Processed dataset (data/processed/merged_cleaned.csv)
# Outputs:
# - Dataset with fuzzy scores (data/processed/merged_with_efficiency.csv)
#
# Dependencies: pandas, numpy, scikit-fuzzy, matplotlib

In [None]:
import pandas as pd
import numpy as np
import os
import skfuzzy as fuzz
import matplotlib.pyplot as plt

# Resolve the project directories from the parent of the current working directory
BASE_DIR = os.path.abspath(os.path.join(os.getcwd(), ".."))
DATA_DIR, PROCESSED_DIR = (
    os.path.join(BASE_DIR, "data"),
    os.path.join(BASE_DIR, "data", "processed"),
)

# Read the merged, cleaned dataset that every later cell works on
merged_cleaned_path = os.path.join(PROCESSED_DIR, "merged_cleaned.csv")
df = pd.read_csv(merged_cleaned_path)

In [None]:
# Derive the (min, mean, max) anchors of each fuzzy input from the data itself
def _min_mean_max(column):
    """Return ``(min, mean, max)`` of ``df[column]`` as a tuple."""
    values = df[column]
    return (values.min(), values.mean(), values.max())


energy_params = _min_mean_max('ENERGY_CONSUMPTION_PER_SQFT')
income_params = _min_mean_max('Pct_INCOME_MORE_THAN_150K')
equipment_params = _min_mean_max('Pct_MAIN_HEAT_AGE_OLDER_THAN_20')

In [None]:
# Fail fast if any column the scoring functions read is absent from the dataset
required_cols = [
    'ENERGY_CONSUMPTION_PER_SQFT',
    'Pct_INCOME_MORE_THAN_150K',
    'Pct_MAIN_HEAT_AGE_OLDER_THAN_20',
    'STATE',
]
required_climate_cols = [
    'CLIMATE_Cold',
    'CLIMATE_Hot-Humid',
    'CLIMATE_Mixed-Humid',
    'CLIMATE_Very-Cold',
]

available_cols = set(df.columns)
missing_cols = [
    col for col in [*required_cols, *required_climate_cols]
    if col not in available_cols
]
if len(missing_cols) > 0:
    raise ValueError(f"Missing columns: {missing_cols}")

In [None]:
# Define fuzzy membership functions using scikit-fuzzy
def fuzz_energy(val, min_val, mean_val, max_val):
    """Grade ``val`` against low / medium / high triangular fuzzy sets.

    The sets are anchored on the three statistics passed in:

    * ``low``    -- 1 at ``min_val``, falling to 0 at ``mean_val``
    * ``medium`` -- 0 at ``min_val``, 1 at ``mean_val``, 0 at ``max_val``
    * ``high``   -- 0 at ``mean_val``, rising to 1 at ``max_val``

    The triangles are evaluated directly at ``val`` rather than interpolated
    from a 100-point sampled universe. A sample grid almost never contains
    ``mean_val`` itself, so interpolation clips the 'medium' peak below 1 and
    lets 'low'/'high' leak slightly past the mean; it also rebuilds the grid
    and three curves on every call, which is wasted work when the function
    is applied row by row.

    Args:
        val: Value to grade (a scalar, or an array of values).
        min_val: Lower anchor of the universe.
        mean_val: Peak of the 'medium' set.
        max_val: Upper anchor of the universe.

    Returns:
        dict with keys 'low', 'medium' and 'high'. Values outside
        ``[min_val, max_val]`` get 0 in every set; NaN inputs yield NaN.
    """
    points = np.asarray(val, dtype=float)
    flat = points.ravel()  # trimf expects a 1-D array it can index and take len() of
    nan_mask = np.isnan(flat)

    def _grade(abc):
        grades = fuzz.trimf(flat, abc)
        # trimf reports 0 for NaN (every comparison is False); keep NaN visible
        # so missing inputs are not mistaken for "belongs to no set".
        grades[nan_mask] = np.nan
        grades = grades.reshape(points.shape)
        # Hand scalars back as scalars, arrays as arrays.
        return grades[()] if points.ndim == 0 else grades

    return {
        'low': _grade([min_val, min_val, mean_val]),
        'medium': _grade([min_val, mean_val, max_val]),
        'high': _grade([mean_val, max_val, max_val])
    }

In [None]:
def fuzz_income(val, min_val, mean_val, max_val):
    """Grade an income share against low / medium / high triangular fuzzy sets.

    Args:
        val: Value to grade.
        min_val: Lower anchor of the universe (peak of 'low').
        mean_val: Peak of the 'medium' set.
        max_val: Upper anchor of the universe (peak of 'high').

    Returns:
        dict with keys 'low', 'medium' and 'high' holding the memberships.
    """
    # The income sets use exactly the same triangle layout as the energy sets,
    # so reuse that implementation instead of keeping a second copy in sync.
    return fuzz_energy(val, min_val, mean_val, max_val)

In [None]:
# Plot the three energy membership curves across the observed range
energy_min, energy_mean, energy_max = energy_params
x_energy = np.linspace(energy_min, energy_max, 100)
energy_low = fuzz.trimf(x_energy, [energy_min, energy_min, energy_mean])
energy_medium = fuzz.trimf(x_energy, [energy_min, energy_mean, energy_max])
energy_high = fuzz.trimf(x_energy, [energy_mean, energy_max, energy_max])

fig, ax = plt.subplots(figsize=(8, 4))
for curve, curve_label in (
    (energy_low, 'Low'),
    (energy_medium, 'Medium'),
    (energy_high, 'High'),
):
    ax.plot(x_energy, curve, label=curve_label)
ax.set_xlabel('Energy Consumption (kWh/sqft)')
ax.set_ylabel('Membership')
ax.set_title('Energy Consumption Membership Functions')
ax.legend()
plt.show()

In [None]:
# Fuzzy efficiency score function
def fuzzy_efficiency_score(row):
    """Blend three equally weighted components into a 0-100 efficiency score.

    Components:
        * 'low' membership of the row's energy consumption per square foot
        * 'high' membership of the row's share of incomes above 150K
        * a climate score: 1.0 for CLIMATE_Cold, 0.5 for CLIMATE_Mixed-Humid,
          0.0 otherwise

    Args:
        row: A dataset row supporting ``row[...]`` and ``row.get(...)``.

    Returns:
        The mean of the three components as a percentage, rounded to 2 decimals.
    """
    low_energy = fuzz_energy(row['ENERGY_CONSUMPTION_PER_SQFT'], *energy_params)['low']
    high_income = fuzz_income(row['Pct_INCOME_MORE_THAN_150K'], *income_params)['high']

    # First matching climate flag wins; rows matching neither contribute 0.
    climate_weights = (('CLIMATE_Cold', 1.0), ('CLIMATE_Mixed-Humid', 0.5))
    climate = next(
        (weight for flag, weight in climate_weights if row.get(flag, 0) == 1),
        0.0,
    )

    mean_component = (low_energy + high_income + climate) / 3
    return round(mean_component * 100, 2)

In [None]:
# Rule-based efficiency
def rule_based_efficiency(row):
    """Assign a crisp efficiency score from the first matching rule.

    A rule fires when the named energy membership exceeds 0.5 and the named
    climate flag is truthy. Rules are checked in the order listed below and
    the first hit decides the score; rows matching none get the default 33.

    Args:
        row: A dataset row supporting ``row[...]`` and ``row.get(...)``.

    Returns:
        One of 100, 50, 0 or 33.
    """
    memberships = fuzz_energy(row['ENERGY_CONSUMPTION_PER_SQFT'], *energy_params)

    # (energy level, climate flag, score) -- order matters, first match wins.
    rules = (
        ('low', 'CLIMATE_Cold', 100),           # Rule 1
        ('low', 'CLIMATE_Hot-Humid', 50),       # Rule 2
        ('medium', 'CLIMATE_Very-Cold', 50),    # Rule 3
        ('medium', 'CLIMATE_Mixed-Humid', 50),  # Rule 4
        ('high', 'CLIMATE_Cold', 50),           # Rule 5
        ('high', 'CLIMATE_Hot-Humid', 0),       # Rule 6
    )
    for level, climate_flag, result in rules:
        if memberships[level] > 0.5 and row.get(climate_flag, 0):
            return result

    return 33  # Default

In [None]:
# Rule evaluation for FUZZY_OUTPUT
def rule_evaluation(energy, climate_flags, income, equipment_age):
    """Accumulate rule strengths per efficiency level and return percentage shares.

    Args:
        energy: Energy consumption value, graded with ``fuzz_energy``.
        climate_flags: Mapping read with ``.get`` for 'CLIMATE_Cold' and
            'CLIMATE_Hot-Humid'.
        income: Income value, graded with ``fuzz_income``.
        equipment_age: Compared against the mean stored in ``equipment_params``.

    Returns:
        dict with keys 'low', 'medium', 'high'. When the accumulated total is
        positive each entry is its share of the total in percent, rounded to
        2 decimals; otherwise the raw accumulated values are returned.
    """
    energy_mf = fuzz_energy(energy, *energy_params)
    income_mf = fuzz_income(income, *income_params)

    low = medium = high = 0

    # --- contributions to HIGH efficiency ---
    if climate_flags.get('CLIMATE_Cold', 0) == 1:
        high += energy_mf['low']        # Rule 1: low energy in a cold climate
    if energy_mf['low'] > 0.5 and income_mf['high'] > 0.5:
        high += 0.5                     # Rule 5: low energy and high income

    # --- contributions to LOW efficiency ---
    if climate_flags.get('CLIMATE_Hot-Humid', 0) == 1:
        low += energy_mf['high']        # Rule 2: high energy in a hot-humid climate
    if equipment_age > equipment_params[1]:
        low += 0.5                      # Rule 4: older-than-average equipment

    # --- contribution to MEDIUM efficiency ---
    medium += income_mf['medium']       # Rule 3: medium income

    score = {'low': low, 'medium': medium, 'high': high}

    # Convert to percentage shares unless nothing fired at all.
    total = sum(score.values())
    if total > 0:
        score = {
            level: round(strength / total * 100, 2)
            for level, strength in score.items()
        }

    return score

In [None]:
# Score every row with each of the three fuzzy approaches
def _fuzzy_output_for_row(row):
    """Pass one row's energy, flags, income and heating-age share to rule_evaluation."""
    return rule_evaluation(
        energy=row['ENERGY_CONSUMPTION_PER_SQFT'],
        climate_flags=row.to_dict(),
        income=row['Pct_INCOME_MORE_THAN_150K'],
        equipment_age=row['Pct_MAIN_HEAT_AGE_OLDER_THAN_20'],
    )


df['EFFICIENCY_SCORE'] = df.apply(fuzzy_efficiency_score, axis=1)
df['RULE_BASED_EFFICIENCY'] = df.apply(rule_based_efficiency, axis=1)
df['FUZZY_OUTPUT'] = df.apply(_fuzzy_output_for_row, axis=1)

In [None]:
# Add Efficiency_Class for Decision Tree
# The two cut-offs are computed once up front. Calling quantile() inside the
# per-row lambda re-scanned the whole column for every row.
energy_q33 = df['ENERGY_CONSUMPTION_PER_SQFT'].quantile(0.33)
energy_q66 = df['ENERGY_CONSUMPTION_PER_SQFT'].quantile(0.66)

# Lower consumption means higher efficiency: below the 0.33 quantile -> "High",
# up to and including the 0.66 quantile -> "Moderate", everything else -> "Low".
df['Efficiency_Class'] = df['ENERGY_CONSUMPTION_PER_SQFT'].apply(
    lambda x: "High" if x < energy_q33 else
             ("Moderate" if x <= energy_q66 else "Low")
)

In [None]:
# Refine Decision Tree predictions with fuzzy logic
# When the fuzzy 'high' share exceeds 60 (percent) it is necessarily the largest
# of the three shares, so the override always means "high efficiency". It is
# written as "High" to match the Efficiency_Class vocabulary
# ("High" / "Moderate" / "Low"); taking the FUZZY_OUTPUT key directly produced a
# lowercase 'high' that showed up as a separate class in FINAL_CLASS.
df['FINAL_CLASS'] = df.apply(
    lambda row: "High" if row['FUZZY_OUTPUT']['high'] > 60 else row['Efficiency_Class'],
    axis=1
)

# Expand FUZZY_OUTPUT into columns
# Reuse df's index so the STATE assignment below aligns row-for-row even if df
# does not carry a default 0..n-1 index (column assignment aligns on index labels).
fuzzy_df = pd.DataFrame(df['FUZZY_OUTPUT'].tolist(), index=df.index)
fuzzy_df['STATE'] = df['STATE']
fuzzy_df = fuzzy_df[['STATE', 'low', 'medium', 'high']]

In [None]:
# Write the scored dataset into the processed-data directory
# NOTE(review): FUZZY_OUTPUT holds dict objects, so the CSV stores their string
# form -- confirm downstream readers expect that.
output_path = os.path.join(PROCESSED_DIR, "merged_with_efficiency.csv")
df.to_csv(output_path, index=False)

# Echo what was written so the run can be checked by eye
print(f"Dataset with fuzzy scores saved to {output_path}")
saved_columns = df.columns.tolist()
print("Columns in saved dataset:", saved_columns)
fuzzy_sample = df['FUZZY_OUTPUT'].head().tolist()
print("Sample FUZZY_OUTPUT:", fuzzy_sample)
print("Fuzzy Output Preview:", fuzzy_df.head(10), sep="\n")