In [16]:
## Fuzzy Logic: Membership Functions
!pip install --user scikit-fuzzy




In [17]:
def fuzzy_efficiency_score(row):
    """Score one record from 0 to 100 by averaging three stepped sub-scores.

    Each sub-score is 1.0, 0.5 or 0.0. Despite the "fuzzy" name, the levels
    are crisp threshold steps rather than graded membership values.

    Parameters
    ----------
    row : mapping-like
        Must support ``row[key]`` for 'ENERGY_CONSUMPTION_PER_SQFT' and
        'Pct_INCOME_MORE_THAN_150K', and ``row.get(key, default)`` for the
        optional flags 'CLIMATE_Cold' and 'CLIMATE_Mixed-Humid'.

    Returns
    -------
    float
        Mean of the three sub-scores multiplied by 100, rounded to 2 decimals.

    Raises
    ------
    KeyError
        If either of the two required keys is missing from ``row``.
    """
    usage = row['ENERGY_CONSUMPTION_PER_SQFT']
    high_income_pct = row['Pct_INCOME_MORE_THAN_150K']

    # Energy: below 35 earns full credit, 35..45 (inclusive) half, anything else none.
    energy_part = 1.0 if usage < 35 else (0.5 if 35 <= usage <= 45 else 0.0)

    # Income: above 20 earns full credit, (10, 20] half, anything else none.
    income_part = 1.0 if high_income_pct > 20 else (0.5 if 10 < high_income_pct <= 20 else 0.0)

    # Climate: read from one-hot flags; a missing flag counts as not set.
    # The Mixed-Humid flag is consulted only when the Cold flag is falsy.
    if row.get('CLIMATE_Cold', False):
        climate_part = 1.0
    else:
        climate_part = 0.5 if row.get('CLIMATE_Mixed-Humid', False) else 0.0

    # Equal weighting of the three parts, expressed on a 0-100 scale.
    mean_part = (energy_part + income_part + climate_part) / 3
    return round(mean_part * 100, 2)



In [18]:
# Score every row with fuzzy_efficiency_score (axis=1 passes one row per call)
# and store the result in a new 'EFFICIENCY_SCORE' column.
# NOTE(review): `df` must already exist when this cell runs, but no visible
# cell above creates it; the later load cell re-reads the CSV and repeats this
# exact assignment. Confirm whether this cell is still needed.
df['EFFICIENCY_SCORE'] = df.apply(fuzzy_efficiency_score, axis=1)



In [19]:
import pandas as pd

# Input and output locations for this step.
# NOTE(review): both paths are absolute; consider deriving them from a
# configurable data directory so the notebook runs outside this workspace.
merged_csv_path = "/workspaces/CPSC6185-Final-Project/data/processed/merged_cleaned.csv"
scored_csv_path = "/workspaces/CPSC6185-Final-Project/data/processed/merged_with_efficiency.csv"

# Read the merged dataset into `df`.
df = pd.read_csv(merged_csv_path)

# Score each row (axis='columns' hands one row at a time to the function)
# and keep the result as a new column.
efficiency_scores = df.apply(fuzzy_efficiency_score, axis='columns')
df['EFFICIENCY_SCORE'] = efficiency_scores

# Persist the scored frame, leaving the index out of the file.
df.to_csv(scored_csv_path, index=False)

# Show the first rows of the score next to the two numeric inputs it uses.
preview_columns = ['STATE', 'ENERGY_CONSUMPTION_PER_SQFT', 'Pct_INCOME_MORE_THAN_150K', 'EFFICIENCY_SCORE']
df[preview_columns].head()


Unnamed: 0,STATE,ENERGY_CONSUMPTION_PER_SQFT,Pct_INCOME_MORE_THAN_150K,EFFICIENCY_SCORE
0,Colorado,43.74,15.86,66.67
1,Utah,43.08,11.72,66.67
2,Idaho,45.09,7.67,33.33
3,Montana,46.71,6.11,33.33
4,North Dakota,46.75,12.24,16.67


In [21]:
def rule_based_efficiency(row):
    """Map one record to an efficiency value using six energy/climate rules.

    The energy reading is split into three bands (below 35, 35..45 inclusive,
    above 45). Within a band, the climate flags are checked in a fixed order
    and the first match decides the result. Any combination not covered by a
    rule yields 33; for example, the middle band with only the Cold flag set.

    Parameters
    ----------
    row : mapping-like
        Must support ``row['ENERGY_CONSUMPTION_PER_SQFT']`` and
        ``row.get(key, default)`` for the flags 'CLIMATE_Cold',
        'CLIMATE_Hot-Humid', 'CLIMATE_Very-Cold' and 'CLIMATE_Mixed-Humid'.
        A missing flag is treated as 0 (not set).

    Returns
    -------
    int
        One of 100, 50, 33 or 0.

    Raises
    ------
    KeyError
        If 'ENERGY_CONSUMPTION_PER_SQFT' is missing from ``row``.
    """
    usage = row['ENERGY_CONSUMPTION_PER_SQFT']

    # One-hot climate flags, all read up front.
    is_cold = row.get('CLIMATE_Cold', 0)
    is_hot_humid = row.get('CLIMATE_Hot-Humid', 0)
    is_very_cold = row.get('CLIMATE_Very-Cold', 0)
    is_mixed_humid = row.get('CLIMATE_Mixed-Humid', 0)

    if usage < 35:
        if is_cold:
            return 100  # Rule 1
        if is_hot_humid:
            return 50   # Rule 2
    elif 35 <= usage <= 45:
        if is_very_cold or is_mixed_humid:
            return 50   # Rules 3 and 4 share the same outcome
    elif usage > 45:
        if is_cold:
            return 50   # Rule 5
        if is_hot_humid:
            return 0    # Rule 6

    # No rule matched (including an energy value that fits no band).
    return 33

# Run the rule set over every row (axis='columns' passes one row per call)
# and store the outcome in a new 'RULE_BASED_EFFICIENCY' column.
rule_scores = df.apply(rule_based_efficiency, axis='columns')
df['RULE_BASED_EFFICIENCY'] = rule_scores



In [22]:
# Preview the rule-based value next to the energy reading and two of the
# climate flags (the Very-Cold and Mixed-Humid flags are not shown here).
rule_preview_columns = ['STATE', 'ENERGY_CONSUMPTION_PER_SQFT', 'CLIMATE_Cold', 'CLIMATE_Hot-Humid', 'RULE_BASED_EFFICIENCY']
df[rule_preview_columns].head()


Unnamed: 0,STATE,ENERGY_CONSUMPTION_PER_SQFT,CLIMATE_Cold,CLIMATE_Hot-Humid,RULE_BASED_EFFICIENCY
0,Colorado,43.74,True,False,33
1,Utah,43.08,True,False,33
2,Idaho,45.09,True,False,50
3,Montana,46.71,True,False,50
4,North Dakota,46.75,False,False,33
