In [3]:
import pandas as pd

# Load the environmental-impact table; later cells read its 'Entity' column
# and the per-kg metric columns (ghg_kg, water_kg, ...).
df = pd.read_csv("env_impacts.csv")

In [4]:
# Show the loaded frame as this cell's output.
df


Unnamed: 0,Entity,Year,ghg_kg,ghg_1000kcal,ghg_100gprotein,ghg_100gfat,land_use_kg,land_use_1000kcal,land_use_100gprotein,land_use_100gfat,...,water_scarcity_100gprotein,water_scarcity_100gfat,water_kg,water_1000kcal,water_100gprotein,water_100gfat,biodiversity_kg,biodiversity_1000kcal,biodiversity_100gprotein,biodiversity_100gfat
0,Ale,2022,0.488690,0.317338,0.878525,2.424209,0.811485,0.601152,1.577687,3.065766,...,3.746103e+03,7464.549375,49.402842,37.873277,98.984968,181.803039,19.944790,15.570546,39.861995,62.666331
1,Almond butter,2022,0.387011,0.067265,0.207599,0.079103,7.683045,1.296870,3.608433,1.495297,...,1.827076e+05,77714.970299,6846.472597,1151.953867,3125.971106,1324.701109,252.788221,42.626235,117.246387,49.101614
2,Almond milk,2022,0.655888,2.222230,13.595512,4.057470,1.370106,2.675063,12.687839,4.600530,...,1.471679e+06,431118.047177,1246.973783,2766.258122,13432.324330,4286.672872,50.401108,99.766367,477.142139,190.732797
3,Almonds,2022,0.602368,0.105029,0.328335,0.119361,8.230927,1.423376,4.261040,1.610136,...,1.889947e+05,72908.196078,6413.314597,1106.064215,3258.085733,1254.124113,261.718501,45.213038,134.399913,51.438593
4,Apple juice,2022,0.458378,0.955184,29.152212,19.754979,0.660629,1.382839,43.232158,26.246743,...,1.777600e+05,199118.258133,122.318340,245.723809,6160.521065,8566.814616,11.021176,22.752032,668.427016,551.574374
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
206,Vegetarian pizza,2022,5.232976,1.891635,3.773106,3.919468,6.520732,2.381733,4.769522,4.998371,...,1.138941e+04,12249.088675,579.253113,215.282138,444.065726,466.740381,307.775623,108.180940,210.150691,214.389929
207,Walnuts,2022,2.416308,0.409580,1.725508,0.492456,11.875852,1.924057,7.828816,2.092320,...,1.033235e+05,28606.843189,3458.072291,563.902845,2236.256141,612.117210,260.975114,42.379569,171.372416,46.089169
208,Watermelon,2022,0.969403,2.464087,16.335799,22.110017,1.009878,2.616771,17.232334,22.874311,...,2.333280e+05,304187.031234,743.313468,1971.260276,12903.761529,16688.285306,33.587313,87.819304,576.643845,757.983229
209,Wine,2022,1.722881,14.623111,249.889035,332.390402,4.333116,15.575567,252.024497,271.197030,...,1.777504e+05,228692.372279,93.535379,688.774728,12616.807047,7647.301258,87.033818,372.045896,6062.107987,5642.790160


In [5]:
import pandas as pd
import numpy as np

# Metric columns that are scored and graded below
impact_metrics = [
    'ghg_kg', 'water_kg', 'land_use_kg',
    'eutrophication_kg', 'biodiversity_kg'
]

# Coerce every metric column to numeric in one pass; entries that cannot be
# parsed become NaN instead of raising
df[impact_metrics] = df[impact_metrics].apply(pd.to_numeric, errors='coerce')

# Keep only rows that still have at least one metric value
df = df.dropna(subset=impact_metrics, how='all')

# Compute log-normalization ranges: metric -> (log of smallest, log of largest)
# strictly positive value in that column.
log_norm_ranges = {}
for metric in impact_metrics:
    valid_vals = df.loc[df[metric] > 0, metric]
    if valid_vals.empty:
        # No positive values at all: scoring only uses the range for positive
        # values, so this placeholder just keeps the lookup well-formed.
        log_norm_ranges[metric] = (0, 1)
        continue

    log_min, log_max = np.log(valid_vals.min()), np.log(valid_vals.max())
    if log_max == log_min:
        # Every positive value is identical (e.g. a single row), so the range
        # has zero width and the normalization would divide by zero and yield
        # NaN. Widen it symmetrically so such rows land on the midpoint (50).
        log_min, log_max = log_min - 0.5, log_max + 0.5
    log_norm_ranges[metric] = (log_min, log_max)
# Normalize and grade each metric
def normalize_and_grade(row):
    """Score and grade one row on every metric in ``impact_metrics``.

    A positive value is placed on a log scale between the metric's bounds in
    ``log_norm_ranges``: the smallest positive value maps to 100 and the
    largest to 0. Missing or non-positive values get a score of 0.0.

    Returns a ``pd.Series`` holding ``<metric>_score`` and ``<metric>_grade``
    for each metric, in metric order.
    """
    # (minimum score, letter) pairs checked from best to worst; anything
    # below the last floor is an 'E'
    grade_bands = ((80, 'A'), (60, 'B'), (40, 'C'), (20, 'D'))

    graded = {}
    for metric in impact_metrics:
        raw = row[metric]

        if pd.isna(raw) or not (raw > 0):
            score = 0.0  # Worst case
        else:
            log_min, log_max = log_norm_ranges[metric]
            position = (log_max - np.log(raw)) / (log_max - log_min)
            score = round(position * 100, 2)

        grade = next(
            (letter for floor, letter in grade_bands if score >= floor),
            'E',
        )

        graded[f"{metric}_score"] = score
        graded[f"{metric}_grade"] = grade

    return pd.Series(graded)

# Score/grade every row and attach the new columns to the original frame
graded_df = df.join(df.apply(normalize_and_grade, axis=1))

# Lookup: lower-cased, stripped entity name -> {metric: {'score', 'grade'}}
multi_metric_lookup = dict(
    (
        record['Entity'].strip().lower(),
        {
            metric: {
                'score': round(record[f"{metric}_score"], 2),
                'grade': record[f"{metric}_grade"],
            }
            for metric in impact_metrics
        },
    )
    for _, record in graded_df.iterrows()
)

# Preview the first five entries
{name: multi_metric_lookup[name] for name in list(multi_metric_lookup)[:5]}

{'ale': {'ghg_kg': {'score': 86.68, 'grade': 'A'},
  'water_kg': {'score': 43.6, 'grade': 'C'},
  'land_use_kg': {'score': 89.71, 'grade': 'A'},
  'eutrophication_kg': {'score': 95.05, 'grade': 'A'},
  'biodiversity_kg': {'score': 69.98, 'grade': 'B'}},
 'almond butter': {'ghg_kg': {'score': 90.3, 'grade': 'A'},
  'water_kg': {'score': 0.0, 'grade': 'E'},
  'land_use_kg': {'score': 57.53, 'grade': 'C'},
  'eutrophication_kg': {'score': 55.25, 'grade': 'C'},
  'biodiversity_kg': {'score': 38.25, 'grade': 'D'}},
 'almond milk': {'ghg_kg': {'score': 82.11, 'grade': 'A'},
  'water_kg': {'score': 15.06, 'grade': 'E'},
  'land_use_kg': {'score': 82.21, 'grade': 'A'},
  'eutrophication_kg': {'score': 79.84, 'grade': 'B'},
  'biodiversity_kg': {'score': 58.4, 'grade': 'C'}},
 'almonds': {'ghg_kg': {'score': 83.43, 'grade': 'A'},
  'water_kg': {'score': 0.58, 'grade': 'E'},
  'land_use_kg': {'score': 56.54, 'grade': 'C'},
  'eutrophication_kg': {'score': 55.36, 'grade': 'C'},
  'biodiversity_kg

In [6]:
# Recalculate log-normalized scores and store in new columns before computing weighted score

# Define the correct columns again
impact_metrics = [
    'ghg_kg', 'water_kg', 'land_use_kg',
    'eutrophication_kg', 'biodiversity_kg'
]


def _log_range(values):
    """Return ``(log(min), log(max))`` over the strictly positive entries of ``values``.

    The returned range never has zero width, so it is always safe to divide by
    ``log_max - log_min``:

    * no positive entries -> the placeholder ``(0, 1)``;
    * all positive entries identical -> the range is widened by 0.5 on each
      side, which places those entries on the midpoint of the scale.
    """
    positive = values[values > 0]  # filter once instead of once per bound
    if positive.empty:
        return (0, 1)
    log_min, log_max = np.log(positive.min()), np.log(positive.max())
    if log_max == log_min:
        return (log_min - 0.5, log_max + 0.5)
    return (log_min, log_max)


# Compute log ranges
log_norm_ranges = {metric: _log_range(df[metric]) for metric in impact_metrics}

# Recompute normalized scores
def compute_log_scores(row):
    """Return a ``pd.Series`` of ``<metric>_score`` values for one row.

    Each positive metric value is log-scaled against the bounds in
    ``log_norm_ranges`` (smallest positive value -> 100, largest -> 0) and
    rounded to two decimals; missing or non-positive values score 0.0.
    """
    def _score(metric):
        value = row[metric]
        if not (pd.notna(value) and value > 0):
            return 0.0
        log_min, log_max = log_norm_ranges[metric]
        normalized = (log_max - np.log(value)) / (log_max - log_min)
        return round(normalized * 100, 2)

    return pd.Series({f"{metric}_score": _score(metric) for metric in impact_metrics})

# Apply to DataFrame
score_df = df.apply(compute_log_scores, axis=1)
# Drop any *_score columns left over from a previous execution of this cell
# before attaching the fresh ones. A plain concat would append duplicates on
# every re-run, and duplicate labels make row[score_col] return a Series,
# which breaks the weighted-score computation below.
df = pd.concat(
    [df.drop(columns=score_df.columns, errors='ignore'), score_df],
    axis=1,
)

# Define weights: the multiplier applied to each metric's score when the
# scores are combined into the weighted score. The five weights add up to 1.
weights = {
    'ghg_kg': 0.4,
    'water_kg': 0.2,
    'land_use_kg': 0.2,
    'eutrophication_kg': 0.1,
    'biodiversity_kg': 0.1
}

# Compute weighted score
def compute_weighted_score(row):
    """Return the weighted sum of a row's per-metric scores, rounded to 2 decimals.

    Reads ``<metric>_score`` for every metric in ``impact_metrics`` and
    multiplies it by the matching entry in ``weights``.
    """
    total = 0.0
    for metric in impact_metrics:
        total += weights[metric] * row[f"{metric}_score"]
    return round(total, 2)

df['weighted_score'] = df.apply(compute_weighted_score, axis=1)

# Assign letter grade
def score_to_grade(score):
    """Map a numeric score to a letter grade.

    Bands: >= 80 'A', >= 60 'B', >= 40 'C', >= 20 'D', anything else 'E'.
    """
    # (minimum score, letter) pairs, checked from best to worst
    bands = ((80, 'A'), (60, 'B'), (40, 'C'), (20, 'D'))
    for floor, letter in bands:
        if score >= floor:
            return letter
    return 'E'

# Letter grade for the combined score
df['weighted_grade'] = df['weighted_score'].apply(score_to_grade)

# Build final lookup: lower-cased, stripped entity name -> overall score and grade
final_weighted_lookup = dict(
    (
        record['Entity'].strip().lower(),
        {'score': record['weighted_score'], 'grade': record['weighted_grade']},
    )
    for _, record in df.iterrows()
)


# Preview the first ten entries
{name: final_weighted_lookup[name] for name in list(final_weighted_lookup)[:10]}


{'ale': {'score': 77.84, 'grade': 'B'},
 'almond butter': {'score': 56.98, 'grade': 'C'},
 'almond milk': {'score': 66.12, 'grade': 'B'},
 'almonds': {'score': 54.11, 'grade': 'C'},
 'apple juice': {'score': 77.71, 'grade': 'B'},
 'apple pie': {'score': 62.73, 'grade': 'B'},
 'apples': {'score': 78.34, 'grade': 'B'},
 'apricot jam': {'score': 60.62, 'grade': 'B'},
 'asparagus': {'score': 67.44, 'grade': 'B'},
 'avocados': {'score': 68.06, 'grade': 'B'}}

In [7]:
# Detailed lookup: lower-cased, stripped entity name -> one {'score': ...}
# entry per metric, plus the overall weighted score and grade at the same level.
final_detailed_lookup = {
    row['Entity'].strip().lower(): {
        metric: {
            'score': row[f"{metric}_score"]
        }
        for metric in impact_metrics
    } | {
        'weighted_score': row['weighted_score'],
        'weighted_grade': row['weighted_grade']
    }
    for _, row in df.iterrows()
}

# Show a sample of the final detailed lookup (first five entries only;
# without the slice the cell prints every entity).
dict(list(final_detailed_lookup.items())[:5])

{'ale': {'ghg_kg': {'score': 86.68},
  'water_kg': {'score': 43.6},
  'land_use_kg': {'score': 89.71},
  'eutrophication_kg': {'score': 95.05},
  'biodiversity_kg': {'score': 69.98},
  'weighted_score': 77.84,
  'weighted_grade': 'B'},
 'almond butter': {'ghg_kg': {'score': 90.3},
  'water_kg': {'score': 0.0},
  'land_use_kg': {'score': 57.53},
  'eutrophication_kg': {'score': 55.25},
  'biodiversity_kg': {'score': 38.25},
  'weighted_score': 56.98,
  'weighted_grade': 'C'},
 'almond milk': {'ghg_kg': {'score': 82.11},
  'water_kg': {'score': 15.06},
  'land_use_kg': {'score': 82.21},
  'eutrophication_kg': {'score': 79.84},
  'biodiversity_kg': {'score': 58.4},
  'weighted_score': 66.12,
  'weighted_grade': 'B'},
 'almonds': {'ghg_kg': {'score': 83.43},
  'water_kg': {'score': 0.58},
  'land_use_kg': {'score': 56.54},
  'eutrophication_kg': {'score': 55.36},
  'biodiversity_kg': {'score': 37.82},
  'weighted_score': 54.11,
  'weighted_grade': 'C'},
 'apple juice': {'ghg_kg': {'score':

In [8]:
import json

# Persist the detailed lookup as JSON. The encoding is set explicitly so the
# file is written the same way on every platform and matches the UTF-8 read
# of this file in the next cell.
with open("env_scores.json", "w", encoding="utf-8") as f:
    json.dump(final_detailed_lookup, f, indent=2)

In [9]:
# Load the environmental scores file (env_scores.json), read as UTF-8 text
# and parsed from JSON into env_scores.
with open("env_scores.json", "r", encoding="utf-8") as f:
    env_scores = json.load(f)

# Modify each metric to include a grade based on score
def score_to_grade(score):
    """Return the letter grade for a numeric score.

    'A' from 80 up, 'B' from 60, 'C' from 40, 'D' from 20, otherwise 'E'.
    """
    cutoffs = ((80, 'A'), (60, 'B'), (40, 'C'), (20, 'D'))
    # First band whose floor the score reaches; 'E' when it reaches none
    return next((letter for floor, letter in cutoffs if score >= floor), 'E')

# Attach a letter grade to every per-metric entry of every entity
for entity_metrics in env_scores.values():
    for entry in entity_metrics.values():
        # Only nested dicts that carry a "score" are graded; other values
        # stored next to them are left untouched
        if not isinstance(entry, dict) or "score" not in entry:
            continue
        entry["grade"] = score_to_grade(entry["score"])

# Write the graded data to a separate JSON file
output_path = "env_scores_with_grades.json"
with open(output_path, "w", encoding="utf-8") as f:
    json.dump(env_scores, f, indent=2, ensure_ascii=False)

output_path

'env_scores_with_grades.json'