In [1]:
import pandas as pd 
import numpy as np

In [2]:
df=pd.read_csv("standardised_data.csv")
df2=pd.read_csv("recommended_nutrients.csv")
df3=pd.read_csv("USDA.csv")

In [3]:
default_weights = {
    'Energy (kcal)': 5,
    'Protein (g)': 100,
    'Carbohydrate (g)': 40,
    'Sugars, total\n(g)': 0.01,
    'Fiber, total dietary (g)': 70,
    'Total Fat (g)': 1,
    'Fatty acids, total saturated (g)': 0.001,
    'Fatty acids, total monounsaturated (g)': 1,
    'Fatty acids, total polyunsaturated (g)': 1,
    'Cholesterol (g)': 0.001,
    'Vitamin A, RAE (g)': 60,
    'Thiamin (g)': 45,
    'Riboflavin (g)': 30,
    'Niacin (g)': 30,
    'Vitamin B-6 (g)': 40,
    'Folate, total (g)': 60,
    'Choline, total (g)': 20,
    'Vitamin B-12 (g)': 60,
    'Vitamin C (g)': 60,
    'Vitamin D (D2 + D3) (g)': 70,
    'Vitamin E (alpha-tocopherol) (g)': 50,
    'Vitamin K (phylloquinone) (g)': 50,
    'Calcium (g)': 50,
    'Phosphorus (g)': 45,
    'Magnesium (g)': 20,
    'Iron\n(g)': 50,
    'Zinc\n(g)': 50,
    'Copper (g)': 50,
    'Selenium (g)': 20,
    'Potassium (g)': 40,
    'Sodium (g)': 1,
    'Caffeine (g)': 0.1,
    'Theobromine (g)': 5,
}


In [4]:
# Extract RDI values starting from 'Energy (kcal)' for males and females
male_energy = df2.iloc[0]['Energy (kcal)']
female_energy = df2.iloc[1]['Energy (kcal)']

# Calculate weights relative to energy intake and scale by 100
male_weights = df2.iloc[0, 1:].div(male_energy).mul(100).to_dict()
female_weights = df2.iloc[1, 1:].div(female_energy).mul(100).to_dict()

# Set specific weights for Energy (kcal)
male_weights['Energy (kcal)'] = 2.4
female_weights['Energy (kcal)'] = 1.8

# Round weights to 4 decimal places
male_weights = {key: round(value, 4) for key, value in male_weights.items()}
female_weights = {key: round(value, 4) for key, value in female_weights.items()}

# Now male_weights and female_weights dictionaries will contain adjusted RDI values rounded to 4 decimal places
print("Male Weights:")
print(male_weights)
print("\nFemale Weights:")
print(female_weights)


Male Weights:
{'Energy (kcal)': 2.4, 'Protein (g)': 2.2917, 'Carbohydrate (g)': 5.4167, 'Sugars, total\n(g)': 2.0833, 'Fiber, total dietary (g)': 1.2833, 'Total Fat (g)': 3.0558, 'Fatty acids, total saturated (g)': 1.1112, 'Fatty acids, total monounsaturated (g)': 0.9708, 'Fatty acids, total polyunsaturated (g)': 0.9708, 'Cholesterol (g)': 0.0208, 'Vitamin A, RAE (g)': 0.0, 'Thiamin (g)': 0.0, 'Riboflavin (g)': 0.0001, 'Niacin (g)': 0.0007, 'Vitamin B-6 (g)': 0.0001, 'Folate, total (g)': 0.0, 'Choline, total (g)': 0.0229, 'Vitamin B-12 (g)': 0.0, 'Vitamin C (g)': 0.0037, 'Vitamin D (D2 + D3) (g)': 25.0, 'Vitamin E (alpha-tocopherol) (g)': 0.0006, 'Vitamin K (phylloquinone) (g)': 0.0, 'Calcium (g)': 0.0417, 'Phosphorus (g)': 0.0292, 'Magnesium (g)': 0.0171, 'Iron\n(g)': 0.0003, 'Zinc\n(g)': 0.0005, 'Copper (g)': 0.0, 'Selenium (g)': 0.0, 'Potassium (g)': 0.1958, 'Sodium (g)': 0.0958, 'Caffeine (g)': 0.0167, 'Theobromine (g)': 0.0167}

Female Weights:
{'Energy (kcal)': 1.8, 'Protein (g)'

In [5]:
def calculate_health_scores(df, male_weights, default_weights):
    # Calculate combined weights using a modified approach
    combined_weights = {}
    for key in default_weights:
        if key in male_weights:
#             combined_weights[key] = np.sqrt(default_weights[key]) * male_weights[key]
            combined_weights[key] = (default_weights[key] * male_weights[key])
        else:
            combined_weights[key] = (default_weights[key])
#             combined_weights[key] = np.sqrt(default_weights[key])
              
    print(combined_weights)
    # Calculate health score for each food item based on combined weights
    df['Health Score'] = (df[list(combined_weights.keys())] * list(combined_weights.values())).sum(axis=1)

    # Standardize health score using z-score normalization
    mean_score = df['Health Score'].mean()
    std_score = df['Health Score'].std()
    df['Health Score'] = ((df['Health Score'] - mean_score) / std_score) * 45 +60
    df['Health Score'] = df['Health Score'].round(3)

    return df[['Food code', 'Main food description', 'Health Score']]

# Example usage for males with combined weights (using default and male-specific weights)
male_health_scores = calculate_health_scores(df, male_weights, default_weights)

# Example printout of health scores with combined weights
print("Male Health Scores with Combined Weights:")
male_health_scores.head(1452)


{'Energy (kcal)': 12.0, 'Protein (g)': 229.17000000000002, 'Carbohydrate (g)': 216.66799999999998, 'Sugars, total\n(g)': 0.020833, 'Fiber, total dietary (g)': 89.831, 'Total Fat (g)': 3.0558, 'Fatty acids, total saturated (g)': 0.0011112, 'Fatty acids, total monounsaturated (g)': 0.9708, 'Fatty acids, total polyunsaturated (g)': 0.9708, 'Cholesterol (g)': 2.08e-05, 'Vitamin A, RAE (g)': 0.0, 'Thiamin (g)': 0.0, 'Riboflavin (g)': 0.003, 'Niacin (g)': 0.021, 'Vitamin B-6 (g)': 0.004, 'Folate, total (g)': 0.0, 'Choline, total (g)': 0.458, 'Vitamin B-12 (g)': 0.0, 'Vitamin C (g)': 0.222, 'Vitamin D (D2 + D3) (g)': 1750.0, 'Vitamin E (alpha-tocopherol) (g)': 0.03, 'Vitamin K (phylloquinone) (g)': 0.0, 'Calcium (g)': 2.085, 'Phosphorus (g)': 1.314, 'Magnesium (g)': 0.342, 'Iron\n(g)': 0.015, 'Zinc\n(g)': 0.025, 'Copper (g)': 0.0, 'Selenium (g)': 0.0, 'Potassium (g)': 7.832, 'Sodium (g)': 0.0958, 'Caffeine (g)': 0.00167, 'Theobromine (g)': 0.08349999999999999}
Male Health Scores with Combined

Unnamed: 0,Food code,Main food description,Health Score
0,11000000,"Milk, human",39.177
1,11100000,"Milk, NFS",71.050
2,11111000,"Milk, whole",70.975
3,11112110,"Milk, reduced fat (2%)",71.087
4,11112210,"Milk, low fat (1%)",71.150
...,...,...,...
1447,27420270,"Ham and vegetables excluding carrots, broccoli...",47.832
1448,27420350,"Pork and vegetables excluding carrots, broccol...",49.865
1449,27420370,"Pork, tofu, and vegetables, excluding carrots,...",44.175
1450,27420390,"Pork chow mein or chop suey, no noodles",49.211


In [6]:
def calculate_health_scores(df, female_weights, default_weights):
    # Calculate combined weights using a modified approach
    combined_weights = {}
    for key in default_weights:
        if key in female_weights:
#             combined_weights[key] = np.sqrt(default_weights[key]) * male_weights[key]
            combined_weights[key] = (default_weights[key] * female_weights[key])
        else:
            combined_weights[key] = (default_weights[key])
#             combined_weights[key] = np.sqrt(default_weights[key])
              
    print(combined_weights)
    # Calculate health score for each food item based on combined weights
    df['Health Score'] = (df[list(combined_weights.keys())] * list(combined_weights.values())).sum(axis=1)

    # Standardize health score using z-score normalization
    mean_score = df['Health Score'].mean()
    std_score = df['Health Score'].std()
    df['Health Score'] = ((df['Health Score'] - mean_score) / std_score) * 40 + 60
    df['Health Score'] = df['Health Score'].round(3)

    return df[['Food code', 'Main food description', 'Health Score']]

# Example usage for males with combined weights (using default and male-specific weights)
female_health_scores = calculate_health_scores(df,female_weights, default_weights)

# Example printout of health scores with combined weights
print("feMale Health Scores with Combined Weights:")
female_health_scores.head(1452)


{'Energy (kcal)': 9.0, 'Protein (g)': 255.56, 'Carbohydrate (g)': 288.888, 'Sugars, total\n(g)': 0.027778, 'Fiber, total dietary (g)': 119.777, 'Total Fat (g)': 3.0556, 'Fatty acids, total saturated (g)': 0.0011111, 'Fatty acids, total monounsaturated (g)': 0.9722, 'Fatty acids, total polyunsaturated (g)': 0.9722, 'Cholesterol (g)': 1.67e-05, 'Vitamin A, RAE (g)': 0.0, 'Thiamin (g)': 0.0045000000000000005, 'Riboflavin (g)': 0.003, 'Niacin (g)': 0.024, 'Vitamin B-6 (g)': 0.004, 'Folate, total (g)': 0.0, 'Choline, total (g)': 0.472, 'Vitamin B-12 (g)': 0.0, 'Vitamin C (g)': 0.252, 'Vitamin D (D2 + D3) (g)': 2333.331, 'Vitamin E (alpha-tocopherol) (g)': 0.04, 'Vitamin K (phylloquinone) (g)': 0.0, 'Calcium (g)': 0.27999999999999997, 'Phosphorus (g)': 1.7505, 'Magnesium (g)': 0.35000000000000003, 'Iron\n(g)': 0.05, 'Zinc\n(g)': 0.02, 'Copper (g)': 0.0, 'Selenium (g)': 0.0, 'Potassium (g)': 10.443999999999999, 'Sodium (g)': 0.1278, 'Caffeine (g)': 0.00222, 'Theobromine (g)': 0.111}
feMale He

Unnamed: 0,Food code,Main food description,Health Score
0,11000000,"Milk, human",42.228
1,11100000,"Milk, NFS",70.526
2,11111000,"Milk, whole",70.457
3,11112110,"Milk, reduced fat (2%)",70.557
4,11112210,"Milk, low fat (1%)",70.619
...,...,...,...
1447,27420270,"Ham and vegetables excluding carrots, broccoli...",49.242
1448,27420350,"Pork and vegetables excluding carrots, broccol...",50.793
1449,27420370,"Pork, tofu, and vegetables, excluding carrots,...",45.994
1450,27420390,"Pork chow mein or chop suey, no noodles",50.204


In [7]:
import pandas as pd
import numpy as np

def calculate_health_scores(df, male_weights, female_weights, default_weights):
    # Calculate combined weights for males using a modified approach
    combined_weights_male = {}
    for key in default_weights:
        if key in male_weights:
            combined_weights_male[key] = (default_weights[key] * male_weights[key])
        else:
            combined_weights_male[key] = (default_weights[key])
              
    # Calculate health score for males based on combined weights
    df['Health Score Male'] = (df[list(combined_weights_male.keys())] * list(combined_weights_male.values())).sum(axis=1)

    # Standardize health score using min-max normalization for males
    min_score_male = df['Health Score Male'].min()
    max_score_male = df['Health Score Male'].max()
    df['Health Score Male'] = ((df['Health Score Male'] - min_score_male) / (max_score_male - min_score_male)) * 99 + 1
    df['Health Score Male'] = df['Health Score Male'].round(3)

    # Ensure uniqueness of values within the range 1 to 100 for males
    df['Health Score Male'] = df['Health Score Male'].rank(method='first', pct=True) * 99 + 1

    # Calculate combined weights for females using a modified approach
    combined_weights_female = {}
    for key in default_weights:
        if key in female_weights:
            combined_weights_female[key] = (default_weights[key] * female_weights[key])
        else:
            combined_weights_female[key] = (default_weights[key])
              
    # Calculate health score for females based on combined weights
    df['Health Score Female'] = (df[list(combined_weights_female.keys())] * list(combined_weights_female.values())).sum(axis=1)

    # Standardize health score using min-max normalization for females
    min_score_female = df['Health Score Female'].min()
    max_score_female = df['Health Score Female'].max()
    df['Health Score Female'] = ((df['Health Score Female'] - min_score_female) / (max_score_female - min_score_female)) * 99 + 1
    df['Health Score Female'] = df['Health Score Female'].round(3)

    # Ensure uniqueness of values within the range 1 to 100 for females
    df['Health Score Female'] = df['Health Score Female'].rank(method='first', pct=True) * 99 + 1

    return df

# Read the standardised data
df = pd.read_csv("standardised_data.csv")

# Example usage for males and females with combined weights (using default, male-specific, and female-specific weights)
df = calculate_health_scores(df, male_weights, female_weights, default_weights)

# Save the modified DataFrame back to a CSV file if needed
df.to_csv("standardised_data_with_health_scores.csv", index=False)

# Example printout of the DataFrame with added health scores
print("DataFrame with Health Scores:")
print(df[['Food code', 'Main food description', 'Health Score Male', 'Health Score Female']].head(360))


DataFrame with Health Scores:
     Food code   Main food description  Health Score Male  Health Score Female
0     11000000             Milk, human          15.751422            15.962660
1     11100000               Milk, NFS          86.850462            88.012269
2     11111000             Milk, whole          86.727240            87.959459
3     11112110  Milk, reduced fat (2%)          86.903272            88.047475
4     11112210      Milk, low fat (1%)          87.096906            88.153094
..         ...                     ...                ...                  ...
355   14104200           Cheese, Colby          87.272937            85.512624
356   14104250      Cheese, Colby Jack          87.220128            85.424609
357   14104400            Cheese, Feta          67.522226            65.251422
358   14104600         Cheese, Fontina          88.012269            86.287162
359   14104700            Cheese, goat          81.393492            79.492354

[360 rows x 4 columns