### Import data

In [104]:
import pandas as pd
import numpy as np
import psycopg2
import json

In [105]:
# Source tables for the scoring cells below. The first three files use their
# first CSV column as the DataFrame index; material_mapped.csv keeps the
# default integer index.
clothing_df = pd.read_csv('clothing.csv', index_col=0)
item_material_df = pd.read_csv('item_has_material.csv', index_col=0)
part_df = pd.read_csv('part_component_value.csv', index_col=0)
# NOTE(review): mat_table is not referenced again in the visible cells -
# confirm it is still needed.
mat_table = pd.read_csv('material_mapped.csv')

### Get Score

In [108]:
_SIGNIFICANCE_LEVELS = ('Main', 'Secondary', 'Tertiary')


def _percent_value(text):
    """Return the number inside a percentage string, e.g. ``'40%'`` -> ``40.0``."""
    return float(text.strip('%'))


def _material_weights(clothing_id, category, item_material, parts):
    """Build the ``{material_id: weight}`` dict for one clothing item.

    Raises whatever the underlying lookups raise (missing part row, malformed
    percentage, zero component total); ``getScore`` turns those into ``{}``.
    """
    # Filter the item rows once instead of once per loop iteration.
    items = item_material[item_material['clothing_id'] == clothing_id]
    material_ids = items['material_id'].tolist()
    part_names = items['part'].tolist()
    raw_percents = items['percentage'].tolist()

    # Look every distinct part up once. A single combined filter replaces the
    # chained ``df[mask_a][mask_b]`` indexing, which reindexes the second mask
    # and triggers a pandas warning.
    in_category = parts[parts['low_level'] == category]
    significance_of = {}
    share = dict.fromkeys(_SIGNIFICANCE_LEVELS, 0.0)
    for part in dict.fromkeys(part_names):
        match = in_category[in_category['part'] == part].iloc[0]
        level = match['component_significance']
        significance_of[part] = level
        if level in share:
            share[level] += _percent_value(match['component_value_percentage']) / 100.0

    # Keep material, level and percentage together so a row with an unknown
    # significance is skipped as a whole. Previously such a row shifted the
    # weight list relative to the material list, attaching weights to the
    # wrong materials.
    weighted_rows = []
    totals = dict.fromkeys(_SIGNIFICANCE_LEVELS, 0.0)
    for material_id, part, raw_percent in zip(material_ids, part_names, raw_percents):
        level = significance_of[part]
        if level not in totals:
            continue
        percent = _percent_value(raw_percent)
        weighted_rows.append((material_id, level, percent))
        totals[level] += percent / 100.0

    # Cap each of the two big shares at 100 %.
    main = min(share['Main'], 1.0)
    secondary = min(share['Secondary'], 1.0)
    tertiary = share['Tertiary']

    combined = main + secondary
    if combined > 1.0:
        # Normalise main + secondary to 1, then make room for the tertiary
        # share by taking half of it from each.
        main = main / combined - tertiary / 2
        secondary = secondary / combined - tertiary / 2
    else:
        # NOTE(review): here the full tertiary share is subtracted from BOTH
        # main and secondary (not half each as above) - confirm this is the
        # intended rule. Kept as in the original.
        main -= tertiary
        secondary -= tertiary
    adjusted = {'Main': main, 'Secondary': secondary, 'Tertiary': tertiary}

    material_dict = {}
    for material_id, level, percent in weighted_rows:
        # Rescale the material's percentage within its significance group to
        # that group's adjusted share; same operation order as before so the
        # rounding is unchanged.
        weight = round((percent * adjusted[level] / totals[level]) / 100.0, 2)
        material_dict[material_id] = material_dict.get(material_id, 0.0) + weight
    return material_dict


def getScore(clothing_id, clothing=None, item_material=None, parts=None):
    """Return the material composition of a clothing item as ``{material_id: weight}``.

    Each material percentage is rescaled by the share its garment part carries
    (Main / Secondary / Tertiary, per the item's ``low_level`` category) and
    the rounded results are summed per material.

    Args:
        clothing_id: value matched against the ``clothing_id`` columns.
        clothing: table with ``clothing_id`` and ``low_level`` columns;
            defaults to the module-level ``clothing_df``.
        item_material: table with ``clothing_id``, ``material_id``, ``part``
            and ``percentage`` (strings such as ``'60%'``) columns; defaults
            to the module-level ``item_material_df``.
        parts: table with ``low_level``, ``part``, ``component_significance``
            and ``component_value_percentage`` columns; defaults to the
            module-level ``part_df``.

    Returns:
        dict mapping material id to weight. An empty dict is returned when the
        weights cannot be derived (no matching part row, malformed percentage,
        zero component total). Rows whose significance is not one of
        Main/Secondary/Tertiary are ignored.

    Raises:
        IndexError: if ``clothing_id`` is not present in ``clothing``.
    """
    clothing = clothing_df if clothing is None else clothing
    item_material = item_material_df if item_material is None else item_material
    parts = part_df if parts is None else parts

    # low level cat
    category = clothing.loc[clothing['clothing_id'] == clothing_id, 'low_level'].iloc[0]

    try:
        return _material_weights(clothing_id, category, item_material, parts)
    except (AttributeError, IndexError, TypeError, ValueError, ZeroDivisionError):
        # Best effort, as before: an item with unusable data yields no
        # composition instead of aborting the whole ``apply``. Unlike the old
        # bare ``except``, schema errors (KeyError) and interrupts now surface.
        return {}


In [109]:
_REGENERATIVE_FACTORS = {
    'Non-Regenerative': 1.0,
    'Semi-Regenerative': 3.0,
    'Regenerative': 5.0,
}


def _brand_ratings(good_table, brand_name):
    """Return the ``(planet, people)`` rating digits for a brand, or ``(None, None)``.

    The brand is searched first in ``frontend_brandname`` and, when nothing
    matches there, in ``description``. A ``frontend_brandname`` match is only
    used when it yields exactly three rows.
    """
    def matching(column):
        # Literal, case-insensitive substring match: regex=False keeps names
        # such as "A+B" or "(x)" from being parsed as patterns, and na=False
        # keeps empty cells from producing an unusable NaN mask.
        found = good_table[column].str.contains(
            brand_name, case=False, regex=False, na=False)
        return good_table[found]

    matches = matching('frontend_brandname')
    if matches.empty:
        matches = matching('description')
        if matches.empty:
            return None, None
    elif len(matches) != 3:
        return None, None

    ratings = matches['rating']
    # The score is the first character of the rating string.
    planet_score = ratings.loc[matches['domain'] == 'Planet'].iloc[0][0]
    people_score = ratings.loc[matches['domain'] == 'People'].iloc[0][0]
    return planet_score, people_score


def get_scores_material_dict(material_dict, brand_name, higg_table=None, good_table=None):
    """Compute the overall score of an item from its materials and its brand.

    Args:
        material_dict: mapping of material id to weight (anything ``float()``
            accepts). Ids are compared as strings with ``material_id``.
        brand_name: brand looked up in the brand-rating table by
            case-insensitive substring match.
        higg_table: per-material table with ``material_id``, the ``score_*``
            columns, ``co2e_kg``, ``water_volume_m3`` and the ``is_*`` label
            columns; defaults to the module-level ``higg``.
        good_table: brand-rating table with ``frontend_brandname``,
            ``description``, ``domain`` and ``rating`` columns; defaults to
            the module-level ``good``.

    Returns:
        A one-element ``pandas.Series`` named ``'OverallScore'`` (index 0).
        With a brand rating the score is 0.5 * brand + 0.25 * higg +
        0.25 * circularity; without one, the higg and circularity terms are
        rescaled to carry the full weight.

    Raises:
        IndexError: if a material id has no row in ``higg_table`` or a matched
            brand has no 'Planet' / 'People' rating row.
        ValueError, TypeError: if a weight, score or rating is not numeric.
    """
    higg_table = higg if higg_table is None else higg_table
    good_table = good if good_table is None else good_table

    ############################################ GETTING SCORES ###############################################
    WaterScore = 0.0
    EutrophiScore = 0.0
    CO2Score = 0.0
    FossilFuelScore = 0.0

    # Metrics of CO2 and water usage
    CO2Weight = 0.0
    WaterVolume = 0.0

    MicroplasticPollutionScore = 0.0
    RegenerativeScore = 0.0
    RegenerativeFactor = 0.0

    fiber_types = []
    all_recyclable = True
    all_biodegradable = True

    for material_id, raw_weight in material_dict.items():
        weight = float(raw_weight)
        # ``.iloc[0]`` picks the material's row explicitly; calling ``float``
        # on the filtered one-row Series is deprecated in pandas and fails
        # when an id is duplicated.
        record = higg_table[higg_table['material_id'] == str(material_id)].iloc[0]

        # higg scores: weighted sum over materials
        CO2Score += weight * float(record['score_globwarm'])
        EutrophiScore += weight * float(record['score_eutrophication'])
        WaterScore += weight * float(record['score_water'])
        FossilFuelScore += weight * float(record['score_resourcedepletion'])

        # higg CO2, water metrics
        CO2Weight += weight * float(record['co2e_kg'])
        WaterVolume += weight * float(record['water_volume_m3'])

        # circularity inputs
        fiber_types.append(record['is_natural'])
        if record['is_recyclable'] == 'Non-Recyclable':
            all_recyclable = False
        if record['is_biodegradable'] == 'Not Biodegradable':
            all_biodegradable = False

        is_pollutant = record['is_microplastic_pollutant'] == 'Microplastic Pollutant'
        MicroplasticPollutionScore += weight * (1.0 if is_pollutant else 5.0)

        # NOTE(review): an unrecognised label keeps the factor of the previous
        # material (0.0 for the first one), as in the original - confirm.
        RegenerativeFactor = _REGENERATIVE_FACTORS.get(
            record['is_regenerative'], RegenerativeFactor)
        RegenerativeScore += weight * RegenerativeFactor

    ################################################ OUTPUT ################################################

    # RECYCLABILITY: only when every material is recyclable and all materials
    # share one fiber type.
    single_fiber_type = len(set(fiber_types)) == 1
    RecyclabilityScore = 5 if (all_recyclable and single_fiber_type) else 1.0

    # BIODEGRADABLE?
    BiodegradableScore = 5.0 if all_biodegradable else 1.0

    # GoodOnYou score
    planet_score, people_score = _brand_ratings(good_table, brand_name)

    water_score = round(WaterScore, 3)
    eutrophi_score = round(EutrophiScore, 3)
    co2_score = round(CO2Score, 3)
    fossilfuel_score = round(FossilFuelScore, 3)

    #TOTAL SCORE WEIGHTS
    brand_weight = 0.5
    higg_weight = 0.25
    circularity_weight = 0.25

    #number of components
    num_brand_score_component = 2
    num_higg_component = 4
    num_circularity_component = 4

    higg_score = (WaterScore + EutrophiScore + CO2Score + FossilFuelScore) / num_higg_component
    circularity_score = (RecyclabilityScore + BiodegradableScore
                         + MicroplasticPollutionScore + RegenerativeScore) / num_circularity_component

    if planet_score is not None:
        brand_score = (float(planet_score) + float(people_score)) / num_brand_score_component
        overall_score = (higg_weight * higg_score
                         + brand_weight * brand_score
                         + circularity_weight * circularity_score)
    else:
        # No brand rating: spread the full weight over the two remaining
        # components. The previous code multiplied ``float(brand_score)`` with
        # ``brand_score = None`` here, so this branch always raised TypeError.
        brand_score = None
        overall_score = ((higg_weight * higg_score + circularity_weight * circularity_score)
                         / (higg_weight + circularity_weight))
    overall_score = round(overall_score, 3)

    scores_table = pd.DataFrame([brand_name, water_score, eutrophi_score,
                        co2_score, fossilfuel_score, brand_score, planet_score, people_score, overall_score,
                        circularity_score, higg_score, WaterVolume, CO2Weight,
                        RecyclabilityScore , BiodegradableScore , MicroplasticPollutionScore, RegenerativeScore]).transpose()

    scores_table.columns = ['BrandName', 'WaterScore', 'EutrophicationScore',
                            'CO2Score', 'FossilFuelScore', 'GoodOnYou', 'PlanetScore', 'PeopleScore', 'OverallScore',
                            'CircularityScore', 'HiggScore','WaterVolume', 'CO2Weight',
                            'RecyclabilityScore', 'BiodegradableScore', 'MicroplasticPollutionScore', 'RegenerativeScore']

    return scores_table['OverallScore']


In [110]:
def concatScores(row):
    """Score one clothing row, yielding NaN when the score cannot be computed.

    Args:
        row: mapping-like row providing ``'material_dict'`` and
            ``'brand_name'``.

    Returns:
        The one-element ``'OverallScore'`` Series produced by
        ``get_scores_material_dict``, or a one-element NaN Series with the
        same name and index when scoring raises.

    Raises:
        KeyError: if ``row`` lacks ``'material_dict'`` or ``'brand_name'``.
    """
    material_dict = row['material_dict']
    brand_name = row['brand_name']
    try:
        return get_scores_material_dict(material_dict, brand_name)
    except Exception:
        # Same shape as the success path (the old fallback was a DataFrame),
        # so ``DataFrame.apply(axis=1)`` sees one result type for every row.
        # ``Exception`` rather than a bare except lets KeyboardInterrupt and
        # SystemExit propagate.
        return pd.Series([np.nan], name='OverallScore')


In [111]:
def renameHM(name):
    """Map the brand name ``'hm'`` to ``'H&M'``; return any other value unchanged."""
    return 'H&M' if name == 'hm' else name
# Rewrite the brand name 'hm' as 'H&M' in place; every other value is left as is.
clothing_df['brand_name'] = clothing_df['brand_name'].apply(renameHM)

In [113]:
#clothing_df[clothing_df['material_dict'].apply(lambda x: x.values == {})]

#### Calculate the scores

In [114]:
# One {material_id: weight} dict per clothing item. getScore handles data
# errors internally and returns a dict either way, so this apply only raises
# for problems getScore does not catch.
clothing_df['material_dict'] = clothing_df['clothing_id'].apply(getScore)



In [115]:
# Row-wise scoring: concatScores reads each row's 'material_dict' and
# 'brand_name'. Must run while 'material_dict' still holds dicts, i.e. before
# the JSON serialisation cell.
clothing_df['OverallScore'] = clothing_df.apply(concatScores,axis=1)

In [116]:
# Serialise both columns to JSON strings. From here on 'material_dict' holds
# text rather than dicts.
clothing_df['image_link_color'] = clothing_df['image_link_color'].apply(json.dumps)
clothing_df['material_dict'] = clothing_df['material_dict'].apply(json.dumps)

In [171]:
# Work on a copy so the string rewrites applied to it leave clothing_df untouched.
clothing_df_copy = clothing_df.copy()

In [172]:
# Replace list brackets with braces in the 'color' and 'size' strings.
# regex=False makes '[' and ']' literal text regardless of the installed
# pandas version (the default for ``regex`` changed between pandas 1.x and 2.x,
# and '[' alone is not a valid regular expression).
clothing_df_copy['color'] = (
    clothing_df_copy['color']
    .str.replace('[', '{', regex=False)
    .str.replace(']', '}', regex=False)
)
clothing_df_copy['size'] = (
    clothing_df_copy['size']
    .str.replace('[', '{', regex=False)
    .str.replace(']', '}', regex=False)
)

In [173]:
# Spell out the double-quote mark in sizes as the word 'inches'. regex=False
# states explicitly that the quote is literal text, independent of the pandas
# version's default.
clothing_df_copy['size'] = clothing_df_copy['size'].str.replace('"', 'inches', regex=False)

In [117]:
#reindex to concat
#score_df = score_df.reset_index().drop('index',axis=1)
#clothing_df = clothing_df.reset_index().drop('index',axis=1)

In [118]:
#new_clothing_df = pd.concat([clothing_df,score_df], axis=1)

In [119]:
# Display the rows that have no overall score.
# Unmatched "miscellaneous polyester" materials produce NaN values.
clothing_df[clothing_df['OverallScore'].isna()]

Unnamed: 0,clothing_id,display_name,color,size,price,product_url,image_link_color,brand_name,description,scrapped_date,low_level,material_dict,OverallScore


In [120]:
# Add a 'gender' column filled entirely with NaN (no gender data is derived here).
clothing_df['gender'] = np.nan

In [122]:
# Keep only these columns, in this order; any other column is dropped.
clothing_df = clothing_df[['clothing_id', 'display_name', 'color', 'size', 'price', 'product_url',
       'image_link_color', 'brand_name', 'description', 'scrapped_date',
       'low_level', 'material_dict', 'gender', 'OverallScore']]

In [174]:
# Write the export without the index column. ``index`` is documented as a
# bool, so pass False rather than relying on None being falsy.
# NOTE(review): in file order clothing_df_copy is taken before the 'gender'
# column is added and the columns are reordered on clothing_df, so those
# changes are only in this file if the cells were run in execution-count
# order - confirm which frame should be exported.
clothing_df_copy.to_csv('clothing_df.csv', index=False)