In [4]:
import pandas as pd
from pymongo import MongoClient
import datetime
import pytz
from bson.objectid import ObjectId
import os 

datetime.datetime.now(pytz.utc).isoformat()

uri = os.getenv('MONGO_URI')

client = MongoClient(uri)

db = client.johannesvc # .General

In [3]:
list(db.metricMeta.find({'framework': "IRIS+"},{'embeddings':0}).limit(20))

[{'_id': ObjectId('6449b299a2927cd3aa323dc5'),
  'code': 'greenhouse_gas_emissions_mitigated',
  'title': 'Greenhouse Gas Emissions Mitigated',
  'description': 'Amount of greenhouse gas (GHG) emissions mitigated by the organization during the reporting period, including GHG emissions reductions from both direct and indirect sources.',
  'type': 'numeric',
  'measure': 'tCO2eq',
  'framework': ['IRIS+'],
  'category': ['Climate']},
 {'_id': ObjectId('6449b2b7a2927cd3aa323dc8'),
  'code': 'water_saved',
  'title': 'Water Saved',
  'description': '',
  'type': 'numeric',
  'measure': 'megaliters',
  'framework': ['IRIS+'],
  'category': ['Water']},
 {'_id': ObjectId('6449b2c8a2927cd3aa323dca'),
  'code': 'active_platform_users',
  'title': 'Active Platform Users',
  'description': '',
  'type': 'numeric',
  'measure': '',
  'framework': ['IRIS+'],
  'category': ['Cross Category']},
 {'_id': ObjectId('6449b304a2927cd3aa323dcc'),
  'code': 'ghg_emissions_scope_2',
  'title': 'GHG Emissions

In [13]:
list(db.metricMeta.find({},{'embeddings':0}))

[{'_id': ObjectId('64da2bcae5031249c4c7fc36'),
  'ID': 'PI9991',
  'Metric Name': 'Supplier Individuals: Smallholder',
  'Definition': 'Number of smallholder farmer individuals who sold to the organization during the reporting period.',
  'Footnote': 'Organizations should footnote all assumptions used, including source of data.',
  'Usage Guidance': 'This metric is intended to capture the number of smallholder suppliers of the organization during the reporting period. This metric is intended for use by organizations that seek to support smallholder farmer individuals through their supply chains.\n\nThis metric is multi-dimensional with regard to the five dimensions of impact: it may help describe the WHO dimension when the stakeholder group represented by the metric is the stakeholder group targeted by the investment or organization. It may also help measure the HOW MUCH Scale dimension, which helps estimate the number of the targeted stakeholders experiencing the outcome. For more on 

In [11]:
min_id = ObjectId("649425ea463481ed1cc81145")
list(db.metricMeta.find({"_id": {"$lte": min_id}}, {'embeddings': 0}))

[{'_id': ObjectId('6449b299a2927cd3aa323dc5'),
  'code': 'greenhouse_gas_emissions_mitigated',
  'title': 'Greenhouse Gas Emissions Mitigated',
  'description': 'Amount of greenhouse gas (GHG) emissions mitigated by the organization during the reporting period, including GHG emissions reductions from both direct and indirect sources.',
  'type': 'numeric',
  'measure': 'tCO2eq',
  'framework': ['IRIS+'],
  'category': ['Climate']},
 {'_id': ObjectId('6449b2a3a2927cd3aa323dc6'),
  'code': 'km2_of_forest_monitored_for_clients',
  'title': 'KM2 of Forest Monitored for Clients',
  'description': 'This metric represents the number of square kilometers of primary forest screened through satellite imagery services for clients by company.',
  'type': 'numeric',
  'measure': 'km2',
  'framework': ['Custom'],
  'category': ['Biodiversity'],
  'Usage Guidance': None},
 {'_id': ObjectId('6449b2ada2927cd3aa323dc7'),
  'code': 'candidates_assessed_bias_free',
  'title': 'Candidates Assessed Bias-Fre

In [12]:
db.metricMeta.delete_many({"_id": {"$lte": min_id}})

<pymongo.results.DeleteResult at 0x1a6f3678c40>

In [15]:
list(db.metricMeta.find({'description':''}, {'embeddings': 0}))

[]

In [28]:
pais = list(db.metricMeta.find({'framework':['Principal Adverse Impacts (PAI)']}, {'embeddings': 0}))

In [18]:
len(pais)

70

In [29]:
pais

[{'_id': ObjectId('64da629c5871d7c8f80f1865'),
  'title': 'GHG emissions: Scope 2 GHG emissions',
  'description': 'Scope 2 GHG emissions.',
  'category': 'Climate and other environment-related indicators (Greenhouse gas emissions )',
  'framework': ['Principal Adverse Impacts (PAI)'],
  'Mandatory': True,
  'createdAt': datetime.datetime(2023, 8, 14, 17, 20, 31, 979000),
  'Calculation': 'SUM((value of investment / enterprise value) * scope X emissions)',
  'Usage Guidance': 'Fields required: Impact [year n], Impact [year n-1], Explanation, Actions taken,  actions planned, and targets set for the next reference period.',
  'Footnote': "This indicator is mandatory under the EU's Sustainable Finance Disclosure Regulations (SFDR)."},
 {'_id': ObjectId('64da629c5871d7c8f80f1867'),
  'title': 'Total GHG emissions',
  'description': 'Total GHG emissions.',
  'category': 'Climate and other environment-related indicators (Greenhouse gas emissions )',
  'framework': ['Principal Adverse Impacts

In [22]:
query = {'framework': ['Principal Adverse Impacts (PAI)']}

# Find the documents and update them
for doc in db.metricMeta.find(query):
    description = doc.get('description', "")
    parts = description.split('\n')
    
    if len(parts) >= 3:
        new_description = parts[0].strip()
        usage_guidance = "Fields required: " + parts[1].replace("Fields required:", "").strip()
        calculation = parts[2].replace("Formula:", "").strip()
        
        update_query = {
            "$set": {
                "description": new_description,
                "Usage Guidance": usage_guidance,
                "Calculation": calculation
            }
        }
        
        db.metricMeta.update_one({"_id": doc['_id']}, update_query)

print("Documents updated.")


Documents updated.


In [26]:
query = {'framework': ['Principal Adverse Impacts (PAI)']}

for doc in db.metricMeta.find(query):
    
    if doc['Mandatory'] == True:
        Footnote = "This indicator is mandatory under the EU's Sustainable Finance Disclosure Regulations (SFDR)."
    else: 
        Footnote = "This indicator is voluntary under the EU's Sustainable Finance Disclosure Regulations (SFDR)."
    update_query = {
            "$set": {
                "Footnote": Footnote,
            }
        }
        
    db.metricMeta.update_one({"_id": doc['_id']}, update_query)

# fix exchange docs with dead fields

In [51]:
requests = list(db.exchange.find({}))

In [54]:
requests[0]

{'_id': ObjectId('64ef5f5d02e3446ce8134f08'),
 'title': 'Generic request',
 'portfolio_link': '64ef5f1e02e3446ce8134f07',
 'fund_link': '64ef097318d8d61675b7f415',
 'companies': [{'company_link': '6447eff42a81aa1fc0aa7e30',
   'requested_metrics': ['64ef064360c02836e9cba7e3',
    '64da2bcae5031249c4c7fc67']},
  {'company_link': '64ef16a883af84a0ad8bc1c2',
   'requested_metrics': ['64ef064360c02836e9cba7e3',
    '64da2bcae5031249c4c7fc67']},
  {'company_link': '6463316d5ab3f926ff0db8e1',
   'requested_metrics': ['64ef064360c02836e9cba7e3',
    '64da2bcae5031249c4c7fc67']}],
 'metrics_set': True,
 'reporting_period': ['01/22', '12/22'],
 'deadline': '06/12/2023'}

In [48]:
# just print
for req in requests:
    entitity_id = req['fund_link']
    company = db.funds.find_one({'_id': ObjectId(entitity_id)})
    company['title']

    company_list = []
    for comp in req['companies']:
        entitity_id = comp['company_link']
        collection = db.companies
        company = collection.find_one({'_id': ObjectId(entitity_id)})
        if company:
            company_list.append(company['title'])
            
    metric_list = []
    metrics = req['companies'][0]['requested_metrics']
    for metric in metrics:
        entitity_id = metric
        metr = db.metricMeta.find_one({'_id': ObjectId(entitity_id)})
        if metr:
            metric_list.append(metr['title'])

    print(company_list, metric_list)

['Pacific Carbon Capture', 'BioFuel Dynamics', 'Winnow'] ['SFDR 9 Pack', 'Greenhouse Gas Emissions Sequestered']
['Pacific Carbon Capture', 'BioFuel Dynamics', 'Winnow'] ['SFDR 9 Pack', 'Greenhouse Gas Emissions Sequestered']
['Pacific Carbon Capture', 'Winnow', 'WindFlow Energy'] ['SFDR 9 Pack', 'Greenhouse Gas Emissions Sequestered']
['Pacific Carbon Capture', 'Winnow', 'WindFlow Energy'] ['SFDR 9 Pack', 'Greenhouse Gas Emissions Sequestered']
['Pacific Carbon Capture', 'WindFlow Energy', 'Winnow'] ['Greenhouse Gas Emissions Sequestered', 'SFDR 9 Pack']
['Pacific Carbon Capture', 'Winnow'] ['SFDR 9 Pack', 'Greenhouse Gas Emissions Sequestered']
['Carbonfuture', 'Circularise', 'Circularise', 'Circularise', 'CarbonFuture', 'CarbonZero Tech', 'Climate Guard Technologies', 'Climate Tech Labs Inc.'] ['SFDR 9 Pack', 'Greenhouse Gas Emissions Sequestered']
['BioFuel Dynamics', 'Resource Reinvented Inc.'] ['SFDR 9 Pack', 'Greenhouse Gas Emissions Sequestered']


In [44]:
# remove requests with dead fields (from exchange collection)
for req in requests:
    exchange_id = req['_id']
    entitity_id = req['fund_link']
    fund = db.funds.find_one({'_id': ObjectId(entitity_id)})
    if fund is None: # dead fund
        db.exchange.delete_one({'_id': exchange_id})
        break

    for comp in req['companies']:
        entitity_id = comp['company_link']
        collection = db.companies
        company = collection.find_one({'_id': ObjectId(entitity_id)})
        if company is None: # dead company
            db.exchange.delete_one({'_id': exchange_id})
            break
        
    for metric in req['companies'][0]['requested_metrics']:
        metr = db.metricMeta.find_one({'_id': ObjectId(metric)})
        if metr is None: # dead metric
            exchange_id = req['_id']
            db.exchange.delete_one({'_id': exchange_id})    
            break
                 
    if not req['companies'][0]['requested_metrics']: # or if it just doesn't have metrics
        exchange_id = req['_id']
        db.exchange.delete_one({'_id': exchange_id})

In [49]:
len(list(db.exchange.find({})))

8

In [53]:
for req in requests:
    exchange_id = req['_id']
    entitity_id = req['fund_link']
    fund = db.funds.find_one({'_id': ObjectId(entitity_id)})
    if fund is None: # dead fund
        print(entitity_id)
    else:     
        print('not dead', entitity_id)
        # db.exchange.delete_one({'_id': exchange_id})
        # break

not dead 64ef097318d8d61675b7f415
not dead 64ef097318d8d61675b7f415
not dead 64ef097318d8d61675b7f415
not dead 64ef097318d8d61675b7f415
not dead 64ef097318d8d61675b7f415
not dead 64ef097318d8d61675b7f415
not dead 6447ef3b63bfe2715b407dd3
not dead 64ef097318d8d61675b7f415


# better demo material
    make user with fund
    fund with portfolio
    request
    then ingest 3-5 metricvalues 

In [8]:
db.users.find_one({
 'name': 'Johannes VC',
 'email': 'johannes@supernova.ai',
 'password': 'you-shouldnt-see-this',
 'fund_link': '65005d6bc50c91cfe368a9ef',
 'type': 'fund-investor'})

{'_id': ObjectId('65005d84c50c91cfe368a9f0'),
 'name': 'Johannes VC',
 'email': 'johannes@supernova.ai',
 'password': 'you-shouldnt-see-this',
 'fund_link': '65005d6bc50c91cfe368a9ef',
 'type': 'fund-investor'}

In [5]:
fund = db.funds.insert_one({
  'title': 'Demo Fund',
  'description': 'Enabling the energy transition by investing in sustainable cleantech companies.'})
fund

<pymongo.results.InsertOneResult at 0x2076928aec0>

In [6]:
fund.inserted_id

ObjectId('65005d6bc50c91cfe368a9ef')

In [13]:
portfolio = db.portfolios.find_one({'_id': ObjectId('65006dadd092adeb1aec983a')})
portfolio

{'_id': ObjectId('65006dadd092adeb1aec983a'),
 'title': 'Investing for impact',
 'description': 'Enabling the energy transition by investing in sustainable cleantech companies',
 'companies': [{'company_link': '65006d49d092adeb1aec9831', 'weight': 35},
  {'company_link': '65006d67d092adeb1aec9834', 'weight': 10},
  {'company_link': '65006d85d092adeb1aec9836', 'weight': 20},
  {'company_link': '65006da9d092adeb1aec9838', 'weight': 35}],
 'aum': '20000000',
 'currency': 'eur',
 'objectives': ['Enabling the energy transition by investing in sustainable cleantech companies'],
 'fund_link': '65005d6bc50c91cfe368a9ef'}

In [18]:
request = db.exchange.find_one({'portfolio_link': '65006dadd092adeb1aec983a'})
request

{'_id': ObjectId('65006e1fd092adeb1aec983b'),
 'title': 'Generic request',
 'portfolio_link': '65006dadd092adeb1aec983a',
 'fund_link': '65005d6bc50c91cfe368a9ef',
 'companies': [{'company_link': '65006d49d092adeb1aec9831',
   'requested_metrics': ['64da2bcae5031249c4c7fc5d',
    '64da2bcae5031249c4c7fd29',
    '64da2bcae5031249c4c7fc5e',
    '64da2bcae5031249c4c7feb3',
    '64da2bcae5031249c4c7fcd3',
    '64da2bcae5031249c4c7fd2f',
    '64da2bcae5031249c4c7fd5d',
    '64da2bcae5031249c4c7fdcf']},
  {'company_link': '65006d67d092adeb1aec9834',
   'requested_metrics': ['64da2bcae5031249c4c7fc5d',
    '64da2bcae5031249c4c7fd29',
    '64da2bcae5031249c4c7fc5e',
    '64da2bcae5031249c4c7feb3',
    '64da2bcae5031249c4c7fcd3',
    '64da2bcae5031249c4c7fd2f',
    '64da2bcae5031249c4c7fd5d',
    '64da2bcae5031249c4c7fdcf']},
  {'company_link': '65006d85d092adeb1aec9836',
   'requested_metrics': ['64da2bcae5031249c4c7fc5d',
    '64da2bcae5031249c4c7fd29',
    '64da2bcae5031249c4c7fc5e',
    '64

In [15]:
example = {'metric_meta_link': '64da2bcae5031249c4c7fc5d',
                            'proof_file_url': ['https://random.cat/','https://random.cat/','https://random.cat/'],
                            'value': 13,
                            'unit': "kgCO2eq", 
                            'aggregation_type': "sum",
                            'description': """<b>Scope of Emission Calculation:</b> Operational Control<br/>Chosen because EcoVolt has full authority over its manufacturing facilities, R&D labs, and administrative buildings.<br/><br/><b>Emission Sources:</b><br/>Manufacturing Facilities: Energy consumption for machinery and HVAC.<br/>R&D Labs: Equipment and testing-related emissions.<br/>Fleet Vehicles: CO2 emitted from company-owned electric and gas vehicles.<br/>Employee Commuting: Optional inclusion due to substantial impact.<br/><br/><b>Emission Factors:</b><br/>Manufacturing: Based on kWh consumed, using IPCC Tier 2 factors.<br/>R&D Labs: Custom factors developed through lifecycle analysis of specific equipment.<br/>Fleet Vehicles: EPA's emission factors for light-duty and heavy-duty vehicles.<br/>Commuting: Employee survey-based average distance, using UK DEFRA factors for varied modes of transport.<br/><br/><b>Methodology:</b> <br/>Follows the Greenhouse Gas Protocol Corporate Standard.<br/><br/><b>Calculation:</b><br/>Manufacturing Facilities: <br/>  - Energy consumption: 10,000 kWh<br/>  - IPCC Tier 2 factor: 0.5 kg CO2e/kWh<br/>  - Emissions = 10,000 kWh * 0.5 kg CO2e/kWh = 5,000 kg CO2e<br/>  <br/>R&D Labs: <br/>  - Equipment run-time: 4,000 hours<br/>  - Custom factor: 0.1 kg CO2e/hour<br/>  - Emissions = 4,000 hours * 0.1 kg CO2e/hour = 400 kg CO2e<br/>  <br/>Fleet Vehicles: <br/>  - Miles driven: 20,000 miles<br/>  - EPA factor: 0.4 kg CO2e/mile<br/>  - Emissions = 20,000 miles * 0.4 kg CO2e/mile = 8,000 kg CO2e<br/>  <br/>Employee Commuting: <br/>  - Total commuting miles: 30,000 miles<br/>  - UK DEFRA factor: 0.2 kg CO2e/mile<br/>  - Emissions = 30,000 miles * 0.2 kg CO2e/mile = 6,000 kg CO2e<br/><br/><b>Total Scope 1 Emissions:</b> 5,000 kg (Manufacturing) + 400 kg (R&D) + 8,000 kg (Fleet) + 6,000 kg (Commuting) = 19,400 kg CO2e for FY 2022-2023.""",
                            'reporting_period': '4/22 - 4/23', 
                            'submitted': True, 
                            'company_link': '65006d49d092adeb1aec9831'}

In [16]:
r = db.metricValues.update_one({'_id': ObjectId('65009416c50c91cfe368a9f1')}, {'$set': example})

        'unit' should be dropdown: kgCO2eq, MtCO2eq, GWh, KWh, Joules, Tons, Kg, Km, Square meters, Cubic meters, Hectares, Hours, Liters, Other [allow other option]

        'aggregation_type' should be: sum (for absolute numbers), weighted average (for percentages)

In [17]:
db.metricValues.find_one({'_id': ObjectId('65009416c50c91cfe368a9f1')})

{'_id': ObjectId('65009416c50c91cfe368a9f1'),
 'metric_meta_link': '64da2bcae5031249c4c7fc5d',
 'proof_file_url': ['https://random.cat/',
  'https://random.cat/',
  'https://random.cat/'],
 'value': 13,
 'unit': 'kgCO2eq',
 'description': "<b>Scope of Emission Calculation:</b> Operational Control<br/>Chosen because EcoVolt has full authority over its manufacturing facilities, R&D labs, and administrative buildings.<br/><br/><b>Emission Sources:</b><br/>Manufacturing Facilities: Energy consumption for machinery and HVAC.<br/>R&D Labs: Equipment and testing-related emissions.<br/>Fleet Vehicles: CO2 emitted from company-owned electric and gas vehicles.<br/>Employee Commuting: Optional inclusion due to substantial impact.<br/><br/><b>Emission Factors:</b><br/>Manufacturing: Based on kWh consumed, using IPCC Tier 2 factors.<br/>R&D Labs: Custom factors developed through lifecycle analysis of specific equipment.<br/>Fleet Vehicles: EPA's emission factors for light-duty and heavy-duty vehicl