# Hackathon - Harvard Spreadsheet and CIA Factbook Processing

## Imports

In [1]:
import csv    # To handle accessing the Harvard spreadsheet
import pickle
import ssl
import urllib
import urllib.request   # Explicitly load the submodule that provides urlopen()
import uuid

import html5lib  # To handle accessing the CIA World Factbook
from bs4 import BeautifulSoup   # To process the HTML of the CIA Factbook

# Base URL of the CIA World Factbook country pages; a per-country path segment is appended when fetching
cia_url = 'https://www.cia.gov/the-world-factbook/countries/'
# SSL context passed to urlopen() when the country pages are fetched.
# NOTE(review): hostname checking and certificate verification are both disabled below, so the
# HTTPS connection is not authenticated - confirm that this is still required.
ctx = ssl.create_default_context()
ctx.check_hostname = False   # Has to be cleared before verify_mode may be set to CERT_NONE
ctx.verify_mode = ssl.CERT_NONE

## Dictionary of industries

Mapping the GICS and BBG classifications to Lines of Business is required since sometimes the GICS and BBG classifications completely overlap, and other times, the BBG classification is more exact and the distinction should be maintained. 

To generate the dictionary, all combinations of GICS and BBG classifications found in the spreadsheet were captured. These were manually reviewed to curate the mapping shown below.


In [2]:
# Maps a spreadsheet classification to the IRI of a line of business.
# Keys have one of two shapes, which is how determine_industry() looks them up:
#   '<GICS Industry>, any'   - the GICS classification alone selects the line of business
#   'any, <Industry (BBG)>'  - the more specific BBG classification selects the line of business
# (A second, identical 'Consumer Finance, any' entry was removed; a repeated key in a dict literal
# silently overwrites the earlier one.)
# NOTE(review): ':AutomobileAndMotorcyleBusiness' and ':DistilleryAndVineryBusiness' look misspelled;
# they are left untouched because they have to match the IRIs defined in the ontology - confirm.
industry_dict = {
    'any, Aerospace and Defense': ':AerospaceAndDefenseBusiness',
    'Air Freight & Logistics, any': ':AirFreightAndLogisticsBusiness',
    'Airlines, any': ':AirlineBusiness',
    'Auto Components, any': ':AutoComponentBusiness',
    'Automobiles, any': ':AutomobileAndMotorcyleBusiness',
    'Banks, any': ':BankingBusiness',
    'Capital Markets, any': ':CapitalMarketBusiness',
    'Consumer Finance, any': ':ConsumerFinanceBusiness',
    'any, Brewers': ':BrewingBusiness',
    'any, Distillers and Vintners': ':DistilleryAndVineryBusiness',
    'any, Soft Drinks': ':SoftDrinkBusiness',
    'Biotechnology, any': ':BiotechnologyBusiness',
    'Building Products, any': ':BuildingProductsBusiness',
    'any, Commodity Chemicals': ':CommodityChemicalsBusiness',
    'any, Diversified Chemicals': ':DiversifiedChemicalsBusiness',
    'any, Fertilizers and Agricultural Chemicals': ':FertilizersAndAgriculturalChemicalsBusiness',
    'any, Industrial Gases': ':IndustrialGasesBusiness',
    'any, Specialty Chemicals': ':SpecialtyChemicalsBusiness',
    'any, Commercial Printing': ':CommercialPrintingBusiness',
    'any, Diversified Support Services': ':DiversifiedSupportServicesBusiness',
    'any, Environmental and Facilities Services': ':EnvironmentalAndFacilitiesServicesBusiness',
    'any, Security and Alarm Services': ':SecurityAndAlarmServicesBusiness',
    'Communications Equipment, any': ':CommunicationsEquipmentBusiness',
    'any, Construction and Engineering': ':ConstructionAndEngineeringBusiness',
    'Construction Materials, any': ':ConstructionMaterialBusiness',
    'any, Metal and Glass Containers': ':MetalAndGlassContainerBusiness',
    'any, Paper Packaging': ':PaperPackagingBusiness',
    'Distributors, any': ':Distributor',
    'Diversified Consumer Services, any': ':DiversifiedConsumerServicesBusiness',
    'Diversified Financial Services, any': ':DiversifiedFinancialServicesBusiness',
    'Diversified Telecommunication Services, any': ':DiversifiedTelecommunicationServicesBusiness',
    'Electric Utilities, any': ':ElectricUtilitiesBusiness',
    'any, Electrical Components and Equipment': ':ElectricalComponentsAndEquipmentBusiness',
    'any, Heavy Electrical Equipment': ':HeavyElectricalEquipmentBusiness',
    'any, Electronic Components': ':ElectronicComponentsBusiness',
    'any, Electronic Equipment and Instruments': ':ElectronicEquipmentAndInstrumentsBusiness',
    'any, Electronic Manufacturing Services': ':ElectronicManufacturingServicesBusiness',
    'any, Technology Distributors': ':TechnologyDistributor',
    'any, Oil and Gas Drilling': ':OilAndGasDrillingBusiness',
    'any, Oil and Gas Equipment and Services': ':OilAndGasEquipmentAndServicesBusiness',
    'Entertainment, any': ':EntertainmentBusiness',
    'Equity Real Estate Investment Trusts (REITs), any': ':EquityRealEstateInvestmentTrustBusiness',
    'any, Food Distributors': ':FoodDistributor',
    'any, Food Retail': ':FoodRetailBusiness',
    'any, Hypermarkets and Super Centers': ':HypermarketAndSuperCenterBusiness',
    'any, Agricultural Products': ':AgriculturalProductsBusiness',
    'any, Packaged Foods and Meats': ':PackagedFoodsAndMeatsBusiness',
    'Gas Utilities, any': ':GasUtilitiesBusiness',
    'Health Care Equipment & Supplies, any': ':HealthCareEquipmentAndSuppliesBusiness',
    'Health Care Providers & Services, any': ':HealthCareProvidersAndServicesBusiness',
    'Health Care Technology, any': ':HealthCareTechnologyBusiness',
    'Hotels, Restaurants & Leisure, any': ':HospitalityBusiness',
    'any, Consumer Electronics': ':ConsumerElectronicsBusiness',
    'any, Home Furnishings': ':HomeFurnishingsBusiness',
    'any, Household Appliances': ':HouseholdAppliancesBusiness',
    'any, Housewares and Specialties': ':HousewaresAndSpecialtiesBusiness',
    'any, Homebuilding': ':HomebuildingBusiness',
    'Household Products, any': ':HouseholdProductsBusiness',
    'IT Services, any': ':ITServicesBusiness',
    'Independent Power and Renewable Electricity Producers, any': ':IndependentPowerAndRenewableElectricityProducer',
    'Industrial Conglomerates, any': ':IndustrialConglomerate',
    'Insurance, any': ':InsuranceBusiness',
    'Interactive Media & Services, any': ':InteractiveMediaAndServicesBusiness',
    'Internet & Direct Marketing Retail, any': ':InternetAndDirectMarketingRetailBusiness',
    'Leisure Products, any': ':LeisureProductsBusiness',
    'Life Sciences Tools & Services, any': ':LifeSciencesToolsAndServicesBusiness',
    'any, Agricultural and Farm Machinery': ':AgriculturalAndFarmMachineryBusiness',
    'any, Construction Machinery and Heavy Trucks': ':ConstructionMachineryAndHeavyTrucksBusiness',
    'any, Industrial Machinery': ':IndustrialMachinery',
    'Marine, any': ':MaritimeIndustry',
    'any, Advertising': ':AdvertisingBusiness',
    'any, Broadcasting': ':BroadcastingBusiness',
    'any, Cable and Satellite': ':CableAndSatelliteBusiness',
    'any, Publishing': ':PublishingBusiness',
    'any, Aluminum': ':AluminumBusiness',
    'any, Copper': ':CopperBusiness',
    'any, Diversified Metals and Mining': ':DiversifiedMetalsAndMiningBusiness',
    'any, Gold': ':GoldBusiness',
    'any, Precious Metals and Minerals': ':PreciousMetalsAndMineralsBusiness',
    'any, Silver': ':SilverBusiness',
    'any, Steel': ':SteelBusiness',
    'Multi-Utilities, any': ':MultiUtilitiesBusiness',
    'Multiline Retail, any': ':MultilineRetailBusiness',
    'Oil, Gas & Consumable Fuels, any': ':OilGasAndConsumableFuelsBusiness',
    'any, Coal and Consumable Fuels': ':CoalAndConsumableFuelsBusiness',
    'any, Integrated Oil and Gas': ':IntegratedOilAndGasBusiness',
    'any, Oil and Gas Exploration and Production': ':OilAndGasExplorationAndProductionBusiness',
    'any, Oil and Gas Refining and Marketing': ':OilAndGasRefiningAndMarketingBusiness',
    'any, Oil and Gas Storage and Transportation': ':OilAndGasStorageAndTransportationBusiness',
    'any, Forest Products': ':ForestProductsBusiness',
    'any, Paper Products': ':PaperProductsBusiness',
    'Personal Products, any': ':PersonalProductsBusiness',
    'Pharmaceuticals, any': ':PharmaceuticalBusiness',
    'any, Human Resource and Employment Services': ':HumanResourceAndEmploymentServicesBusiness',
    'any, Research and Consulting Services': ':ResearchAndConsultingServicesBusiness',
    'any, Diversified Real Estate Activities': ':DiversifiedRealEstateBusiness',
    'any, Real Estate Development': ':RealEstateDevelopmentBusiness',
    'any, Real Estate Operating Companies': ':RealEstateOperatingCompanyBusiness',
    'any, Real Estate Services': ':RealEstateServicesBusiness',
    'any, Railroads': ':RailroadBusiness',
    'any, Trucking': ':TruckingBusiness',
    'any, Semiconductor Equipment': ':SemiconductorAndEquipmentBusiness',
    'any, Semiconductors': ':SemiconductorAndEquipmentBusiness',
    'any, Application Software': ':ApplicationSoftwareBusiness',
    'any, Systems Software': ':SystemsSoftwareBusiness',
    'any, Apparel Retail': ':ApparelRetailBusiness',
    'any, Automotive Retail': ':AutomotiveRetailBusiness',
    'any, Computer and Electronics Retail': ':ComputersAndElectronicsRetailBusiness',
    'any, Home Improvement Retail': ':HomeImprovementRetailBusiness',
    'any, Specialty Stores': ':SpecialtyStoreBusiness',
    'Technology Hardware, Storage & Peripherals, any': ':TechnologyHardwareStorageAndPeripheralsBusiness',
    'any, Apparel, Accessories and Luxury Goods': ':ApparelAccessoriesAndLuxuryGoodsBusiness',
    'any, Footwear': ':FootwearBusiness',
    'any, Textiles': ':TextileBusiness',
    'Thrifts & Mortgage Finance, any': ':ThriftAndMortgageFinanceBusiness',
    'Tobacco, any': ':TobaccoBusiness',
    'Trading Companies & Distributors, any': ':TradingCompanyAndDistributor',
    'any, Airport Services': ':AirportServicesBusiness',
    'any, Highways and Railtracks': ':HighwayAndRailtrackBusiness',
    'any, Marine Ports and Services': ':MarinePortAndServicesBusiness',
    'Water Utilities, any': ':WaterUtilitiesBusiness',
    'Wireless Telecommunication Services, any': ':WirelessTelecommunicationServicesBusiness'
}


## Other references (constants, dictionaries, arrays, ...)

In [3]:
# Multipliers for figures that are quoted in millions / billions
million = 1_000_000
billion = 1_000_000_000

# Dictionary of country names mapped to GeoName codes, restored from a local pickle file
# NOTE(review): unpickling can execute arbitrary code - only load a pickle that you created or trust
with open('country_names_mapped_to_geo_codes.pickle', 'rb') as pickle_file:
    names_to_geo_dict = pickle.load(pickle_file)

# Array of Assessments, ordered from least to most severe; add_assessment() indexes into it
assessments = [
    'Negligible or None',
    'Low',
    'Medium',
    'High',
    'Very High',
]

## Utilities

In [78]:
def add_assessment(level: int, turtle: list):
    """Append the ':assessment' triple for a severity level to a list of Turtle lines.

    :param level: Index into the module-level 'assessments' list
    :param turtle: List of Turtle lines, modified in place; the appended line ends with ' .'
    """
    label = assessments[level]
    closing_line = f'  :assessment "{label}" .'
    turtle.append(closing_line)
    

def convert_to_numeric(data_str: str, reverse: bool) -> float:
    """Convert a spreadsheet cell holding an accounting-style number to a float.

    Thousands separators (',') are dropped and a value wrapped in parentheses, such as '(1,234)',
    is treated as negative. An empty cell, or a dash placeholder that does not parse as a number
    (such as '-'), yields 0.0. Values that merely contain a '-' but are valid numbers ('-5',
    '1.5E-05') are parsed instead of being zeroed.

    :param data_str: The cell text (callers strip surrounding white space first)
    :param reverse: If True, the sign is flipped (negative indicator => positive impact)
    :return: The signed numeric value
    :raises ValueError: If the text is neither numeric nor a dash placeholder
    """
    if not data_str:
        return 0.0
    # Get rid of commas and parentheses, and then cast to float
    cleaned = data_str.replace(',', '').replace('(', '').replace(')', '')
    try:
        num = float(cleaned)
    except ValueError:
        if '-' in data_str:      # Dash placeholder for "no value"
            return 0.0
        raise
    # Parentheses mark a negative amount and 'reverse' flips the sign once more, so the value
    # is negated when exactly one of the two applies
    if bool(reverse) != data_str.startswith('('):
        return -num
    return num


def create_country_measurement_turtle(meas_type: str, geo_id: str, year: int, value: float, 
                                      units: str, turtle: list):
    """Append the Turtle lines of one country-level measurement to 'turtle'.

    A new ':Measurement<uuid>' individual of type 'meas_type' is written with its reported year
    and value, an optional unit, and the 'geo:' identifier that the measurement is defined for.

    :param meas_type: The measurement class, e.g. ':TotalPopulation'
    :param geo_id: Identifier written after the 'geo:' prefix
    :param year: The reported year
    :param value: The reported value
    :param units: Local name written after the 'unit:' prefix; no unit triple is written if empty
    :param turtle: List of Turtle lines, modified in place
    """
    turtle.extend([f':Measurement{uuid.uuid4()} a {meas_type} ;',
                   f'  :reported_year {year} ; :reported_value {value} ;'])
    if units:
        # Continue the statement; a leading '.' here would end it and orphan the remaining predicates
        turtle.append(f'  :has_unit unit:{units} ;')
    turtle.append(f'  :defined_for geo:{geo_id} .')


def create_org_measurement_turtle(meas_type: str, org_iri: str, year: int, value: int, turtle: list):
    """Append the Turtle lines of one organization-level measurement (in US dollars) to 'turtle'.

    Nothing is written when 'value' is falsy (e.g., 0).

    :param meas_type: The measurement class, e.g. ':TotalSales'
    :param org_iri: IRI of the organization that the measurement is defined for
    :param year: The reported year
    :param value: The reported value
    :param turtle: List of Turtle lines, modified in place
    """
    if value:
        measurement_iri = f':Measurement{uuid.uuid4()}'
        turtle.append(f'{measurement_iri} a {meas_type} ;')
        turtle.append(f'  :reported_year {year} ; :reported_value {value} ; :has_unit unit:USDollar ;')
        turtle.append(f'  :defined_for {org_iri} .')


def determine_industry(industry1: str, industry2: str) -> str:
    """Look up the line-of-business IRI for a pair of industry classifications.

    The key '<industry1>, any' is tried first and 'any, <industry2>' second. If neither is in
    'industry_dict', a message is printed and the generic ':LineOfBusiness' is returned.

    :param industry1: The text used in the '<industry1>, any' key
    :param industry2: The text used in the 'any, <industry2>' key
    :return: The mapped IRI, or ':LineOfBusiness'
    """
    for candidate in (f'{industry1}, any', f'any, {industry2}'):
        if candidate in industry_dict:
            return industry_dict[candidate]
    print(f'{industry1}, {industry2} key not found')
    return ':LineOfBusiness'
    
    
def get_cia_value(detail: str, search_text: str) -> float:
    """Extract the number found between two markers in a piece of text.

    'search_text' has the form '<prefix> ... <suffix>', e.g. 'total: ... sq km'. The number is the
    text after the prefix and before the next occurrence of the suffix, with commas removed.

    :param detail: The text to search
    :param search_text: The prefix and suffix markers, separated by ' ... '
    :return: The extracted value as a float
    """
    markers = search_text.split(' ... ')
    after_prefix = detail.split(markers[0])[1].replace(',', '')
    number_text = after_prefix.split(markers[1])[0]
    return float(number_text.strip())


def process_urban_rural_total(detail:str) -> (float, float):
    """Read the 'urban:' and 'rural:' percentages out of a piece of text.

    A figure counts as available when its label is present and is not followed by ' NA'.
    If an available 'total:' figure is 0, both results default to 0; otherwise they default
    to -1, which signals "not available". Available urban/rural figures replace the defaults.

    :param detail: Text holding 'urban: N%', 'rural: N%' and/or 'total: N%' entries
    :return: A tuple of the urban and rural values
    """
    def available(label: str) -> bool:
        return f'{label}:' in detail and f'{label}: NA' not in detail

    total = get_cia_value(detail, 'total: ... %') if available('total') else -1
    # A total of zero means that neither breakdown can be above zero
    urban = rural = 0 if not total else -1
    if available('urban'):
        urban = get_cia_value(detail, 'urban: ... %')
    if available('rural'):
        rural = get_cia_value(detail, 'rural: ... %')
    return urban, rural

## Save the Harvard spreadsheet data as RDF

In [5]:
# '0%' worksheet saved as Harvard0.csv
# BOM removed by 'vi'ing the file, using the instruction, ':set nobomb', and then saving the file
# Also, misspellings ("Faeroe Island") of the country name, "Faroe Islands", were corrected

# Organization measurements derived from the impact columns, in the order that they are written.
# Each value is the sum of the listed columns, with the sign reversed by convert_to_numeric().
org_impact_measurements = [
    (':TotalEnvironmentalImpact', ['Total Environmental Impact']),
    (':ImpactToHabitatsAndBiodiversity', ['SDG 14.1', 'SDG 14.2', 'SDG 14.3', 'SDG 14.c',
                                          'SDG 15.1', 'SDG 15.2', 'SDG 15.5']),
    (':ImpactToResilienceToEvents', ['SDG 1.5']),
    (':ImpactToReductionOfDiseasesAndMortality', ['SDG 3.3', 'SDG 3.4', 'SDG 3.9']),
    (':ImpactToAccessToFood', ['SDG 2.1', 'SDG 2.2']),
    (':ImpactToAgriculturalProductivity', ['SDG 2.3', 'SDG 2.4']),
    (':ImpactToPotableWaterAndSanitation', ['SDG 6']),
    (':WaterUseImpact', ['Water Use']),
    (':ImpactOfGreenhouseGases', ['GHG Total']),
    (':ImpactOfVOCs', ['VOC']),
    (':ImpactOfNOx', ['NOx']),
    (':ImpactOfSOx', ['SOx']),
    (':ImpactOfParticulateMatter', ['PM 2.5']),
    (':ImpactOfMiscCompounds', ['Miscellaneous']),
]


def cell_text(row: dict, column: str) -> str:
    """Return a text cell without surrounding white space and with non-breaking spaces normalised."""
    return row[column].strip().replace(u'\xa0', ' ')


def cell_number(row: dict, column: str, reverse: bool) -> float:
    """Return a numeric cell converted by convert_to_numeric()."""
    return convert_to_numeric(row[column].strip(), reverse)


countries = set()
# NOTE(review): the 'geo:' prefix is used in the triples below but is never declared here - add its
# @prefix line once the namespace is confirmed, otherwise Turtle parsers will reject the file
turtle = ['@prefix : <urn:ontoinsights:dna:> .', 
          '@prefix dna: <urn:ontoinsights:dna:> .', 
          '@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .',
          '@prefix unit: <http://qudt.org/vocab/unit/> .', '']
# The csv module requires newline='' so that it can handle line endings itself
with open('Harvard0.csv', newline='') as harvard:
    rows = csv.DictReader(harvard)
    # Process each row which holds the columns:
    #   Year,Company Name,Country,GICS Industry,Industry (BBG),Industry (Exiobase),
    #   Environmental Intensity (Sales),Environmental Intensity (Op Inc),Total Environmental Impact,
    #   Working Capacity,Fish Production Capacity,Crop Production Capacity,Meat Production Capacity,
    #   Biodiversity,Abiotic Resources,Water production capacity (Drinking water & Irrigation Water),
    #   Wood Production Capacity,Unspecified,GHG Total,VOC,NOx,SOx,Water Use,PM 2.5,Miscellaneous,
    #   SDG 1.5,SDG 2.1,SDG 2.2,SDG 2.3,SDG 2.4,SDG 3.3,SDG 3.4,SDG 3.9,SDG 6,SDG 12.2,SDG 14.1,
    #   SDG 14.2,SDG 14.3,SDG 14.c,SDG 15.1,SDG 15.2,SDG 15.5,% BBG,% Exiobase,Industry Revenue,
    #   Company Sales,Operating Income,Industry Multiplier,AWARE CF 
    for row in rows:
        # Get the data of interest
        year = int(cell_number(row, 'Year', False))   # Written as 2018 rather than 2018.0
        org = cell_text(row, 'Company Name')
        country = cell_text(row, 'Country')
        countries.add(country)      # Save the countries for further query in CIA Factbook
        geo_id = names_to_geo_dict[country]
        industry_sales = cell_number(row, 'Industry Revenue', False)
        sales = cell_number(row, 'Company Sales', False)
        income = cell_number(row, 'Operating Income', False)
        aware = row['AWARE CF'].strip()
        # Determine the industry/line of business reference (once, so a miss is reported only once)
        industry = determine_industry(cell_text(row, 'GICS Industry'), cell_text(row, 'Industry (BBG)'))
        # Create Organization Turtle; backslashes and quotes in the name must be escaped in the literal
        org_iri = f':Org{uuid.uuid4()}'
        org_label = org.replace('\\', '\\\\').replace('"', '\\"')
        turtle.extend([f'{org_iri} a :Organization ; rdfs:label "{org_label}" ;',
                       f'  :is_in_industry {industry} ;',
                       f'  :has_headquarters geo:{geo_id} .'])
        create_org_measurement_turtle(':TotalOperatingIncome', org_iri, year, income, turtle)
        create_org_measurement_turtle(':TotalSales', org_iri, year, sales, turtle)
        for meas_type, columns in org_impact_measurements:
            create_org_measurement_turtle(
                meas_type, org_iri, year, sum(cell_number(row, column, True) for column in columns), turtle)
        # Create AvgSalesByIndustry Turtle (triples may be repeated but that will not affect what is stored in the db)
        if industry_sales:
            turtle.extend([f':Measurement{uuid.uuid4()} a :AvgSalesByIndustry ;',
                           f'  :reported_year {year} ; :reported_value {industry_sales} ; :has_unit unit:USDollar ;',
                           f'  :defined_for geo:{geo_id} ; :about_industry {industry} .'])
        # Create WaterScarcityAssessment from AWARE CF
        # The indicator is limited to a range from 0.1 to 100, with a value of 1 corresponding to the world average, 
        # and a value of 10, for example, representing a region where there is 10 times less available water remaining 
        # per area than the world average. 
        # Negligible scarcity has value <= 1, Low has value <= 5, Medium has value <= 10, High has value <= 25
        # Very High scarcity has value <= 100
        if aware and '-' not in aware:
            cf = float(aware)
            turtle.extend([f':Assessment{uuid.uuid4()} a :WaterScarcityAssessment ;',
                           f'  :reported_year {year} ; :defined_for geo:{geo_id} ;'])
            if cf <= 1.0:
                add_assessment(0, turtle)
            elif cf <= 5.0:
                add_assessment(1, turtle)
            elif cf <= 10.0:
                add_assessment(2, turtle)
            elif cf <= 25.0:
                add_assessment(3, turtle)
            else:
                add_assessment(4, turtle)
        turtle.append('')

# Turtle documents are UTF-8 encoded, so do not rely on the platform's default encoding
with open('harvard_data.ttl', 'w', encoding='utf-8') as harvard_out:
    harvard_out.write('\n'.join(turtle))


## Get CIA Factbook data for the Countries in the Harvard spreadsheet

In [81]:
# For each of the countries, get the HTML from the CIA Factbook URL and pass on to BeautifulSoup for parsing
# Save results in new Turtle file
# NOTE(review): the 'geo:' prefix is used in the triples below but is never declared here - add its
# @prefix line once the namespace is confirmed, otherwise Turtle parsers will reject the file
cia_turtle = ['@prefix : <urn:ontoinsights:dna:> .', 
              '@prefix dna: <urn:ontoinsights:dna:> .', 
              '@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .',
              '@prefix unit: <http://qudt.org/vocab/unit/> .', '']

# Spreadsheet country names whose Factbook page is published under a different name
cia_page_names = {'South Korea': 'Korea South', 'Czech Republic': 'Czechia'}
field_href = '/the-world-factbook/field/'


def escape_turtle_string(text: str) -> str:
    """Escape text so that it can be placed inside a double-quoted Turtle string literal."""
    return (text.replace('\\', '\\\\').replace('"', '\\"')
            .replace('\n', '\\n').replace('\r', '\\r'))


def extract_year(text: str) -> str:
    """Return the content of the first parenthesised note in 'text', without any 'est.' marker."""
    return text.split('(')[1].split(')')[0].replace('est.', '').strip()


def field_details(soup, field: str) -> list:
    """Return the text of the element that follows each link to the given Factbook field."""
    return [anchor.parent.next_sibling.get_text()
            for anchor in soup.find_all('a', href=f'{field_href}{field}')]


def add_lacking_access(detail: str, urban_type: str, rural_type: str, geo_id: str, out: list):
    """Write the urban and rural 'unimproved' percentages of a water/sanitation field.

    A measurement is written for each of the two figures that is available; nothing is written
    when the field has no 'unimproved:' section.
    """
    if 'unimproved:' not in detail:
        return
    urban, rural = process_urban_rural_total(detail.split('unimproved:')[1])
    if urban > -1 or rural > -1:
        year = extract_year(detail)
        if urban > -1:
            create_country_measurement_turtle(urban_type, geo_id, year, urban, 'PERCENT', out)
        if rural > -1:
            create_country_measurement_turtle(rural_type, geo_id, year, rural, 'PERCENT', out)


for country in countries:
    geo_id = names_to_geo_dict[country]
    page_name = cia_page_names.get(country, country)    # Need to special case these
    print(page_name)
    page_url = f'{cia_url}{page_name.lower().replace(" ", "-")}/'
    # The 'with' block closes the HTTP response as soon as the page has been read
    with urllib.request.urlopen(page_url, context=ctx) as response:
        html = response.read().decode()
    soup = BeautifulSoup(html, 'html5lib')

    # Get all the statistics of interest
    # From Geography and Environment (current issues); collected first and written as one statement
    # so that the Turtle is terminated correctly even when one of the two fields is missing
    geo_properties = []
    for detail in field_details(soup, 'area'):
        geo_properties.extend([f':area_sq_kms {get_cia_value(detail, "total: ... sq km")}',
                               f':land_sq_kms {get_cia_value(detail, "land: ... sq km")}'])
    for detail in field_details(soup, 'environment-current-issues'):
        geo_properties.append(f':environmental_issues "{escape_turtle_string(detail)}"')
    if geo_properties:
        cia_turtle.append(f'geo:{geo_id} ' + ' ;\n  '.join(geo_properties) + ' .')

    # Assessments from Environment (food insecurity) and People and Society (physicians density)
    # NOTE(review): this href uses an underscore while every other field uses hyphens - confirm
    # against the markup of the live page
    for detail in field_details(soup, 'food_insecurity'):
        cia_turtle.extend([f':Assessment{uuid.uuid4()} a :FoodInsecurityAssessment ;',
                           f'  :reported_year 2021 ; :defined_for geo:{geo_id} ;'])
        # The closing triple has to go to cia_turtle, which holds the statement opened above
        if 'exceptional' in detail:
            add_assessment(4, cia_turtle)
        elif 'widespread' in detail:
            add_assessment(3, cia_turtle)
        else:
            # Severe but localized (Turtle booleans are lower case)
            cia_turtle.append('  :localized true ;')
            add_assessment(3, cia_turtle)
    for detail in field_details(soup, 'physicians-density'):
        # The World Health Organization estimates that fewer than 2.3 health workers per 1,000 is 
        # insufficient to achieve coverage of primary healthcare needs
        # Negligible has value <= 1.0, Low has value < 2.3, Medium has value < 2.6, High has value < 3
        # Very High has value >= 3.0
        num = float(detail.split(' physicians')[0])
        year = extract_year(detail)
        cia_turtle.extend([f':Assessment{uuid.uuid4()} a :HealthCareAssessment ;',
                           f'  :reported_year {year} ; :defined_for geo:{geo_id} ;'])
        if num <= 1.0:
            add_assessment(0, cia_turtle)
        elif num < 2.3:
            add_assessment(1, cia_turtle)
        elif num < 2.6:
            add_assessment(2, cia_turtle)
        elif num < 3.0:
            add_assessment(3, cia_turtle)
        else:
            add_assessment(4, cia_turtle)

    # Measurements from People and Society
    for detail in field_details(soup, 'population'):
        population = detail.split('(')[0].replace(',', '').strip()
        create_country_measurement_turtle(':TotalPopulation', geo_id, 2021, population, '', cia_turtle)
    for detail in field_details(soup, 'population-growth-rate'):
        create_country_measurement_turtle(':PopulationGrowth', geo_id, 2021, 
                                          float(detail.split('%')[0]), 'PERCENT', cia_turtle)
    for detail in field_details(soup, 'drinking-water-source'):
        add_lacking_access(detail, ':UrbanPopulationLackingPotableWater',
                           ':RuralPopulationLackingPotableWater', geo_id, cia_turtle)
    for detail in field_details(soup, 'sanitation-facility-access'):
        add_lacking_access(detail, ':UrbanPopulationLackingSanitation',
                           ':RuralPopulationLackingSanitation', geo_id, cia_turtle)

    # Measurements from Environment
    # NOTE(review): the Factbook figures are quoted in megatons - confirm that unit:TON_US is the
    # intended unit for the CO2 and methane values
    for detail in field_details(soup, 'air-pollutants'):
        if 'particulate matter emissions:' in detail:
            create_country_measurement_turtle(
                ':ParticulateEmissions', geo_id,
                extract_year(detail.split('particulate matter emissions:')[1]),
                get_cia_value(detail, 'particulate matter emissions: ... micrograms'), 
                'MicroGM-PER-M3', cia_turtle)
        if 'carbon dioxide emissions:' in detail:
            create_country_measurement_turtle(
                ':CO2Emissions', geo_id, extract_year(detail.split('carbon dioxide')[1]),
                get_cia_value(detail, 'carbon dioxide emissions: ... megatons') * million, 
                'TON_US', cia_turtle)
        if 'methane emissions:' in detail:
            create_country_measurement_turtle(
                ':MethaneEmissions', geo_id, extract_year(detail.split('methane')[1]),
                get_cia_value(detail, 'methane emissions: ... megatons') * million, 
                'TON_US', cia_turtle)

# Turtle documents are UTF-8 encoded, so do not rely on the platform's default encoding
with open('cia_data.ttl', 'w', encoding='utf-8') as cia_out:
    cia_out.write('\n'.join(cia_turtle))
    

Netherlands
Poland
Colombia
Monaco
Mauritius
Germany
Macau
Ireland
Faroe Islands
Bangladesh
Lebanon
France
Hungary
Panama
Iceland
Austria
Norway
Japan
Italy
Thailand
Denmark
Oman
Georgia
Canada
Korea South
Saudi Arabia
Indonesia
Jersey
Luxembourg
Sweden
Uganda
United States
Brazil
United Kingdom
Mexico
China
Belgium
Hong Kong
Taiwan
Jordan
Spain
Kazakhstan
Philippines
Australia
Peru
Slovenia
Chile
Argentina
Egypt
Sri Lanka
United Arab Emirates
Singapore
Portugal
Botswana
Turkey
Kuwait
India
Finland
Russia
Malaysia
Morocco
Qatar
New Zealand
Czechia
Nigeria
Bermuda
Pakistan
Greece
Israel
Kenya
Switzerland
South Africa


In [None]:
+ Land use   href="/the-world-factbook/field/land-use"
  agricultural land
  agricultural land: arable land
  agricultural land: permanent crops
  agricultural land: permanent pasture
  forest
  other
  ex: agricultural land: 52.9% (2018 est.)arable land: 11.6% (2018 est.)permanent crops: 0.09% (2018 est.)permanent pasture: 88.4% (2018 est.)forest: 16.2% (2018 est.)other: 30.9% (2018 est.)
+ Urbanization   href="/the-world-factbook/field/urbanization"
  urban population
  rate of urbanization
  ex: urban population: 86.4% of total population (2021)rate of urbanization: 1.27% annual rate of change (2020-25 est.)note: data include Christmas Island, Cocos Islands, and Norfolk Island
+ Waste and recycling   href="/the-world-factbook/field/waste-and-recycling"
  municipal solid waste generated annually
  municipal solid waste recycled annually
  percent of municipal solid waste recycled
  ex: municipal solid waste generated annually: 13.345 million tons (2015 est.)municipal solid waste recycled annually: 5,618,245 tons (2015 est.)percent of municipal solid waste recycled: 42.1% (2015 est.)
+ Total renewable water resources   href="/the-world-factbook/field/total-renewable-water-resources"
  ex: 492 billion cubic meters (2017 est.)
    
    # Measurements from Economy
+ Real GDP (purchasing power parity)
  $77.04 billion note: data are in 2017 dollars (2020 est.)
  $78.56 billion note: data are in 2017 dollars (2019 est.)
  $75.6 billion note: data are in 2017 dollars (2018 est.)
  note: data are in 2017 dollars
+ GDP (official exchange rate)    href="/the-world-factbook/field/gdp-official-exchange-rate">
  ex: $475.062 billion (2019 est.)
+ Inflation rate (consumer prices)    href="/the-world-factbook/field/inflation-rate-consumer-prices">
  ex: 11.3% (2019 est.)12.1% (2018 est.)16.5% (2017 est.)
+ Labor force
  8.478 million (2017 est.)  
+ Unemployment rate    href="/the-world-factbook/field/unemployment-rate">
  ex: 16.5% (2017 est.)13.9% (2016 est.)
+ Population below poverty line    href="/the-world-factbook/field/population-below-poverty-line"
  ex: 40.1% (2018 est.)
        
    # Measurements from Energy
+ Electricity access    href="/the-world-factbook/field/electricity-access">
  population without electricity
  electrification - total population
  electrification - urban areas
  electrification - rural areas
  ex: electrification - total population: 100% (2020)
  ex: electrification - total population: 62% (2019)electrification - urban areas: 91% (2019)electrification - rural areas: 30% (2019)
+ Electricity - production    href="/the-world-factbook/field/electricity-production">
  ex: 243 billion kWh (2016 est.)
+ Electricity - consumption    href="/the-world-factbook/field/electricity-consumption">
  ex: 229.4 billion kWh (2016 est.)
+ Electricity - installed generating capacity    href="/the-world-factbook/field/electricity-installed-generating-capacity"
  ex: 65.56 million kW (2016 est.)
+ Electricity - from fossil fuels    href="/the-world-factbook/field/electricity-from-fossil-fuels"
  ex: 72% of total installed capacity (2016 est.)
+ Electricity - from nuclear fuels    href="/the-world-factbook/field/electricity-from-nuclear-fuels"
  ex: 0% of total installed capacity (2017 est.)
+ Electricity - from hydroelectric plants    href="/the-world-factbook/field/electricity-from-hydroelectric-plants"
  ex: 11% of total installed capacity (2017 est.)
+ Electricity - from other renewable sources    href="/the-world-factbook/field/electricity-from-other-renewable-sources"
  ex: 17% of total installed capacity (2017 est.)
+ Crude oil - production    href="/the-world-factbook/field/crude-oil-production"
  ex: 284,000 bbl/day (2018 est.)
+ Crude oil - proved reserves    href="/the-world-factbook/field/crude-oil-proved-reserves"
  ex: 1.821 billion bbl (1 January 2018 est.)
+ Refined petroleum products - production    href="/the-world-factbook/field/refined-petroleum-products-production"
  ex: 462,500 bbl/day (2017 est.)
+ Refined petroleum products - consumption    href="/the-world-factbook/field/refined-petroleum-products-consumption"
  ex: 1.175 million bbl/day (2017 est.)
+ Natural gas - production    href="/the-world-factbook/field/natural-gas-production"
  ex: 105.2 billion cu m (2017 est.)
+ Natural gas - consumption    href="/the-world-factbook/field/natural-gas-consumption"
  ex: 45.25 billion cu m (2017 est.)
+ Natural gas - proved reserves    href="/the-world-factbook/field/natural-gas-proved-reserves"
  ex: 1.989 trillion cu m (1 January 2018 est.)