In [46]:
import pandas as pd
import json
import numpy as np
import re
from collections.abc import Iterable

# Load each CSV export into its own pandas DataFrame.
# The file names are relative paths, so they are resolved against the
# current working directory of the notebook/script.
profiles_df = pd.read_csv('profiles.csv')
organizations_df = pd.read_csv('organizations.csv')
foundersCircles_df = pd.read_csv('circles.csv')
blockchainEcosystem_df = pd.read_csv('blockchainecosystem.csv')
impactArea_df = pd.read_csv('impactarea.csv')
countries_df = pd.read_csv('countries.csv')
workinggroups_df = pd.read_csv('workinggroups.csv')
localnodes_df = pd.read_csv('localnodes.csv')
events_df = pd.read_csv('events.csv')
deals_df = pd.read_csv('deals.csv')
content_df = pd.read_csv('content.csv')


# Character class covering the emoji/pictograph ranges to strip.
# Compiled once at import time: remove_emojis() is called for almost every
# cell of every table, so the pattern should not be rebuilt per call.
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F1E0-\U0001F1FF"  # flags (iOS)
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F700-\U0001F77F"  # alchemical symbols
    "\U0001F780-\U0001F7FF"  # Geometric Shapes Extended
    "\U0001F800-\U0001F8FF"  # Supplemental Arrows-C
    "\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
    "\U0001FA00-\U0001FA6F"  # Chess Symbols
    "\U0001FA70-\U0001FAFF"  # Symbols and Pictographs Extended-A
    "\U00002702-\U000027B0"  # Dingbat symbols
    "]+",
    flags=re.UNICODE,
)


# Function to remove emojis from text
def remove_emojis(text):
    """Return *text* with every character in the emoji ranges removed.

    Only the matched characters are deleted; surrounding whitespace is left
    untouched, so callers that need a trimmed result must strip it
    themselves.

    Args:
        text: The value to clean.  Non-string values (for example numbers
            that pandas parsed from a CSV cell) are converted with ``str()``
            first instead of raising ``TypeError``.

    Returns:
        str: The cleaned text.
    """
    if not isinstance(text, str):
        text = str(text)
    return _EMOJI_PATTERN.sub("", text)

# Build a lookup table from raw identifier to URN for one DataFrame
def create_uris(df, prefix, id_column='Name'):
    """Map each value of ``id_column`` to a ``urn:<prefix>:<value>`` string.

    The dictionary key is ``str()`` of the raw cell value; the URN embeds the
    same string after it has been passed through ``remove_emojis``.  When the
    same key occurs in several rows, the later row wins (both rows produce
    the same URN).

    Args:
        df: DataFrame to read the identifiers from.
        prefix: Namespace segment placed after ``urn:``.
        id_column: Name of the column holding the identifier.

    Returns:
        dict: ``{raw identifier string: URN string}`` in row order.
    """
    uri_by_key = {}
    for _, record in df.iterrows():
        key = str(record[id_column])
        uri_by_key[key] = f"urn:{prefix}:{remove_emojis(key)}"
    return uri_by_key

# One identifier -> URN lookup table per loaded DataFrame.
# Every table is keyed on its 'Name' column except deals, which is keyed on
# the 'Deal' column.  The second argument is the URN namespace segment.
profiles_uris = create_uris(profiles_df, 'profile', id_column='Name')
organizations_uris = create_uris(organizations_df, 'organization', id_column='Name')
foundersCircles_uris = create_uris(foundersCircles_df, 'circle', id_column='Name')
blockchainEcosystem_uris = create_uris(blockchainEcosystem_df, 'blockchainecosystem', id_column='Name')
impactArea_uris = create_uris(impactArea_df, 'impactarea', id_column='Name')
countries_uris = create_uris(countries_df, 'country', id_column='Name')
workinggroups_uris = create_uris(workinggroups_df, 'workinggroup', id_column='Name')
localnodes_uris = create_uris(localnodes_df, 'localnode', id_column='Name')
events_uris = create_uris(events_df, 'event', id_column='Name')
deals_uris = create_uris(deals_df, 'deal', id_column='Deal')
content_uris = create_uris(content_df, 'content', id_column='Name')

# Function to convert a profile row into JSON-LD format
def row_to_jsonld(row):
    """Convert one row of the profiles table into a schema.org ``Person`` node.

    Cells that are NaN are skipped, so the result only contains keys for
    columns that hold a value.  Linked entities (organization, event, local
    node, content, working group) become nested nodes whose ``@id`` is
    looked up in the matching ``*_uris`` mapping.  When the cell value has no
    entry there, ``@id`` is omitted instead of being emitted as ``null``,
    because JSON-LD requires ``@id`` to be a string.

    ``Guild Member`` and ``Working Groups`` both map to ``memberOf``.  With
    only one of them set the value is a single node; with both set the value
    is a list of both nodes, so neither overwrites the other.

    Args:
        row: Mapping-like row (e.g. the Series that
            ``DataFrame.apply(axis=1)`` passes) holding the profile columns
            read below.

    Returns:
        dict: The JSON-LD node for this person.

    Raises:
        KeyError: If ``str(row['Name'])`` is not a key of ``profiles_uris``
            or a column read below is missing from the row.
    """
    def present(column):
        # True when the cell holds a value (not NaN/None).
        return pd.notna(row[column])

    def clean(column):
        # str() guards against non-string cells such as numbers.
        return remove_emojis(str(row[column]))

    def reference(schema_type, uris, column):
        # Nested node pointing at another entity; "@id" only when known.
        node = {"@type": schema_type}
        uri = uris.get(str(row[column]))
        if uri is not None:
            node["@id"] = uri
        node["name"] = clean(column)
        return node

    jsonld = {
        "@context": "https://schema.org",
        "@type": "Person",
        "@id": profiles_uris[str(row['Name'])],
    }

    if present('Name'):
        jsonld["name"] = clean('Name')

    if present('Photo'):
        jsonld["image"] = row['Photo']

    if present('Public Profile'):
        jsonld["publicprofilechecked"] = row['Public Profile']

    if present('Organization'):
        jsonld["affiliation"] = reference("Organization", organizations_uris, 'Organization')

    if present('Role'):
        jsonld["jobTitle"] = clean('Role')

    if present('Founder of'):
        jsonld["alumniOf"] = reference("Organization", organizations_uris, 'Founder of')

    if present('Topic'):
        jsonld["knowsAbout"] = clean('Topic')

    # Both columns feed "memberOf"; collect them so neither is lost.
    memberships = []
    if present('Guild Member'):
        memberships.append(reference("Organization", organizations_uris, 'Guild Member'))
    if present('Working Groups'):
        # Working groups have their own URI table (not the local-node one).
        memberships.append(reference("Organization", workinggroups_uris, 'Working Groups'))
    if memberships:
        jsonld["memberOf"] = memberships[0] if len(memberships) == 1 else memberships

    if present('Languages Spoken'):
        jsonld["knowsLanguage"] = row['Languages Spoken']

    if present('Headline'):
        jsonld["headline"] = clean('Headline')

    if present('Bio'):
        jsonld["description"] = clean('Bio')

    same_as = [row[column] for column in ('Twitter', 'LinkedIn') if present(column)]
    if same_as:
        jsonld["sameAs"] = same_as

    if present('Events (speaking at)'):
        jsonld["attendeeOf"] = reference("Event", events_uris, 'Events (speaking at)')

    if present('Local Community Member'):
        jsonld["LocalCommunity"] = reference("Place", localnodes_uris, 'Local Community Member')

    if present('Content Author of'):
        jsonld["creatorOf"] = reference("CreativeWork", content_uris, 'Content Author of')

    if present('Featured in Content'):
        jsonld["isPartOf"] = reference("MediaObject", content_uris, 'Featured in Content')

    return jsonld

# Convert every profile row into a JSON-LD Person node
profiles_json_ld = profiles_df.apply(row_to_jsonld, axis=1).tolist()

# Print one sample record (index 23) to verify.  The guard checks the length
# so that a table with fewer than 24 rows prints 'No data' instead of
# raising IndexError.
print(json.dumps(profiles_json_ld[23], indent=2) if len(profiles_json_ld) > 23 else 'No data')


# Function to convert an Organization row into JSON-LD format
def organization_row_to_jsonld(row):
    """Convert one row of the organizations table into a schema.org ``Organization`` node.

    Cells that are NaN are skipped.  Linked entities become nested nodes
    whose ``@id`` is looked up in the matching ``*_uris`` mapping; when the
    cell value has no entry there, ``@id`` is omitted instead of being
    emitted as ``null``, because JSON-LD requires ``@id`` to be a string.

    ``Web3``, ``ReFiDAO FC Circles (from Team and Contributors)`` and
    ``Working Groups`` all map to ``memberOf``.  With one of them set the
    value is a single node; with several set the value is a list of all of
    them, so later columns no longer overwrite earlier ones.

    Args:
        row: Mapping-like row (e.g. the Series that
            ``DataFrame.apply(axis=1)`` passes) holding the organization
            columns read below.

    Returns:
        dict: The JSON-LD node for this organization.

    Raises:
        KeyError: If ``str(row['Name'])`` is not a key of
            ``organizations_uris`` or a column read below is missing.
    """
    def present(column):
        # True when the cell holds a value (not NaN/None).
        return pd.notna(row[column])

    def clean(column):
        # str() guards against non-string cells such as numbers.
        return remove_emojis(str(row[column]))

    def reference(schema_type, uris, column):
        # Nested node pointing at another entity.  "@type" is skipped when
        # schema_type is None and "@id" when no URI is known (or no table
        # was given).
        node = {}
        if schema_type is not None:
            node["@type"] = schema_type
        uri = uris.get(str(row[column])) if uris is not None else None
        if uri is not None:
            node["@id"] = uri
        node["name"] = clean(column)
        return node

    jsonld = {
        "@context": "https://schema.org",
        "@type": "Organization",
        "@id": organizations_uris[str(row['Name'])],
    }

    if present('Name'):
        jsonld["name"] = clean('Name')

    if present('Logo'):
        jsonld["logo"] = row['Logo']

    if present('URL'):
        jsonld["url"] = row['URL']

    if present('Type'):
        jsonld["additionalType"] = row['Type']

    if present('Description'):
        jsonld["description"] = clean('Description')

    same_as = [row[column] for column in ('Twitter', 'LinkedIn') if present(column)]
    if same_as:
        jsonld["sameAs"] = same_as

    # Three columns feed "memberOf"; collect them so none is lost.
    membership_sources = (
        ('Web3', blockchainEcosystem_uris),
        ('ReFiDAO FC Circles (from Team and Contributors)', foundersCircles_uris),
        ('Working Groups', workinggroups_uris),
    )
    memberships = [
        reference("Organization", uris, column)
        for column, uris in membership_sources
        if present(column)
    ]
    if memberships:
        jsonld["memberOf"] = memberships[0] if len(memberships) == 1 else memberships

    if present('Founders'):
        jsonld["founder"] = reference("Person", profiles_uris, 'Founders')

    if present('Team and Contributors'):
        jsonld["employee"] = reference("Person", profiles_uris, 'Team and Contributors')

    if present('Received Investment in'):
        # No URI table is consulted for this column, so the node carries a
        # name only.
        jsonld["funder"] = reference("Organization", None, 'Received Investment in')

    if present('Events'):
        jsonld["sponsor"] = reference("Event", events_uris, 'Events')

    if present('Impact area'):
        # Emitted without "@type", as before.
        jsonld["areaServed"] = reference(None, impactArea_uris, 'Impact area')

    if present('Content Produced'):
        jsonld["creatorOf"] = reference("CreativeWork", content_uris, 'Content Produced')

    if present('Featured in Content'):
        jsonld["isPartOf"] = reference("MediaObject", content_uris, 'Featured in Content')

    if present('Local Communities'):
        jsonld["LocalCommunity"] = reference("Organization", localnodes_uris, 'Local Communities')

    if present('Headline'):
        jsonld["headline"] = clean('Headline')

    return jsonld

# Run organization_row_to_jsonld on every row of organizations_df and
# collect the resulting dicts in a list
organizations_json_ld = organizations_df.apply(organization_row_to_jsonld, axis=1).tolist()

# Print the first converted item to verify, or 'No data' when the list is empty
print(json.dumps(organizations_json_ld[0], indent=2) if organizations_json_ld else 'No data')

def country_to_jsonld(row):
    """Convert one row of the countries table into a schema.org ``Country`` node.

    ``Events``/``Events copy``, ``Local Communities`` and ``People`` are
    treated as ``|``-separated lists.  Each entry becomes a nested node whose
    ``@id`` is looked up in the matching ``*_uris`` mapping; ``@id`` is
    omitted when no URI is known, because JSON-LD requires it to be a
    string.  An entry that occurs more than once (for example in both
    ``Events`` and ``Events copy``) is emitted once.

    ``containedInPlace`` is only added when ``Continent`` holds a value; a
    NaN there would otherwise be serialised as the non-standard JSON token
    ``NaN``.

    Args:
        row: Mapping-like row (e.g. the Series that
            ``DataFrame.apply(axis=1)`` passes) holding the country columns
            read below.

    Returns:
        dict: The JSON-LD node for this country.

    Raises:
        KeyError: If ``str(row['Name'])`` is not a key of ``countries_uris``
            or a column read below is missing from the row.
    """
    def split_cell(column):
        # Stripped, non-empty '|'-separated entries of a cell; [] for NaN.
        cell = row[column]
        if pd.isna(cell):
            return []
        return [part.strip() for part in str(cell).split('|') if part.strip()]

    def references(schema_type, uris, names):
        # One nested node per distinct name, in first-seen order.
        nodes = []
        seen = set()
        for name in names:
            if name in seen:
                continue
            seen.add(name)
            node = {"@type": schema_type}
            uri = uris.get(name)
            if uri is not None:
                node["@id"] = uri
            node["name"] = name
            nodes.append(node)
        return nodes

    jsonld = {
        "@context": "https://schema.org",
        "@type": "Country",
        "@id": countries_uris[str(row['Name'])],
        "name": row['Name'],
    }
    if pd.notna(row['Continent']):
        jsonld["containedInPlace"] = {"@type": "Continent", "name": row['Continent']}
    jsonld["languages"] = row['Languages'] if pd.notna(row['Languages']) else None

    # If there are events, add them to the JSON-LD
    events = references(
        "Event", events_uris, split_cell('Events') + split_cell('Events copy')
    )
    if events:
        jsonld['event'] = events

    # If there are local communities, add them to the JSON-LD
    local_communities = references(
        "Organization", localnodes_uris, split_cell('Local Communities')
    )
    if local_communities:
        jsonld['Local Community'] = local_communities

    # If there are people, add them to the JSON-LD
    people = references("Person", profiles_uris, split_cell('People'))
    if people:
        jsonld['associatedPersons'] = people

    return jsonld


# Run country_to_jsonld on every row of countries_df and collect the
# resulting dicts in a list
countries_json_ld = countries_df.apply(country_to_jsonld, axis=1).tolist()

# Print the first converted item to verify, or 'No data' when the list is empty
print(json.dumps(countries_json_ld[0], indent=2) if countries_json_ld else 'No data')

def impact_area_to_jsonld(row):
    """Convert one row of the impact-area table into a JSON-LD node.

    The node's ``@id`` is taken from ``impactArea_uris`` so that it is the
    same URN other nodes use when they reference an impact area; if the name
    is not in that table, a URN in the same ``urn:impactarea:`` scheme is
    built on the fly.

    ``Organizations``, ``Events`` and ``People`` are treated as
    ``|``-separated lists.  Each entry becomes a nested node whose ``@id`` is
    looked up in the matching ``*_uris`` mapping; ``@id`` is omitted when no
    URI is known, because JSON-LD requires it to be a string.

    Args:
        row: Mapping-like row (e.g. the Series that
            ``DataFrame.apply(axis=1)`` passes) holding the impact-area
            columns read below.

    Returns:
        dict: The JSON-LD node.  ``description``, ``image`` and ``headline``
        are always present and are ``None`` when the cell is NaN.

    Raises:
        KeyError: If a column read below is missing from the row.
    """
    def references(schema_type, uris, column):
        # One nested node per stripped, non-empty '|'-separated entry.
        nodes = []
        for part in str(row[column]).split('|'):
            entry = part.strip()
            if not entry or entry.lower() == 'nan':
                continue
            node = {"@type": schema_type}
            uri = uris.get(entry)
            if uri is not None:
                node["@id"] = uri
            node["name"] = entry
            nodes.append(node)
        return nodes

    raw_name = str(row['Name'])
    cleaned_name = remove_emojis(raw_name)
    name = cleaned_name.strip()
    jsonld = {
        "@context": "https://schema.org",
        "@type": "ImpactArea",  # Assuming "ImpactArea" is the appropriate type
        # Same URN that references built from impactArea_uris point at.
        "@id": impactArea_uris.get(raw_name, f"urn:impactarea:{cleaned_name}"),
        "name": name,
        "description": row['Description'] if pd.notna(row['Description']) else None,
        "image": row['Logo'] if pd.notna(row['Logo']) else None,
        "headline": row['Headline'] if pd.notna(row['Headline']) else None,
    }

    if pd.notna(row['Organizations']):
        # Look the URN up in the URI table (not in the DataFrame itself).
        jsonld['organizations'] = references("Organization", organizations_uris, 'Organizations')

    if pd.notna(row['Events']):
        jsonld['event'] = references("Event", events_uris, 'Events')

    if pd.notna(row['People']):
        jsonld['associatedPersons'] = references("Person", profiles_uris, 'People')

    return jsonld

# Run impact_area_to_jsonld on every row of impactArea_df and collect the
# resulting dicts in a list
impact_areas_json_ld = impactArea_df.apply(impact_area_to_jsonld, axis=1).tolist()

# Print the first converted item to verify, or 'No data' when the list is empty
print(json.dumps(impact_areas_json_ld[0], indent=2) if impact_areas_json_ld else 'No data')


def blockchain_ecosystem_to_jsonld(row):
    """Convert one row of the blockchain-ecosystem table into a JSON-LD node.

    The node's ``@id`` is taken from ``blockchainEcosystem_uris`` so that it
    is the same URN other nodes use when they reference an ecosystem; if the
    name is not in that table, a URN in the same
    ``urn:blockchainecosystem:`` scheme is built on the fly.

    ``Organizations`` and ``Stewards`` are treated as ``,``-separated lists,
    ``Content`` and ``Events`` as ``|``-separated lists.  Empty entries are
    skipped.  Stewards are paired with ``Email (from Stewards)`` by
    position; when there are fewer emails than stewards the remaining
    stewards are still emitted, just without an email.  ``@id`` is only
    added to nested nodes when a URI is known, because JSON-LD requires it
    to be a string.

    Args:
        row: Mapping-like row (e.g. the Series that
            ``DataFrame.apply(axis=1)`` passes) holding the ecosystem
            columns read below.

    Returns:
        dict: The JSON-LD node for this ecosystem.

    Raises:
        KeyError: If a column read below is missing from the row.
    """
    raw_name = str(row['Name'])
    jsonld = {
        "@context": "https://schema.org",
        "@type": "Organization",
        # Same URN that references built from blockchainEcosystem_uris use.
        "@id": blockchainEcosystem_uris.get(
            raw_name, f"urn:blockchainecosystem:{remove_emojis(raw_name)}"
        ),
        "name": row['Name']
    }

    # Plain scalar columns: copied through unchanged when present.
    scalar_columns = (
        ('URL', 'url'),
        ('Twitter', 'sameAs'),
        ('Logo', 'logo'),
        ('Market cap', 'marketCap'),
        ('TVL ($)', 'totalValueLocked'),
        ('Grants awarded', 'grantsAwarded'),
        ('Grants value ($)', 'grantsValue'),
    )
    for column, key in scalar_columns:
        if pd.notna(row[column]):
            jsonld[key] = row[column]

    # If there are organizations, add them to the JSON-LD
    # NOTE(review): these are listed under "memberOf"; if the column holds
    # the organizations that belong to the ecosystem, "member" may have been
    # intended -- confirm before changing the key.
    if pd.notna(row['Organizations']):
        organizations = []
        for part in str(row['Organizations']).split(','):
            org_name = part.strip()
            if not org_name:
                continue
            organization = {
                "@type": "Organization",
                "name": org_name
            }
            if org_name in organizations_uris:
                organization["@id"] = organizations_uris[org_name]
            organizations.append(organization)
        if organizations:
            jsonld['memberOf'] = organizations

    # If there are stewards, add them to the JSON-LD
    if pd.notna(row['Stewards']):
        steward_names = str(row['Stewards']).split(',')
        email_cell = row['Email (from Stewards)']
        steward_emails = str(email_cell).split(',') if pd.notna(email_cell) else []
        # Pad so zip() never drops a steward when emails are missing.
        steward_emails += [None] * (len(steward_names) - len(steward_emails))

        stewards = []
        for steward, email in zip(steward_names, steward_emails):
            steward = steward.strip()
            if not steward:
                continue
            steward_data = {
                "@type": "Person",
                "name": steward
            }
            # Prefer the profile URI; fall back to the email if there is none
            if profiles_uris.get(steward):
                steward_data["@id"] = profiles_uris[steward]
            elif email and email.strip():
                steward_data["email"] = email.strip()
            stewards.append(steward_data)
        if stewards:
            jsonld['accountablePerson'] = stewards

    # If there are content, add them to the JSON-LD
    if pd.notna(row['Content']):
        jsonld['owns'] = [
            part.strip() for part in str(row['Content']).split('|') if part.strip()
        ]

    # If there are events, add them to the JSON-LD
    if pd.notna(row['Events']):
        events = []
        for part in str(row['Events']).split('|'):
            event_name = part.strip()
            if not event_name:
                continue
            event = {"@type": "Event"}
            event_uri = events_uris.get(event_name)
            if event_uri is not None:
                event["@id"] = event_uri
            event["name"] = event_name
            events.append(event)
        jsonld['event'] = events

    return jsonld

# Run blockchain_ecosystem_to_jsonld on every row of blockchainEcosystem_df
# and collect the resulting dicts in a list
blockchain_ecosystems_json_ld = blockchainEcosystem_df.apply(blockchain_ecosystem_to_jsonld, axis=1).tolist()

# Print the first converted item to verify, or 'No data' when the list is empty
print(json.dumps(blockchain_ecosystems_json_ld[0], indent=2) if blockchain_ecosystems_json_ld else 'No data')


{
  "@context": "https://schema.org",
  "@type": "Person",
  "@id": "urn:profile:Do\u011fa \u00d6zt\u00fcz\u00fcn",
  "name": "Do\u011fa \u00d6zt\u00fcz\u00fcn",
  "image": "image.png (https://v5.airtableusercontent.com/v1/21/21/1695693600000/mb9e95k_CRT2bKwEyHO0ZA/jP6qOCOKNpksO0vx8wIwIyHbg8i3pKd433FMGlb20WdU-A3oicVgQrl8Ah4l31e4Xf7O7zkcqoXcjztreJlFHvR55rl0XLus4QIa0_Boptw/WOiec5_8SbpWGsR9paBln6v432i6swkYwiRrqo_pf9I)",
  "publicprofilechecked": "checked",
  "affiliation": {
    "@type": "Organization",
    "@id": "urn:organization:Earthist Network",
    "name": "Earthist Network"
  },
  "jobTitle": "\u00a0Founder",
  "alumniOf": {
    "@type": "Organization",
    "@id": "urn:organization:Earthist Network",
    "name": "Earthist Network"
  },
  "knowsAbout": "MRV,Agriculture,Biodiversity",
  "description": "Regenerative hemp production.",
  "sameAs": [
    "https://www.linkedin.com/in/dogaoztuzun/"
  ],
  "LocalCommunity": {
    "@type": "Place",
    "@id": "urn:localnode:ReFi T\u00fcrkiy