# HarmonyHaven

HH will be an AI-Powered virtual garden to support collaboration and teamwork, with a focus on mental health, and drive engagement for remote workers.

## Data Integration

Our vision is to connect with business tools popular today and pull in their data to track projects and interactions. And then use this data not only to track these things but integrate AI for summarization, recommendations, etc.

### Imports

In [26]:
import os
import json
import requests
import pandas as pd
from datetime import datetime, timezone

from dotenv import load_dotenv

### Setup API Variables

In [27]:
load_dotenv()
api_token = os.getenv('NOTION_TOKEN')

notion_request_headers = {
    "Authorization": f"Bearer {api_token}",
    "Content-Type": "application/json",
    "Notion-Version": "2022-06-28"
}

### Pages

Pull down metadata for all pages. Unfortunately the endpoint only pulls in pages that are part of a database, but I may have a workaround. The details pulled in that query will have parent information, in that case being the parent database. But if I can then also pull the metadata of the database, I can maybe get more parent info from that database to get more information.

In [28]:
search_params = {"filter": {"value": "page", "property": "object"}}
page_url = 'https://api.notion.com/v1/search'
page_search_response = requests.post(
    page_url, json=search_params, headers=notion_request_headers)

page_search = page_search_response.json()

pages = page_search['results']

# with open('../data/page_search.json', 'w', encoding='utf8') as f:
#     json.dump(pages, f, ensure_ascii=False, indent=4)  

### Find Page Metadata
page_id, title, created_date, created_by, last_edit_date, last_edited_by

In [29]:
#page_id
pages[0]['id']

'd691c2c9-4fd2-4baf-a6fd-83322633867f'

In [30]:
#page_title
pages[0]['properties']['title']['title'][0]['plain_text']

'10/24/23 Weekly Meeting'

In [31]:
#try and except for different title paths
try:
    print(pages[1]['properties']['title']['title'][0]['plain_text'])
except KeyError:
    print(pages[1]['properties']['']['title'][0]['plain_text'])


Meeting Notes Team Space


In [32]:
#created_time
pages[0]['created_time']

'2023-10-25T00:45:00.000Z'

In [33]:
#created_by
pages[0]['created_by']['id']

'73daee2c-ca1d-4f67-95f9-8fa2b09f7c76'

In [34]:
#last_edited
pages[0]['last_edited_time']

'2023-10-25T22:19:00.000Z'

In [35]:
#last_edited_by
pages[0]['last_edited_by']['id']

'4c629a82-589e-46e2-8d2c-6f1edb7747ae'

In [36]:
for page in pages:
    print(page['id'])

d691c2c9-4fd2-4baf-a6fd-83322633867f
0fa7ab57-628c-4fd9-b09e-5f0841d7fc54
ee3521d2-4eb8-4877-8269-f33a3f20b797
f99302ab-5823-4b41-91ec-ee19e76c9507
70435839-44c4-4b9b-83e5-e32bcc1b7953
168dd073-847b-4c40-86fd-2ce2a38af9ff
f682d891-f32b-43e0-9ee0-816708106592
97669985-adde-4d8e-8257-f0682b706488
f1b60866-6355-481a-ad82-685ae0b9bac8
fd6b3b97-aecd-4a58-8869-f8445954fb2e
3b55f455-2084-417b-b0ce-b4d496d776e7
276b6640-208d-47e8-85fb-94d4ebff5839
17b5fb74-f3bf-4dea-8603-26b14e449807
a710a520-8d71-430c-bd98-2a9a83bb54f6
edb93178-f5f0-403c-9eba-b99aec713d5f
071ac2f4-c4ba-4ef6-bbe6-fbfe1574681e
845c6795-17ca-4c3e-b350-dd502ad885e3
80d73909-0bbb-40e8-8064-7cf76dce32d6
9b368e8e-ea07-40c0-997a-6504b4126435
82714da0-753c-441f-9b88-7440176a3d49
84f27261-b092-41b2-9015-695349b921e6
b8f0aacc-176d-4432-b3b8-5bd19d892615


### Test Database Pull

First need to find a page that has a database. Looking for the secondary research page.

In [50]:
pages[16]['properties']['']['title'][0]['plan_text']

IndexError: list index out of range

In [54]:
#find page titles
i=0
for page in pages:
    try:
        title = (page['properties']['title']['title'][0]['plain_text'])
    except KeyError:
        try:
            title = (page['properties']['']['title'][0]['plain_text']) 
        except IndexError:
            title = ''
    print(f'{i}: {title}')

    i+=1

0: 10/24/23 Weekly Meeting
1: Meeting Notes Team Space
2: 10/19/23 Weekly Meeting
3: HarmonyHaven Security
4: HarmonyHaven TeamSpace 
5: AI Garden Journal
6: Secondary Research
7: Brand Identity Research
8: 10/17/23 - Weekly Meeting
9: 10/10/23 - Design & Development Meeting#1
10: 10/5/23 - Planning Phase Meeting
11: Planning Phase
12: Survey update - 10/03/23 
13: Overview of Tasks 
14: Planning Phase - 2nd Meeting
15: Kick-Off Meeting Notes - 10/1/23
16: 
17: Design & Development Phase
18: Project Management & Communication Tools
19: Development & Testing Phase
20: Final Touches & Demo Preparation
21: Survey Summary - 10/05/23


In [37]:
page_response_list = []

for page in pages:
    page_id = page['id']
    blocks_url = f'https://api.notion.com/v1/blocks/{page_id}/children'

    blocks_response = requests.get(blocks_url, headers=notion_request_headers)
    blocks = blocks_response.json()

    page_response_list.append(blocks)


# Events Data

### Breakout key pieces for structure

In [49]:
page_response_list[0]['results'][0]

{'object': 'block',
 'id': 'b2d81975-6683-464a-b05a-ca0d05517c9f',
 'parent': {'type': 'page_id',
  'page_id': 'ee3521d2-4eb8-4877-8269-f33a3f20b797'},
 'created_time': '2023-10-20T05:20:00.000Z',
 'last_edited_time': '2023-10-20T05:20:00.000Z',
 'created_by': {'object': 'user',
  'id': '73daee2c-ca1d-4f67-95f9-8fa2b09f7c76'},
 'last_edited_by': {'object': 'user',
  'id': '73daee2c-ca1d-4f67-95f9-8fa2b09f7c76'},
 'has_children': False,
 'archived': False,
 'type': 'heading_2',
 'heading_2': {'rich_text': [{'type': 'text',
    'text': {'content': 'Th, Oct 19, 2023 | ', 'link': None},
    'annotations': {'bold': False,
     'italic': False,
     'strikethrough': False,
     'underline': False,
     'code': False,
     'color': 'default'},
    'plain_text': 'Th, Oct 19, 2023 | ',
    'href': None},
   {'type': 'text',
    'text': {'content': 'Weekly Meetings - HarmonyHaven Hackathon',
     'link': {'url': 'https://www.google.com/calendar/event?eid=XzYwcTMwYzFnNjBvMzBlMWk2MG80YWMxZzYwcmo4Z

In [22]:
#block id
page_response_list[4]['results'][5]['id']

'cd4e4b85-d6d4-4668-bad5-de938eab7cd2'

In [23]:
#page id
if page_response_list[4]['results'][5]['parent']['type'] == 'page_id': #not sure if I need this, haven't seen other page IDs
    print(page_response_list[4]['results'][5]['parent']['page_id'])

0fa7ab57-628c-4fd9-b09e-5f0841d7fc54


In [24]:
#created date
page_response_list[4]['results'][5]['created_time']

'2023-10-11T14:21:00.000Z'

In [25]:
#creating user
page_response_list[4]['results'][5]['created_by']['id'] #also have a object/user type, not sure if I need.

'81a566e7-6828-4e3a-9d5a-65e0a627ce28'

In [26]:
#last edited time
page_response_list[4]['results'][5]['last_edited_time']

'2023-10-11T14:21:00.000Z'

In [27]:
#last edited by
page_response_list[4]['results'][5]['last_edited_by']['id'] #also have a object/user type, not sure if I need.

'81a566e7-6828-4e3a-9d5a-65e0a627ce28'

In [28]:
#type
type = page_response_list[4]['results'][5]['type']
type

'paragraph'

In [29]:
#contents
page_response_list[4]['results'][5][type]

{'rich_text': [{'type': 'text',
   'text': {'content': 'HarmonyHaven Meeting Notes', 'link': None},
   'annotations': {'bold': False,
    'italic': False,
    'strikethrough': False,
    'underline': False,
    'code': False,
    'color': 'default'},
   'plain_text': 'HarmonyHaven Meeting Notes',
   'href': None}],
 'color': 'default'}

### Create A Dataframe of Responses

In [90]:
#drill down to the page types and save to list
block_content_list = []

i=0
for page_response in page_response_list[19:]:
    k=0
    for block in page_response['results']:
        try:
            type = block['type']
            contents = block[type]
        except IndexError:
            type = ''
            contents = ''
        finally:
            if block['parent']['type'] == 'page_id': #not sure if I need this, haven't seen other page IDs
                page_id = block['parent']['page_id']
            block_data = {
                'block_id': block['id'],
                'page_id': page_id,
                'created_date': block['created_time'],
                'created_by': block['created_by']['id'],
                'last_edit_date': block['last_edited_time'],
                'last_edit_by': block['last_edited_by']['id'],
                'block_type': block['type'],
                'block_type': type,
                'block_contents': contents
                }
            
            #append to list
            block_content_list.append(block_data)
        k+=1
    i+=1

In [95]:
block_df = pd.DataFrame(block_content_list)

block_df.iloc[3,7]

{'rich_text': [{'type': 'text',
   'text': {'content': 'In conclusion, the survey results highlight the importance of shared virtual spaces, group task management, and real-time collaboration activities in a virtual team engagement tool. These features can enhance team interactions, facilitate task management, and promote collaboration.\n',
    'link': None},
   'annotations': {'bold': False,
    'italic': False,
    'strikethrough': False,
    'underline': False,
    'code': False,
    'color': 'default'},
   'plain_text': 'In conclusion, the survey results highlight the importance of shared virtual spaces, group task management, and real-time collaboration activities in a virtual team engagement tool. These features can enhance team interactions, facilitate task management, and promote collaboration.\n',
   'href': None}],
 'color': 'default'}

In [32]:
block_df['block_type'].value_counts()

block_type
bulleted_list_item    98
paragraph             91
numbered_list_item    29
divider               19
heading_2             16
to_do                 14
callout               10
child_page             8
table                  4
column_list            3
heading_1              3
quote                  3
image                  3
Name: count, dtype: int64

### Drop types that aren't valuable to me

In [33]:
#view table contents --> drop. Don't see the value here.
block_df[block_df['block_type'] == 'table'].iloc[0,7]

{'table_width': 3, 'has_column_header': True, 'has_row_header': False}

In [34]:
#view child_page contents --> keep
block_df[block_df['block_type'] == 'child_page'].iloc[0,7]

{'title': 'AI Garden Journal'}

In [35]:
#view divider contents --> drop
block_df[block_df['block_type'] == 'divider'].iloc[0,7]

{}

In [36]:
#view quote contents --> keep
block_df[block_df['block_type'] == 'quote'].iloc[0,7]

{'rich_text': [{'type': 'text',
   'text': {'content': 'Example questions to answer: ', 'link': None},
   'annotations': {'bold': True,
    'italic': False,
    'strikethrough': False,
    'underline': False,
    'code': False,
    'color': 'gray'},
   'plain_text': 'Example questions to answer: ',
   'href': None},
  {'type': 'text',
   'text': {'content': '\nWhat problem are we solving? For whom? When do they experience this issue?\nWhat data, research and feedback do we have that explains this problem? \nWhich customers are we working with, or hearing from, to better understand this problem? \nWhy is solving this problem urgent? Why is it important? ',
    'link': None},
   'annotations': {'bold': False,
    'italic': False,
    'strikethrough': False,
    'underline': False,
    'code': False,
    'color': 'gray'},
   'plain_text': '\nWhat problem are we solving? For whom? When do they experience this issue?\nWhat data, research and feedback do we have that explains this problem? \

In [37]:
#view column_list contents --> drop
block_df[block_df['block_type'] == 'column_list'].iloc[0,7]

{}

In [38]:
#view to_do contents --> keep
block_df[block_df['block_type'] == 'to_do'].iloc[0,7]

{'rich_text': [{'type': 'text',
   'text': {'content': 'Confirm the frequency of Team Meetings ',
    'link': None},
   'annotations': {'bold': False,
    'italic': False,
    'strikethrough': False,
    'underline': False,
    'code': False,
    'color': 'default'},
   'plain_text': 'Confirm the frequency of Team Meetings ',
   'href': None}],
 'checked': True,
 'color': 'default'}

In [39]:
#drop the columns we don't need
block_df = block_df[~block_df['block_type'].isin(['table','divider','column_list'])].reset_index(drop=True)

In [40]:
block_df.iloc[0,7]['rich_text'][0]

{'type': 'text',
 'text': {'content': 'Notion Tip:', 'link': None},
 'annotations': {'bold': True,
  'italic': False,
  'strikethrough': False,
  'underline': False,
  'code': False,
  'color': 'default'},
 'plain_text': 'Notion Tip:',
 'href': None}

Noticing when there are different annotations in the same block it splits them out by annotation. So I'll probably want to set it up to loop through the records in each content and combine them so it's one text field.

### Drill Into Example with Multiple Seperate Annotations

In [41]:
test_block = block_df[block_df['block_type'] == 'quote'].iloc[0,7]['rich_text']

storage_list = []
for i in test_block:
    content = i['plain_text']
    storage_list.append(content)

In [42]:
''.join(storage_list)

'Example questions to answer: \nWhat problem are we solving? For whom? When do they experience this issue?\nWhat data, research and feedback do we have that explains this problem? \nWhich customers are we working with, or hearing from, to better understand this problem? \nWhy is solving this problem urgent? Why is it important? '

# Putting Things All Together

If I can use these steps I can pull together metadata for each page and save the contents as a document. 

I see value in tracking the details I've already put together, but also in focusing on document summarization.

**Metadata Fields**
- page_id
- page_title
- created_date
- created_by
- last_edit_date
- last_edited_by

May also need to research running into limits, and batching in a way that lets me get every block.

In [131]:
pages[6]

{'object': 'page',
 'id': 'f682d891-f32b-43e0-9ee0-816708106592',
 'created_time': '2023-10-05T01:32:00.000Z',
 'last_edited_time': '2023-10-10T22:17:00.000Z',
 'created_by': {'object': 'user',
  'id': '6a405e7f-6ad0-4932-8f6b-f5bb2980d6b0'},
 'last_edited_by': {'object': 'user',
  'id': '73daee2c-ca1d-4f67-95f9-8fa2b09f7c76'},
 'cover': None,
 'icon': {'type': 'emoji', 'emoji': '🔬'},
 'parent': {'type': 'block_id',
  'block_id': 'fb6a8421-dbdf-4d6e-8059-7ff7f8263d62'},
 'archived': False,
 'properties': {'title': {'id': 'title',
   'type': 'title',
   'title': [{'type': 'text',
     'text': {'content': 'Secondary Research', 'link': None},
     'annotations': {'bold': False,
      'italic': False,
      'strikethrough': False,
      'underline': False,
      'code': False,
      'color': 'default'},
     'plain_text': 'Secondary Research',
     'href': None}]}},
 'url': 'https://www.notion.so/Secondary-Research-f682d891f32b43e09ee0816708106592',
 'public_url': None}

In [13]:
#########################
#reviewed the different responses and found some errors, ended up excluding databases. I want to use this mainly for page summarization. So those database entries will be less important in this context.
#may try to integrate it at somepoint
#########################

structured_document_list = []

for page in pages:
    #skip if database
    if page['parent']['type'] == 'database_id':
        continue

    #save metadata
    page_id = page['id']
    try: #titles come in different formats, so hopefully this will make it work!
        page_title = page['properties']['title']['title'][0]['plain_text']
    except KeyError:
        page_title = page['properties']['']['title'][0]['plain_text']

    page_created_date = page['created_time']
    page_created_by_id = page['created_by']['id']
    page_last_update_date = page['last_edited_time']
    page_last_update_by_id = page['last_edited_by']['id']

    #pull blocks from each page
    blocks_url = f'https://api.notion.com/v1/blocks/{page_id}/children'
    blocks_response = requests.get(blocks_url, headers=notion_request_headers)
    blocks = blocks_response.json()

    #drill into block content, add to list, and combine
    block_content_list = []

    n = 1
    previous_block = ''
    for block in blocks['results']:
        #save block type and content
        try:
            block_type = block['type']
        except IndexError:
            block_type = ''

        #skip if block_type one of the types we won't use
        if block_type in ['table','divider','column_list','child_page','image','']:
            continue

        #loop through annotations within block content of the types we want
        block_annotation_list = []
        for i in block[block_type]['rich_text']:
            try:
                content_item = i['plain_text']
                block_annotation_list.append(content_item)
            except KeyError:
                pass
        #add bullets and numbers if applicable
        if block_type == 'bulleted_list_item':    
            content = '\n-'+''.join(block_annotation_list) #combine seperated annotations
            previous_block = block_type #save off previous block to increment number in ordered fashion
        elif block_type == 'numbered_list_item':
            if previous_block == 'numbered_list_item':
                content = '\n'+str(n)+'.'+''.join(block_annotation_list) #combine seperated annotations
                previous_block = block_type #save off previous block to increment number in ordered fashion
                n += 1 #increment number
            else:
                n=1 #set n back to 1
                content = '\n'+str(n)+'.'+''.join(block_annotation_list) #combine seperated annotations
                previous_block = block_type #save off previous block to increment number in ordered fashion
                n += 1 #increment number
        else:
            content = '\n'+''.join(block_annotation_list) #combine seperated annotations
            previous_block = block_type #save off previous block to increment number in ordered fashion

        block_content_list.append(content) #append the combined items to list
        page_content = {
            'page_id': page_id,
            'page_title': page_title,
            'page_created_date': page_created_date,
            'page_created_by_id': page_created_by_id,
            'page_last_update_date': page_last_update_date,
            'page_last_update_by_id': page_last_update_by_id,
            'content': ''.join(block_content_list) #save content as one big combined block
        }

    structured_document_list.append(page_content)


In [32]:
filepath = './../data/'

for doc in structured_document_list[0:1]:  
    title = doc['page_title']
    filename = doc['page_title'].replace(" ","").replace("&","and").replace("#","") #remove chars that don't work in titles
    created_date = doc['page_created_date']
    content = doc['content']
    text_file = f'Title: {title}\nCreated On: {created_date}\n\n{content}'
    print(text_file)

Title: 10/24/23 Weekly Meeting
Created On: 2023-10-25T00:45:00.000Z


Tues, 10/24/23 | Weekly Meetings - HarmonyHaven Hackathon
Attendees: Emma Myint Kristy Deng Micah Fleming Brett Dickinson 

Vowel Recording 1: https://workspace-5b7pmm1.vowel.com/meeting/bNpYLTyGdoz2d3qr
Vowel Recording 2: https://workspace-qxm44vs.vowel.com/meeting/CVaD9RGEggeZgeDb

Tasks to Highlight
-Onboarding/Integration
-AI Journal
-Dashboard
Goals
-Finalize branding
-Finalize user flows
-Finalize which flows will have the integrated chat bot
-Delegate tasks for writing copy
-Review Micah's datapoints and research
Discussion Items
-Discuss integration of AI in chatbot and dashboard
-Integration of harmonyhaven into existing data management ecosystem (e.g. Microsoft suite, Google workspace)
-DESIGN SYSTEM CHECKLIST (example: https://atlassian.design/brand/mission )
-For content writing inspo: https://mobbin.com/browse/ios/apps
Action Items
Micah: start the slides and input his research to reflect the project, re

In [16]:
test_txt = structured_document_list[0]['content']

In [17]:
#test writing to txt file
with open('../data/dev/txt_test.txt', 'w') as f:
    f.write(test_txt)

### Build as Dataframe

We can then join in some of our other data, like user name and role.

In [52]:
pages_df = pd.DataFrame(structured_document_list)

pages_df.head()

Unnamed: 0,page_id,page_title,page_created_date,page_created_by_id,page_last_update_date,page_last_update_by_id,content
0,ee3521d2-4eb8-4877-8269-f33a3f20b797,10/19/23 Weekly Meeting,2023-10-20T05:19:00.000Z,73daee2c-ca1d-4f67-95f9-8fa2b09f7c76,2023-10-20T12:56:00.000Z,4c629a82-589e-46e2-8d2c-6f1edb7747ae,"Th, Oct 19, 2023 | Weekly Meetings - HarmonyHa..."
1,f99302ab-5823-4b41-91ec-ee19e76c9507,HarmonyHaven Security,2023-10-07T14:00:00.000Z,4c629a82-589e-46e2-8d2c-6f1edb7747ae,2023-10-20T11:22:00.000Z,4c629a82-589e-46e2-8d2c-6f1edb7747ae,
2,70435839-44c4-4b9b-83e5-e32bcc1b7953,HarmonyHaven TeamSpace,2023-10-04T17:15:00.000Z,73daee2c-ca1d-4f67-95f9-8fa2b09f7c76,2023-10-20T11:22:00.000Z,4c629a82-589e-46e2-8d2c-6f1edb7747ae,Brian Elliot is one of the judges for the Hack...
3,168dd073-847b-4c40-86fd-2ce2a38af9ff,AI Garden Journal,2023-10-09T02:44:00.000Z,6a405e7f-6ad0-4932-8f6b-f5bb2980d6b0,2023-10-20T11:22:00.000Z,4c629a82-589e-46e2-8d2c-6f1edb7747ae,Notion Tip: Here at Notion we use this templat...
4,f682d891-f32b-43e0-9ee0-816708106592,Secondary Research,2023-10-05T01:32:00.000Z,6a405e7f-6ad0-4932-8f6b-f5bb2980d6b0,2023-10-20T11:20:00.000Z,4c629a82-589e-46e2-8d2c-6f1edb7747ae,"This is great @Brett Dickinson, thanks for add..."


### Merge in User Names

In [36]:
pages_df = pages_df.merge(dim_user[['user_id', 'user_name', 'role']],how='left', left_on='page_created_by_id', right_on='user_id') #with created by id
pages_df.rename(columns={'user_name': 'created_by_user', 'role': 'created_by_user_role'}, inplace=True) #rename fields
pages_df.drop(columns=['user_id'], inplace=True) #drop user_id

pages_df = pages_df.merge(dim_user[['user_id', 'user_name', 'role']],how='left', left_on='page_last_update_by_id', right_on='user_id') #with last update by id
pages_df.rename(columns={'user_name': 'last_modified_by_user', 'role': 'last_modified_by_user_role'}, inplace=True) #rename fields
pages_df.drop(columns=['user_id'], inplace=True) #drop user_id

In [44]:
pages_df['page_title'].value_counts()

page_title
Survey update - 10/03/23                     2
10/19/23 Weekly Meeting                      1
HarmonyHaven Security                        1
HarmonyHaven TeamSpace                       1
AI Garden Journal                            1
Secondary Research                           1
Brand Identity Research                      1
Meeting Notes Team Space                     1
10/17/23 - Weekly Meeting                    1
10/10/23 - Design & Development Meeting#1    1
10/5/23 - Planning Phase Meeting             1
Planning Phase - 2nd Meeting                 1
Kick-Off Meeting Notes - 10/1/23             1
Project Management & Communication Tools     1
Survey Summary - 10/05/23                    1
Name: count, dtype: int64

### Convert to JSON

In [40]:
test = pages_df.to_json(orient='records')

In [43]:
with open('../data/dev/test_df_to_json.json', 'w') as json_output:
    json_output.write(test)

### Test Weird Duplication

In [17]:
page_19_id = pages[6]['id']

page_19_id

'0fa7ab57-628c-4fd9-b09e-5f0841d7fc54'

In [18]:
pages[6]['properties']['title']['title'][0]['plain_text']

'Meeting Notes Team Space'

In [19]:
test_url = f'https://api.notion.com/v1/blocks/{page_19_id}/children'

test_response = requests.get(test_url, headers=notion_request_headers)
test_blocks = test_response.json()['results']

In [10]:
test_blocks

[{'object': 'block',
  'id': 'ab74b5d3-d88b-4ea3-85a8-e99bdb58abbe',
  'parent': {'type': 'page_id',
   'page_id': 'f99302ab-5823-4b41-91ec-ee19e76c9507'},
  'created_time': '2023-10-07T14:00:00.000Z',
  'last_edited_time': '2023-10-07T14:00:00.000Z',
  'created_by': {'object': 'user',
   'id': '4c629a82-589e-46e2-8d2c-6f1edb7747ae'},
  'last_edited_by': {'object': 'user',
   'id': '4c629a82-589e-46e2-8d2c-6f1edb7747ae'},
  'has_children': True,
  'archived': False,
  'type': 'paragraph',
  'paragraph': {'rich_text': [], 'color': 'default'}}]

In [172]:
# with open('../data/index_7.json', 'w', encoding='utf8') as f:
#     json.dump(test_blocks, f, ensure_ascii=False, indent=4)  

# Ideas

For numbered or bulleted list, do a new paragraph and symbol at the start of each block object

In [25]:
page_with_numbers = test_blocks[8]

In [40]:
#test a loop through a few blocks with numbered records
block_content_list = []

n =1
previous_block = ''
for block in test_blocks[33:36]:
    #save block type and content
    try:
        block_type = block['type']
    except IndexError:
        block_type = ''

    #skip if block_type one of the types we won't use
    if block_type in ['table','divider','column_list','child_page','image','']:
        continue

    #loop through annotations within block content of the types we want
    block_annotation_list = []
    for i in block[block_type]['rich_text']:
        try:
            content_item = i['plain_text']
            block_annotation_list.append(content_item)
        except KeyError:
            pass
    #add bullets and numbers if applicable
    if block_type == 'bulleted_list_item':    
        content = '\n-'+''.join(block_annotation_list) #combine seperated annotations
        previous_block = block_type #save off previous block to increment number in ordered fashion
        print(previous_block)
    elif block_type == 'numbered_list_item':
        if previous_block == 'numbered_list_item':
            content = '\n'+str(n)+'.'+''.join(block_annotation_list) #combine seperated annotations
            previous_block = block_type #save off previous block to increment number in ordered fashion
            print(previous_block)
            n += 1 #increment number
        else:
            n=1 #set n back to 1
            content = '\n'+str(n)+'.'+''.join(block_annotation_list) #combine seperated annotations
            previous_block = block_type #save off previous block to increment number in ordered fashion
            print(previous_block)
            n += 1 #increment number
    else:
        content = ''.join(block_annotation_list) #combine seperated annotations
        previous_block = block_type #save off previous block to increment number in ordered fash
        print(previous_block)

    block_content_list.append(content) #append the combined items to list

paragraph
numbered_list_item
numbered_list_item


In [41]:
block_content_list

['3. They will have 3 links (features) they can click on',
 '\n1.Project (Tasks)',
 '\n2.Journal Entries (Team Progress)']

### Blank Security Page

In [47]:
security_page = pages[1]['id']

security_page

'f99302ab-5823-4b41-91ec-ee19e76c9507'

In [49]:
security_title = pages[1]['properties']['title']['title'][0]['plain_text']

security_title

'HarmonyHaven Security'

In [50]:
security_url = f'https://api.notion.com/v1/blocks/{security_page}/children'

sec_resp = requests.get(security_url, headers=notion_request_headers)
sec_blocks = sec_resp.json()['results']