In [1]:
import requests
import json           
import pandas as pd 
import os

from datetime import datetime
import urllib.parse

In [2]:
headers = {
    "accept": "application/json",
    "authorization": "Bearer 1/1204862582358995:58225b495568b45ac37d588db2fea9a6"
}

In [3]:
def asanaResToCSV(csvName, res):
    '''
        csvName: name of csv file in which data to dump
        res: response of the asana api
    '''
    resDict = json.loads(res.text) # String to dict
    df = pd.DataFrame.from_dict(resDict['data'])
    if not os.path.exists('./fetchedRecords'):
        os.makedirs('./fetchedRecords')
    df.to_csv(f'./fetchedRecords/{csvName}.csv', index= False)

In [4]:
workspace = "https://app.asana.com/api/1.0/workspaces"
resWorkspace = requests.get(workspace, headers=headers)
# print(resWorkspace.text) # type ==> 'dict' 
asanaResToCSV('workspace', resWorkspace)

In [5]:
projects = "https://app.asana.com/api/1.0/projects"
resProjects = requests.get(projects, headers=headers)
# print((resProjects.text)) # type ==> 'dict' 
asanaResToCSV('projects', resProjects)

In [6]:
# Getting the project ID for fetching sections and tasks
resProjectsDict = json.loads(resProjects.text)
projectID = resProjectsDict['data'][0]['gid']
projectID

'1204862567620455'

In [7]:
section = f'https://app.asana.com/api/1.0/projects/{projectID}/sections'
resSection = requests.get(section, headers=headers)
# print((resSection.text)) # type ==> 'dict' 
asanaResToCSV('section', resSection)

In [None]:
# fetching all modified and unmodified tasks 
tasks = f"https://app.asana.com/api/1.0/tasks?project={projectID}"
resTasks = requests.get(tasks, headers=headers)
# print((resTasks.text)) # type ==> 'dict' 
asanaResToCSV('tasks', resTasks)

In [13]:
current_datetime = datetime.utcnow().isoformat()
encoded_datetime = urllib.parse.quote(current_datetime)

try: # file exists ==> some etl has already performed 
    df_tasks = pd.read_csv('./fetchedRecords/tasks.csv') # Assumed that file is always present
    # print(f'Previous df\n{df_tasks}\n')    
except FileNotFoundError: # etl running for the first time 
    df_tasks = pd.DataFrame(columns=['gid', 'name', 'resource_type', 'resource_subtype', 'etl_date_created', 'etl_date_modified'])
    past_date = datetime(2000, 1, 1, 00, 00, 0000)
    last_modified = past_date.strftime('%Y-%m-%dT%H:%M:%S.%f%z')
else:
    last_modified = df_tasks['etl_date_modified'].max()

tasks = f"https://app.asana.com/api/1.0/tasks?project={projectID}&modified_since={last_modified}"
resTasks = requests.get(tasks, headers=headers) 
resDict = json.loads(resTasks.text) # String to dict
df_modifiedTasks = pd.DataFrame.from_dict(resDict['data'])
# print(f'fetched df\n{df_modifiedTasks}\n')
    
if len(df_modifiedTasks) == 0: # if no records are modifed since last_modified date 
    print('No modified records!!!')    
else:
    df_tasks['gid'] = df_tasks['gid'].astype(str)

    # Adding etl_date_created and etl_date_modified columns in fetched df
    df_modifiedTasks['etl_date_modified'] = encoded_datetime
    df_modifiedTasks = df_modifiedTasks.merge(df_tasks[['gid', 'etl_date_created']], on='gid', how='left')
    df_modifiedTasks['etl_date_created'].fillna(encoded_datetime, inplace=True)
    # print(f'processed fetched df\n{df_modifiedTasks}\n')

    # merging the two dfs
    df_merged = pd.concat([df_tasks, df_modifiedTasks]).drop_duplicates(subset='gid', keep='last')
    # print(f'final df\n{df_merged}\n')

    if not os.path.exists('./fetchedRecords'):
        os.makedirs('./fetchedRecords')        
    df_merged.to_csv(f'./fetchedRecords/tasks.csv', index=False)

No modified records!!!


In [88]:
current_datetime = datetime.utcnow().isoformat()
encoded_datetime = urllib.parse.quote(current_datetime)

df_tasks = pd.read_csv('./fetchedRecords/tasks.csv')
tasksDetails_list = []

try: # file exists ==> some etl has already performed 
    df_existedTaskDetails = pd.read_csv('./fetchedRecords/taskDetails.csv') # Assumed that file is always present
    # print(f'Previous df\n{df_existedTaskDetails}\n')    
except FileNotFoundError: # etl running for the first time 
    # getting gids of all tasks 
    gids_tasksToFetch = list(df_tasks['gid'])
else:
    # getting the gids of tasks which are modifed after taskDetail etl previous run
    df_taskAndTaskDetails = pd.merge(df_tasks, df_existedTaskDetails, on='gid', how = 'left', suffixes=('_df1', '_df2'))

    gids_modifiedTasks = list(df_taskAndTaskDetails[df_taskAndTaskDetails['etl_date_modified_df1'] > df_taskAndTaskDetails['etl_date_modified_df2']]['gid'])
    gids_newTasks = list(df_taskAndTaskDetails[df_taskAndTaskDetails['etl_date_modified_df2'].isnull()]['gid'])

    gids_tasksToFetch = gids_modifiedTasks + gids_newTasks  

df_tasks['gid'] = df_tasks['gid'].astype(str)
df_existedTaskDetails['gid'] = df_existedTaskDetails['gid'].astype(str)

# calling api for the modified task details only
for task_gid in gids_tasksToFetch:
    task = f"https://app.asana.com/api/1.0/tasks/{task_gid}"
    resTask = requests.get(task, headers=headers)
    resDict = json.loads(resTask.text) # String to dict
    taskDetails = pd.json_normalize(resDict['data'], max_level=0)
    tasksDetails_list.append(taskDetails)
df_newTaskDetails = pd.concat(tasksDetails_list)

# Adding etl_date_created and etl_date_modified columns in fetched df
df_newTaskDetails['etl_date_modified'] = encoded_datetime
df_newTaskDetails = df_newTaskDetails.merge(df_existedTaskDetails[['gid', 'etl_date_created']], on='gid', how='left')
df_newTaskDetails['etl_date_created'].fillna(encoded_datetime, inplace=True)    
print(f'processed fetched df\n{df_modifiedTasks.head()}\n')

# merging the two dfs
df_updatedTaskDetails = pd.concat([df_existedTaskDetails, df_newTaskDetails]).drop_duplicates(subset='gid', keep='last')
print(f'final df\n{df_updatedTaskDetails.head()}\n')

if not os.path.exists('./fetchedRecords'):
    os.makedirs('./fetchedRecords')        
df_merged.to_csv(f'./fetchedRecords/taskDetails.csv', index=False)

processed fetched df
                gid                           name resource_type  \
0  1204862567620465            Draft project brief          task   
1  1204862567620467                      renamed 1          task   
2  1204862567620469  Share timeline with teammates          task   
3  1205085348231103                    test task 1          task   
4  1205110993213486                    test task 2          task   

  resource_subtype               etl_date_modified  \
0     default_task  2023-07-21T14%3A09%3A19.511638   
1     default_task  2023-07-21T14%3A09%3A19.511638   
2     default_task  2023-07-21T14%3A09%3A19.511638   
3     default_task  2023-07-21T14%3A09%3A19.511638   
4     default_task  2023-07-21T14%3A09%3A19.511638   

                 etl_date_created  
0  2023-07-21T12%3A01%3A18.123008  
1  2023-07-21T12%3A01%3A18.123008  
2  2023-07-21T12%3A01%3A18.123008  
3  2023-07-21T12%3A01%3A18.123008  
4  2023-07-21T12%3A47%3A06.500749  

final df
                gid