In [2]:
from googleapiclient.http import MediaFileUpload, MediaIoBaseUpload
from google.oauth2 import service_account
from googleapiclient.discovery import build
import os
import time
import json
import io
from datetime import datetime
import pandas as pd


In [3]:
# Service-account fields are read from environment variables so that no
# credential material is stored in the notebook. A missing variable raises
# KeyError here, before any Drive call is attempted.
TYPE = os.environ["TYPE"]
PROJECT_ID = os.environ["PROJECT_ID"]
PRIVATE_KEY_ID = os.environ["PRIVATE_KEY_ID"]
# The key is stored with literal backslash-n sequences; turn them back into
# real newline characters.
PRIVATE_KEY = os.environ["PRIVATE_KEY"].replace('\\n','\n')
CLIENT_EMAIL = os.environ["CLIENT_EMAIL"]
CLIENT_ID = os.environ["CLIENT_ID"]
AUTH_URI = os.environ["AUTH_URI"]
TOKEN_URI = os.environ["TOKEN_URI"]
AUTH_PROVIDER = os.environ["AUTH_PROVIDER"]
CLIENT_CERT_URL = os.environ["CLIENT_CERT_URL"]
UNIVERSE_DOMAIN = os.environ["UNIVERSE_DOMAIN"]

# Service-account info dict handed to google-auth below. Every value is
# already a plain string, so it is passed as-is (no JSON round-trip needed).
google_json = {
    "type": TYPE,
    "project_id": PROJECT_ID,
    "private_key_id": PRIVATE_KEY_ID,
    "private_key": PRIVATE_KEY,
    "client_email": CLIENT_EMAIL,
    "client_id": CLIENT_ID,
    "auth_uri": AUTH_URI,
    "token_uri": TOKEN_URI,
    "auth_provider_x509_cert_url": AUTH_PROVIDER,
    "client_x509_cert_url": CLIENT_CERT_URL,
    "universe_domain": UNIVERSE_DOMAIN,
}

# OAuth scope requested for the credentials shared by every helper below.
SCOPES = ['https://www.googleapis.com/auth/drive']
creds = service_account.Credentials.from_service_account_info(google_json, scopes=SCOPES)

In [5]:
def create_foler_in_google_drive(name):
    """Create a folder in Google Drive, print its ID and return it.

    The misspelled function name is kept so existing callers keep working.

    Args:
        name: Name given to the new folder.

    Returns:
        The 'id' field of the API response (None if the response has none).
    """
    service = build('drive', 'v3', credentials=creds)

    # A Drive folder is a file whose MIME type is the folder type below.
    file_metadata = {
        'name': name,
        'mimeType': 'application/vnd.google-apps.folder'
    }

    folder = service.files().create(body=file_metadata, fields='id').execute()
    folder_id = folder.get('id')
    print('Folder ID:', folder_id)
    # Return the ID as well so callers can use it without copying it from the output.
    return folder_id

# Create the 'Feedback' folder. The ID printed below is the value that later
# cells hardcode as their parent/folder ID.
create_foler_in_google_drive('Feedback')

Folder ID: 1-Z4ZVnyPhZLoIB6SIPU7TGJ8UHz9jWTh


In [50]:
def upload_file_to_google_drive(file_path, mime_type, parent_folder_id=None):
    """Upload a local file to Google Drive, print its ID and return it.

    Args:
        file_path: Path of the local file; its base name becomes the Drive name.
        mime_type: MIME type passed to the media upload.
        parent_folder_id: Optional ID of the folder to place the file in. When
            omitted, no 'parents' entry is sent.

    Returns:
        The 'id' field of the API response (None if the response has none).
    """
    service = build('drive', 'v3', credentials=creds)

    file_metadata = {
        'name': os.path.basename(file_path),
    }
    if parent_folder_id:
        file_metadata['parents'] = [parent_folder_id]

    media = MediaFileUpload(file_path, mimetype=mime_type)

    file = service.files().create(
        body=file_metadata,
        media_body=media,
        fields='id'
    ).execute()
    file_id = file.get('id')
    print(f"File ID: {file_id}")
    # Return the ID as well so callers can use it without copying it from the output.
    return file_id


# Destination folder: the same ID that was printed when the 'Feedback' folder
# was created earlier in this notebook.
parent_folder_id = '1-Z4ZVnyPhZLoIB6SIPU7TGJ8UHz9jWTh'
# Path is relative to the notebook's working directory.
file_path = 'data/ground_truth.xlsx'
# MIME type declared for the .xlsx upload.
mime_type = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
upload_file_to_google_drive(file_path, mime_type,parent_folder_id)

File ID: 1CFcGdeyUnLXPdx_XAl0V0w7BtGRfEuuW


In [21]:
def upload_json_to_google_drive(json_to_upload,name,parent_folder_id = None):
    """Serialize an object to JSON, upload it to Google Drive and return its ID.

    Args:
        json_to_upload: Any object accepted by ``json.dumps``.
        name: File name without extension; '.json' is appended.
        parent_folder_id: Optional ID of the folder to place the file in. When
            omitted, no 'parents' entry is sent.

    Returns:
        The 'id' field of the API response (None if the response has none).
    """
    service = build('drive', 'v3', credentials=creds)

    # The payload is uploaded from memory; nothing is written to local disk.
    json_str = json.dumps(json_to_upload)
    file_stream = io.BytesIO(json_str.encode('utf-8'))

    file_metadata = {
        'name': '{}.json'.format(name),
        'mimeType': 'application/json'
    }
    if parent_folder_id:
        file_metadata['parents'] = [parent_folder_id]

    media = MediaIoBaseUpload(file_stream, mimetype='application/json')

    file = service.files().create(
        body=file_metadata,
        media_body=media,
        fields='id'
    ).execute()
    file_id = file.get('id')
    print(f"File ID: {file_id}")
    # Return the ID as well so callers can use it without copying it from the output.
    return file_id


# Quick check of the JSON upload: no parent folder is passed, so the file
# metadata carries no 'parents' entry.
upload_json_to_google_drive({'key':'value'},'north')

File ID: 1DT9x3X6xur1AdimsPRF0r9eyQjnDr8yM


In [3]:
def list_files_in_google_drive():
    """Print the name and ID of every entry the credentials can list.

    The listing is paginated: each response's ``nextPageToken`` is fed back
    into the next request until no token is returned, so entries beyond the
    first page are no longer dropped.
    """
    service = build('drive', 'v3', credentials=creds)

    items = []
    page_token = None
    while True:
        results = service.files().list(
            pageSize=10,
            fields="nextPageToken, files(id, name)",
            pageToken=page_token,
        ).execute()
        items.extend(results.get('files', []))
        page_token = results.get('nextPageToken')
        if not page_token:
            break

    if not items:
        print('No files found.') 
    else:
        print('Files:')
        for item in items:
            print(f"{item['name']} ({item['id']})")
list_files_in_google_drive()

Files:
18:35:20.json (1WsIVNzOYpILaWWUw00kVG-1DMEy2Canb)
00:58:01.json (1bodTRDSzhwt9iT0HelpLA8Iij7YkMR8R)
00:05:53.json (1iiCM4T7JDlCGeM-bW_wG0StEByhDpx2z)
ground_truth.xlsx (1CFcGdeyUnLXPdx_XAl0V0w7BtGRfEuuW)
GroundTruth (1wd2yHup7ExpSRAmqgmaH8ELAfq04Lybx)
Feedback (1i307DYh9OFoPC6AkxI1rTwrhC0J3HKO-)


In [94]:
def list_files_in_google_drive_folder(folder_id):
    """Print and return the IDs of all entries whose parents include a folder.

    The listing is paginated: each response's ``nextPageToken`` is fed back
    into the next request until no token is returned. Without this, callers
    that merge or delete by the returned IDs would silently miss everything
    past the first page.

    Args:
        folder_id: ID of the Drive folder to inspect.

    Returns:
        List of entry IDs in the order the API returned them (empty if none).
    """
    service = build('drive', 'v3', credentials=creds)

    items = []
    page_token = None
    while True:
        results = service.files().list(
            q=f"'{folder_id}' in parents",
            pageSize=10,  # Entries per request; all pages are fetched regardless
            fields="nextPageToken, files(id, name)",
            pageToken=page_token,
        ).execute()
        items.extend(results.get('files', []))
        page_token = results.get('nextPageToken')
        if not page_token:
            break

    ids_list = []
    if not items:
        print('No files found.')
    else:
        print('Files:')
        for item in items:
            print(f"{item['name']} ({item['id']})")
            ids_list.append(item['id'])

    return ids_list

# Folder to inspect; the same ID appears as a hardcoded default in other cells
# of this notebook. A later cell reuses this module-level `folder_id`.
folder_id = '1-Z4ZVnyPhZLoIB6SIPU7TGJ8UHz9jWTh'
list_files_in_google_drive_folder(folder_id)

Files:
23:20:51.json (1u44cv_FQ-QJHXzpNuhuAjBoDMyCT0PBa)
23:20:45.json (1Qeo5gqYOQQQ6jXyMgklfmZuxxjV02NQd)
22:55:17.json (10DjEBTwJsZO50P3rDkCwWWFRRvaONoE_)


['1u44cv_FQ-QJHXzpNuhuAjBoDMyCT0PBa',
 '1Qeo5gqYOQQQ6jXyMgklfmZuxxjV02NQd',
 '10DjEBTwJsZO50P3rDkCwWWFRRvaONoE_']

In [49]:
def list_folders_in_google_drive():
    """Print the name and ID of every folder the credentials can list.

    Only entries with the Drive folder MIME type are requested. The listing is
    paginated: each response's ``nextPageToken`` is fed back into the next
    request until no token is returned, so folders beyond the first page are
    no longer dropped.
    """
    service = build('drive', 'v3', credentials=creds)

    items = []
    page_token = None
    while True:
        results = service.files().list(
            q="mimeType='application/vnd.google-apps.folder'",
            pageSize=10,  # Entries per request; all pages are fetched regardless
            fields="nextPageToken, files(id, name)",
            pageToken=page_token,
        ).execute()
        items.extend(results.get('files', []))
        page_token = results.get('nextPageToken')
        if not page_token:
            break

    if not items:
        print('No folders found.')
    else:
        print('Folders:')
        for item in items:
            print(f"{item['name']} ({item['id']})")

list_folders_in_google_drive()


Folders:
GroundTruth (1wd2yHup7ExpSRAmqgmaH8ELAfq04Lybx)
Feedback (1i307DYh9OFoPC6AkxI1rTwrhC0J3HKO-)


In [4]:
def delete_all_files_and_folders():
    """Delete every non-trashed entry returned by the Drive listing.

    Walks the listing page by page. Each entry is deleted individually; a
    failure on one entry is printed and the loop moves on to the next. Stops
    when a page comes back empty or when no further page token is returned.
    """
    drive = build('drive', 'v3', credentials=creds)
    next_token = None
    more_pages = True

    while more_pages:
        listing = drive.files().list(
            q="trashed=false",
            spaces='drive',
            fields='nextPageToken, files(id, name)',
            pageToken=next_token,
        ).execute()

        entries = listing.get('files', [])
        if not entries:
            print("No files found.")
            return

        for entry in entries:
            try:
                drive.files().delete(fileId=entry['id']).execute()
                print(f"Deleted file: {entry['name']} (ID: {entry['id']})")
                # Brief pause between deletions to stay under the rate limit.
                time.sleep(0.1)
            except Exception as err:
                print(f"An error occurred: {err}")

        next_token = listing.get('nextPageToken', None)
        more_pages = next_token is not None

# WARNING: destructive. Issues a delete for every non-trashed file and folder
# returned by the listing for these credentials.
delete_all_files_and_folders()

Deleted file: 22:44:51.json (ID: 1LZzxvOKGyPFy5--olbU392p1EY4xODsP)
Deleted file: 22:43:58.json (ID: 1Z7EVvDY_XAL52Kv7DwT38R8I5Vs0vVDv)
Deleted file: 22:41:07.json (ID: 1Z_zt07NKi7wpDNIuUpdWEfdyHQC5gF5R)
Deleted file: 22:36:38.json (ID: 1slF1m6-XndBV2k26RySw-K8q5yepkuaI)
Deleted file: 22:36:12.json (ID: 1msUl9Df89CiUuadVlnwv_7QtaAQiGlc6)
Deleted file: Feedback (ID: 18RwkJwasmQ1KQtfvQHIFg9OJUbDHFGCq)


In [None]:
def delete_files_google_drive(file_ids=None):
    """Delete the given Drive files one by one.

    A failure on one ID is printed and does not stop the remaining deletions.

    Args:
        file_ids: Iterable of Drive file IDs. ``None`` (the default) or an
            empty iterable deletes nothing. ``None`` replaces the former
            mutable ``[]`` default.
    """
    service = build('drive', 'v3', credentials=creds)
    for file_id in (file_ids if file_ids is not None else ()):
        try:
            service.files().delete(fileId=file_id).execute()
            print('File deleted successfully')
        except Exception as e:
            print('An error occurred:', e)

In [85]:
list_files_in_google_drive_folder(folder_id)

Files:
22:55:17.json (10DjEBTwJsZO50P3rDkCwWWFRRvaONoE_)


In [84]:
def load_json_from_google_drive(file_id):
    """Download the content of a Drive file.

    The downloaded content is returned exactly as the request yields it; no
    JSON parsing happens here.

    Args:
        file_id: ID of the Drive file to download.

    Returns:
        The downloaded content, or None if the download raised (the error is
        printed).
    """
    drive = build('drive', 'v3', credentials=creds)
    try:
        return drive.files().get_media(fileId=file_id).execute()
    except Exception as err:
        print(f'An error occurred: {err}')
    return None

In [121]:
def show_feedback(folder_id = '1-Z4ZVnyPhZLoIB6SIPU7TGJ8UHz9jWTh'):
    """Load the JSON feedback files in a folder and combine them into a DataFrame.

    Files that cannot be downloaded, are not valid UTF-8 JSON, or do not hold
    a JSON object are skipped with a message instead of aborting the run.
    Skipped files are left out of the returned IDs, so a caller that deletes
    "merged" files never deletes one whose content was not read.

    Args:
        folder_id: ID of the Drive folder holding the feedback files.

    Returns:
        Tuple ``(feedback_ids, df)``: the IDs of the files whose content went
        into ``df``, and a DataFrame with one column per JSON key. List values
        are exploded so that each list element gets its own row.
    """
    merged_ids = []
    feedback_output = {}
    for feedback_id in list_files_in_google_drive_folder(folder_id):
        loaded_json = load_json_from_google_drive(feedback_id)
        if loaded_json is None:
            # The loader returns None after printing the download error.
            print(f'Skipping {feedback_id}: content could not be loaded')
            continue

        try:
            json_content_dict = json.loads(loaded_json.decode('utf-8'))
        except ValueError as e:
            # Covers both undecodable bytes and malformed JSON, e.g. a
            # non-JSON file stored in the same folder.
            print(f'Skipping {feedback_id}: not valid JSON ({e})')
            continue
        if not isinstance(json_content_dict, dict):
            print(f'Skipping {feedback_id}: JSON content is not an object')
            continue

        print('JSON content as dictionary:')
        print(json_content_dict)
        for k, v in json_content_dict.items():
            feedback_output.setdefault(k, []).append(v)
        merged_ids.append(feedback_id)

    # A value is either one record (scalar) or a list of records from an
    # earlier merge; explode flattens lists so every record is one row.
    df = pd.DataFrame(feedback_output).apply(pd.Series.explode).reset_index(drop = True)

    return merged_ids, df


In [115]:
# Load the feedback from the default folder; only the combined frame is
# needed here, so the returned IDs are discarded.
_, df = show_feedback()
# Bare expression so the notebook renders the DataFrame.
df

Files:
23:36:58.json (1c7KtoO-xTM9A411ZIdZBgS1wEfilpQgQ)
23:20:51.json (1u44cv_FQ-QJHXzpNuhuAjBoDMyCT0PBa)
23:20:45.json (1Qeo5gqYOQQQ6jXyMgklfmZuxxjV02NQd)
22:55:17.json (10DjEBTwJsZO50P3rDkCwWWFRRvaONoE_)
['1c7KtoO-xTM9A411ZIdZBgS1wEfilpQgQ', '1u44cv_FQ-QJHXzpNuhuAjBoDMyCT0PBa', '1Qeo5gqYOQQQ6jXyMgklfmZuxxjV02NQd', '10DjEBTwJsZO50P3rDkCwWWFRRvaONoE_']
JSON content as dictionary:
{'thumb': ['👎', '👍', '👍'], 'feedback': ['asdf', 'abc', 'feedback'], 'user_input': ['def', 'aabc', 'abc'], 'llm_output': ['Not confident enough to generate prompt ', 'Not confident enough to generate prompt ', 'Not confident enough to generate prompt ']}
JSON content as dictionary:
{'thumb': '👎', 'feedback': 'asdf', 'user_input': 'def', 'llm_output': 'Not confident enough to generate prompt '}
JSON content as dictionary:
{'thumb': '👍', 'feedback': 'abc', 'user_input': 'aabc', 'llm_output': 'Not confident enough to generate prompt '}
JSON content as dictionary:
{'thumb': '👍', 'feedback': 'feed

Unnamed: 0,thumb,feedback,user_input,llm_output
0,👎,asdf,def,Not confident enough to generate prompt
0,👍,abc,aabc,Not confident enough to generate prompt
0,👍,feedback,abc,Not confident enough to generate prompt
1,👎,asdf,def,Not confident enough to generate prompt
2,👍,abc,aabc,Not confident enough to generate prompt
3,👍,feedback,abc,Not confident enough to generate prompt


In [129]:
def merge_json_feedback(folder_id = '1-Z4ZVnyPhZLoIB6SIPU7TGJ8UHz9jWTh'):
    """Merge the feedback files of a folder into one JSON file and delete the originals.

    The combined content is uploaded first; the source files are deleted only
    if the upload did not raise. When there is nothing to merge the function
    returns without uploading, so no empty JSON file is created.

    Args:
        folder_id: ID of the Drive folder whose feedback files are merged.
    """
    feedback_ids, df = show_feedback(folder_id)
    if not feedback_ids:
        print('No feedback files to merge.')
        return

    feedback_dict = {col: df[col].tolist() for col in df.columns}
    # The merged file is named after the current time of day only (no date).
    current_time = datetime.now().strftime("%H:%M:%S")
    try:
        # NOTE(review): the merged file always goes to this hardcoded folder,
        # even when `folder_id` differs - confirm that this is intended.
        upload_json_to_google_drive(feedback_dict,current_time,parent_folder_id='1-Z4ZVnyPhZLoIB6SIPU7TGJ8UHz9jWTh') # Folder to store feedback
        delete_files_google_drive(feedback_ids)
    except Exception as e:
        print(f'An error occurred: {e}')

In [133]:
# Combine the feedback files of the default folder into one JSON file, then
# delete the originals.
merge_json_feedback()

Files:
23:58:36.json (1Le46L3MYEoC59Varb16KsLqJRlJBTj3V)
23:56:01.json (1I73L9iaXrrUi2JBon1I7pBJgE7ihr0gB)
23:36:58.json (1c7KtoO-xTM9A411ZIdZBgS1wEfilpQgQ)
23:20:51.json (1u44cv_FQ-QJHXzpNuhuAjBoDMyCT0PBa)
23:20:45.json (1Qeo5gqYOQQQ6jXyMgklfmZuxxjV02NQd)
22:55:17.json (10DjEBTwJsZO50P3rDkCwWWFRRvaONoE_)
JSON content as dictionary:
{'thumb': ['üëé', 'üëç', 'üëç', 'üëé', 'üëç', 'üëç', 'üëé', 'üëç', 'üëç', 'üëé', 'üëç', 'üëç'], 'feedback': ['asdf', 'abc', 'feedback', 'asdf', 'abc', 'feedback', 'asdf', 'abc', 'feedback', 'asdf', 'abc', 'feedback'], 'user_input': ['def', 'aabc', 'abc', 'def', 'aabc', 'abc', 'def', 'aabc', 'abc', 'def', 'aabc', 'abc'], 'llm_output': ['Not confident enough to generate prompt ', 'Not confident enough to generate prompt ', 'Not confident enough to generate prompt ', 'Not confident enough to generate prompt ', 'Not confident enough to generate prompt ', 'Not confident enough to generate prompt ', 'Not confident enough to generate prompt ', 'Not con

In [136]:
list_files_in_google_drive_folder('1-Z4ZVnyPhZLoIB6SIPU7TGJ8UHz9jWTh')

Files:
00:05:53.json (1iiCM4T7JDlCGeM-bW_wG0StEByhDpx2z)


['1iiCM4T7JDlCGeM-bW_wG0StEByhDpx2z']