In [2]:
from openai import OpenAI
import os
import time
import pandas as pd
import re
from tqdm import tqdm

In [3]:
knowledge_folder = 'Knowledge'
knowledge_dict = {}

for filename in os.listdir(knowledge_folder):
    if filename != 'Knowledge_all.md' and filename.endswith('.md'):
        file_path = os.path.join(knowledge_folder, filename)
        aspect = filename.replace('.md', '').replace('Knowledge_', '')
        with open(file_path, 'r') as f:
            knowledge_dict[aspect] = f.read()
            
km_df = pd.read_csv('Knowledge/KM_DF.csv')

In [4]:
knowledge_dict.keys()

dict_keys(['Tax Transparency', 'Accounting', 'Pay', 'Board', 'Business Ethics', 'Ownership & Control'])

In [5]:
def Key_Metrics_Assessment(governance_file,knowledge_dict):
    if governance_file.endswith('.txt'):
        gf_type = 'txt'
    elif governance_file.endswith('.pdf'):
        gf_type = 'pdf'
    
    client = OpenAI(api_key='*********')
    
    file = client.files.create(
      file=open(governance_file, "rb"),
      purpose="assistants",
    )
    
    # vs = client.beta.vector_stores.create(
    #     file_ids=[file.id]
    # )

    assistant = client.beta.assistants.create(
        name="KMs_Assessment",
        instructions="You are a helpful assistance",
        model="gpt-4o-mini",
        # file_ids = [file.id],
        tools=[{"type": "file_search"}],
        # tool_resources={"file_search": {"vector_store_ids":[vs.id]}},
        temperature = 0.2
    )
    
    thread = client.beta.threads.create()
    
    prompt = f"""
    Evaluate all the Key Metrics mentioned above for the company in the {gf_type} file, the answer for each Key Metric should include four parts: Socre Definition, Assessment, Flagged, Score. If the Key Metrics not be flagged the score should be 0.0, or the score should be one of the "Typical Scoring Contribution" depends on the situation. If the relative content of a key metric is missing from the document, that metric should be flagged and the score should be the "Typical Scoring Contribution.
    """
    
    def submit_message(assistant_id, thread, KMs,file_ids=None):
        params = {
            'thread_id': thread.id,
            'role': 'user',
            'content': KMs+'\n\n'+prompt,
        }
        if file_ids:
            params['attachments']=[{"file_id": file.id, "tools": [{"type": "file_search"}]}]
        client.beta.threads.messages.create(
            **params
        )
        return client.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=assistant.id,
        )
    
    def get_response(thread):
        return client.beta.threads.messages.list(thread_id=thread.id)
    
    def wait_on_run(run, thread):
        while run.status == "queued" or run.status == "in_progress":
            run = client.beta.threads.runs.retrieve(
                thread_id=thread.id,
                run_id=run.id,
            )
            time.sleep(0.5)
        return run
    print("Assessing Board ......")
    run = submit_message(assistant.id, thread, KMs=knowledge_dict['Board'],file_ids=file.id)
    wait_on_run(run,thread)
    print("Assessing Pay ......")
    run = submit_message(assistant.id, thread, KMs=knowledge_dict['Pay'])
    wait_on_run(run,thread)
    print("Assessing Ownership & Control ......")
    run = submit_message(assistant.id, thread, KMs=knowledge_dict['Ownership & Control'])
    wait_on_run(run,thread)
    print("Assessing Accounting ......")
    run = submit_message(assistant.id, thread, KMs=knowledge_dict['Accounting'])
    wait_on_run(run,thread)
    print("Assessing Business Ethics ......")
    run = submit_message(assistant.id, thread, KMs=knowledge_dict['Business Ethics'])
    wait_on_run(run,thread)
    print("Assessing Tax Transparency ......")
    run = submit_message(assistant.id, thread, KMs=knowledge_dict['Tax Transparency'])
    wait_on_run(run,thread)
    
    messages = get_response(thread)
    
    client.beta.assistants.delete(assistant_id=assistant.id)
    # client.beta.vector_stores.delete(vs.id)
    client.beta.threads.delete(thread_id=thread.id)
    return messages

In [6]:
def Key_Metrics_Assessment_one(governance_file, km_df):
    if governance_file.endswith('.txt'):
        gf_type = 'txt'
    elif governance_file.endswith('.pdf'):
        gf_type = 'pdf'
    
    client = OpenAI(api_key='*********')  # 替换为你的 API 密钥
    
    file = client.files.create(
        file=open(governance_file, "rb"),
        purpose="assistants",
    )
    
    assistant = client.beta.assistants.create(
        name="KMs_Assessment",
        instructions="You are a helpful assistant",
        model="gpt-4o-mini",
        tools=[{"type": "file_search"}],
        temperature=0.2
    )
    
    thread = client.beta.threads.create()
    
    prompt = f"""
    Evaluate the Key Metric mentioned above for the company in the {gf_type} file. The answer for each Key Metric should include four parts: Score Definition, Assessment, Flagged, Score. If the Key Metrics are not flagged, the score should be 0.0, or the score should be one of the "Typical Scoring Contribution" depending on the situation. If the relevant content of a key metric is missing from the document, that metric should be flagged and the score should be the "Typical Scoring Contribution."
    """
    
    all_messages = []  # 用于存储所有的消息

    def submit_message(assistant_id, thread, KM, file_ids=None):
        params = {
            'thread_id': thread.id,
            'role': 'user',
            'content': KM + '\n\n' + prompt,
        }
        if file_ids:
            params['attachments'] = [{"file_id": file.id, "tools": [{"type": "file_search"}]}]
        client.beta.threads.messages.create(**params)
        return client.beta.threads.runs.create(
            thread_id=thread.id,
            assistant_id=assistant.id,
        )
    
    def get_response(thread):
        response = client.beta.threads.messages.list(thread_id=thread.id)
        all_messages.extend(response.data)  # 将新消息追加到 all_messages 列表中
        return response
    
    def wait_on_run(run, thread):
        while run.status == "queued" or run.status == "in_progress":
            run = client.beta.threads.runs.retrieve(
                thread_id=thread.id,
                run_id=run.id,
            )
            time.sleep(0.5)
        return run
    
    # 第一次运行并获取消息
    run = submit_message(assistant.id, thread, KM=str(km_df.iloc[0].to_dict()), file_ids=file.id)
    wait_on_run(run, thread)
    get_response(thread)  # 获取消息并存储

    # 使用 tqdm 为循环添加进度条，并在每次运行后获取消息
    for i in tqdm(range(1, len(km_df)), desc="Processing Key Metrics"):
        run = submit_message(assistant.id, thread, KM=str(km_df.iloc[i].to_dict()))
        wait_on_run(run, thread)
        get_response(thread)  # 获取消息并存储
    
    # 删除助手和线程
    client.beta.assistants.delete(assistant_id=assistant.id)
    client.beta.threads.delete(thread_id=thread.id)
    
    return all_messages  # 返回所有的消息记录

In [7]:
def Key_Metrics_Assessment_flexi(governance_file, km_df,batch_size=10):
    if governance_file.endswith('.txt'):
        gf_type = 'txt'
    elif governance_file.endswith('.pdf'):
        gf_type = 'pdf'
    
    client = OpenAI(api_key='*********')  # 替换为你的 API 密钥
    
    file = client.files.create(
        file=open(governance_file, "rb"),
        purpose="assistants",
    )
    
    assistant = client.beta.assistants.create(
        name="KMs_Assessment",
        instructions="You are a helpful assistant",
        model="gpt-4o-mini",
        tools=[{"type": "file_search"}],
        temperature=0.2
    )
    
    # thread = client.beta.threads.create()
    
    prompt = f"""
    Evaluate the Key Metrics mentioned above for the company in the {gf_type} file. The answer for each Key Metric should only include four parts: Score Definition, Assessment, Flagged, Score. If the Key Metrics are not flagged, the score should be 0.0, or the score should be one of the "Typical Scoring Contribution" depending on the situation. If the relevant content of a key metric is missing from the document, that metric should be flagged and the score should be the "Typical Scoring Contribution. The number of the Key Metricis in your answer should be the same as the number I gave you"
    """
    
    all_messages = []  # 用于存储所有的消息

    def submit_message(assistant_id, thread, KM, file_ids=None):
        params = {
            'thread_id': thread.id,
            'role': 'user',
            'content': KM + '\n\n' + prompt,
        }
        if file_ids:
            params['attachments'] = [{"file_id": file.id, "tools": [{"type": "file_search"}]}]
        client.beta.threads.messages.create(**params)
        return client.beta.threads.runs.create(
            thread_id=thread.id,
            assistant_id=assistant.id,
        )
    
    def get_response(thread):
        response = client.beta.threads.messages.list(thread_id=thread.id)
        all_messages.extend(response.data)  # 将新消息追加到 all_messages 列表中
        return response
    
    def wait_on_run(run, thread):
        while run.status == "queued" or run.status == "in_progress":
            run = client.beta.threads.runs.retrieve(
                thread_id=thread.id,
                run_id=run.id,
            )
            time.sleep(0.5)
        return run
    def row_str(row):
        strbatch = str()
        strbatch += f"Key Metrics Name: {row['Key Metrics Name']} [{row['Group']}]\n\n"
        strbatch += 'Score Definition:\n' + row['Score Definition'] + '\n\n'
        strbatch += 'Measurement Method:\n' + row['Measurement Method'] + '\n\n'
        strbatch += 'Typical Scoring Contribution:\n' + row['Typical Scoring Contribution'] + '\n'
        strbatch += '------\n\n'
        return strbatch
    
    # 第一次运行并获取消息
    # thread = client.beta.threads.create()
    # run = submit_message(assistant.id, thread, KM=row_str(km_df.iloc[0]), file_ids=file.id)
    # wait_on_run(run, thread)
    # get_response(thread)  # 获取消息并存储
    # client.beta.threads.delete(thread_id=thread.id)
    
    # 遍历 DataFrame，按照每 10 行分组处理
    for start in tqdm(range(0, len(km_df), batch_size), desc="Processing Key Metrics"):
        # thread = client.beta.threads.create()
        batch = km_df.iloc[start:start+batch_size]  # 每次获取 10 行的子集
        strbatch = str()
        # 对每个批次进行处理
        for _, row in batch.iterrows():
            strbatch += row_str(row)
        # run = submit_message(assistant.id, thread, KM=strbatch, file_ids=file.id)
        # wait_on_run(run, thread)
        # get_response(thread)  # 获取消息并存储
        # client.beta.threads.delete(thread_id=thread.id)
        retries = 0
        success = False
        
        while retries < 3 and not success:
            try:
                thread = client.beta.threads.create()  # 每次创建新线程
                run = submit_message(assistant.id, thread, KM=strbatch, file_ids=file.id)
                wait_on_run(run, thread)
                get_response(thread)
                success = True  # 如果成功，则标记为 True
            except Exception as e:
                retries += 1
                print(f"Error processing batch starting at index {start}, retry {retries}/{3}: {e}")
            finally:
                client.beta.threads.delete(thread_id=thread.id)  # 确保线程被删除

    
    # 删除助手和线程
    client.beta.assistants.delete(assistant_id=assistant.id)
    
    return all_messages  # 返回所有的消息记录

In [8]:
def Summarizer(KMs_assessment):
    client = OpenAI(api_key='*********')
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role":"system","content":"You are a helpful assistance"},
            {"role":"user","content":KMs_assessment+'\n\n'+'Summarise the assessment of all the above key indicators and write a professional and coherent report on the themes of the Governance Indicators Report, which should be presented in professional and pertinent language, with one paragraph for each main theme, without the use of bullet points, and ignoring the scores. Suggestions for improvement should also be made for key indicators where relevant information is lacking in the document. The paragraph for each theme should contain an evaluation of which indicators under the changed theme performed well, which were inadequate and which did not provide information.'}
        ]
    )
    response = completion.choices[0].message.content
    return response

In [9]:
def remove_score(Assessment):
    lines = Assessment.splitlines()
    # 过滤掉包含 "Score:" 的行
    filtered_lines = [line for line in lines if "Score:" not in line]  
    # 将过滤后的行重新组合为一个字符串
    filtered_content_str = "\n".join(filtered_lines)
    return filtered_content_str

In [19]:
def Score_Rating(name_summary_df, batch_size=None, Example=None, Assessment=False):
    client = OpenAI(api_key='*********')
 
    # Create a single assistant that will be used for all batches or the full data
    assistant = client.beta.assistants.create(
        name="Score Rater",
        instructions="You are a fair and rigorous grader, and you are able to rate the company's governance performance on a 100-point scale based on the summary.",
        model="gpt-4o-mini",
        temperature=0.2
    )
    
    prompt = "Based on the governance performance above, you need to show the details of the rating. Use a point system instead of a deduction system. Regardless of the rating level, the lowest score is 0 and cannot be negative. The total score should be given in the format '(\d+)\s+out\s+of\s+100' so that I can use regex to extract it."
    
    def submit_message(assistant_id, thread, summary):
        params = {
            'thread_id': thread.id,
            'role': 'user',
            'content': summary + '\n\n' + prompt,
        }
        client.beta.threads.messages.create(**params)
        return client.beta.threads.runs.create(
            thread_id=thread.id,
            assistant_id=assistant_id,
        )
    
    def get_response(thread):
        return client.beta.threads.messages.list(thread_id=thread.id)
    
    def wait_on_run(run, thread):
        while run.status == "queued" or run.status == "in_progress":
            run = client.beta.threads.runs.retrieve(
                thread_id=thread.id,
                run_id=run.id,
            )
            time.sleep(0.5)
        return run
    
    def process_data(data_df, Example):
        thread = client.beta.threads.create()  # Create a new thread for each batch or full data
        all_responses = []  # Store each message content
        
        if Example:
            start_prompt = "Learn the relationship between the text summary and the score from the example below (format: {'Text for Rating':'Score','Text for Rating':'Score',...}), focusing on how different issues affect the rating weight.\n\n" + Example
            run = submit_message(assistant.id, thread, start_prompt)
            wait_on_run(run, thread)
        
        score_list = []
        for i in range(len(data_df)):
            score_dict = {}
            name = data_df.iloc[i].values[0]
            score_dict['Name'] = name
            summary = data_df.iloc[i].values[1]
            if Assessment:
                summary = remove_score(summary)
                print('Score removed')
            print(f"Rating for {name}......")
            retries = 0
            success = False
            
            while retries < 3 and not success:
                try:
                    run = submit_message(assistant.id, thread, summary)
                    wait_on_run(run, thread)
                    messages = get_response(thread)
                    response = messages.data[0].content[0].text.value
                    all_responses.append(response)  # Store each message content
                    success = True
                except Exception as e:
                    retries += 1
                    print(f"Error processing, retry {retries}/3: {e}")
            
            score_dict['Score_detail'] = response
            match = re.search(r'\(?(\d+)\)?\s+out\s+of\s+100', response)
            if match:
                score = match.group(1)
            else:
                score = 0
            score_dict['Score'] = score
            score_list.append(score_dict)
        
        client.beta.threads.delete(thread_id=thread.id)  # Delete the thread after processing
        
        return pd.DataFrame(score_list), all_responses
    
    # Initialize variables to store all results
    all_messages = []
    final_score_df = pd.DataFrame()
    
    # If batch_size is None, process all data at once
    if batch_size is None:
        score_df, messages = process_data(name_summary_df, Example)
        final_score_df = score_df
        all_messages.extend(messages)
    else:
        # Process in batches
        batches = [name_summary_df[i:i + batch_size] for i in range(0, len(name_summary_df), batch_size)]
        
        for batch in batches:
            batch_df, messages = process_data(batch, Example)
            final_score_df = pd.concat([final_score_df, batch_df], ignore_index=True)
            all_messages.extend(messages)
    
    # Delete the assistant after all processing is done
    client.beta.assistants.delete(assistant_id=assistant.id)
    
    return all_messages, final_score_df

In [11]:
# def Score_Rating(name_summary_df,Example = None,Assessment = False):
#     client = OpenAI(api_key='*********')
# 
#     assistant = client.beta.assistants.create(
#         name="Score Rater",
#         instructions="You are a fair and rigorous grader, and you are able to rate the company's governance performance on a 100-point scale based on the summary.",
#         model="gpt-4o-mini",
#         temperature = 0.2
#     )
# 
#     thread = client.beta.threads.create()
# 
#     prompt = "Rating based on governance performance above, you need to show the detail of the rating. Use a points system instead of a deduction system, no matter what level of rating, the lowest score is 0 and cannot be negative. The total score should be given as '(\d+)\s+out\s+of\s+100' this format, so that i can use re to extract"
# 
#     if Example:
#         start_prompt = "Learn the relationship between the text summary and the Score from Example below(the format of Example is {'Text for Rating':'Score','Text for Rating':'Score',...}), you should focus on different issues weight on the rating.\n\n" + Example
# 
#     def submit_message(assistant_id, thread, summary):
#         params = {
#             'thread_id': thread.id,
#             'role': 'user',
#             'content': summary+'\n\n'+prompt,
#         }
# 
#         client.beta.threads.messages.create(
#             **params
#         )
#         return client.beta.threads.runs.create(
#         thread_id=thread.id,
#         assistant_id=assistant.id,
#         )
# 
#     def get_response(thread):
#         return client.beta.threads.messages.list(thread_id=thread.id)
# 
#     def wait_on_run(run, thread):
#         while run.status == "queued" or run.status == "in_progress":
#             run = client.beta.threads.runs.retrieve(
#                 thread_id=thread.id,
#                 run_id=run.id,
#             )
#             time.sleep(0.5)
#         return run
# 
#     if Example:
#         run = submit_message(assistant.id,thread,start_prompt)
#         wait_on_run(run,thread)
# 
#     score_list = []
#     for i in range(len(name_summary_df)):
#         score_dict = {}
#         name = name_summary_df.iloc[i].values[0]
#         score_dict['Name'] = name
#         summary = name_summary_df.iloc[i].values[1]
#         if Assessment:
#             summary = remove_score(summary)
#             print('Remove score')
#         print("Rating for "+name+"......" )
#         retries = 0
#         success = False
# 
#         while retries < 3 and not success:
#             try:
#                 run = submit_message(assistant.id,thread,summary)
#                 wait_on_run(run,thread)
#                 messages = get_response(thread)
#                 response = messages.data[0].content[0].text.value
#                 success = True  # 如果成功，则标记为 True
#             except Exception as e:
#                 retries += 1
#                 print(f"Error processing , retry {retries}/{3}: {e}")
#         score_dict['Score_detail'] = response
#         match = re.search(r'\(?(\d+)\)?\s+out\s+of\s+100', response)
#         if match:
#             score = match.group(1)
#         else:
#             score = 0
#         score_dict['Score'] = score
#         score_list.append(score_dict)
#     score_df = pd.DataFrame(score_list)
# 
#     client.beta.assistants.delete(assistant_id=assistant.id)
#     client.beta.threads.delete(thread_id=thread.id)
#     return messages, score_df

In [12]:
def Rating_from_assessment(assessment):
    # 使用正则表达式查找所有“**Score:**”后的数字
    matches = re.findall(r'\*\*Score:\*\*\s*(-?[0-9]*\.?[0-9]+)', assessment)
    
    # 将匹配到的数字转换为浮点数，并放入列表中
    scores = [float(match) for match in matches]
    print(scores)
    # 求和
    total_score = sum(scores)

    return  total_score

In [15]:
# def main(governance_file,knowledge_dict):
#     responses = Key_Metrics_Assessment(governance_file,knowledge_dict)
#     all_KMs_assessment = str()
#     for i,aspect in zip(range(0,11,2),['Tax Transparency', 'Business Ethics', 'Accounting', 'Ownership & Control', 'Pay', 'Board']):
#         all_KMs_assessment = all_KMs_assessment+'\n\n'+aspect+':\n'+responses.data[i].content[0].text.value
#     summary = Summarizer(all_KMs_assessment)
#     return all_KMs_assessment,summary

In [13]:
items = os.listdir('50_Annual_Reports')
companies_50 = [os.getcwd()+'/50_Annual_Reports/'+item for item in items if not item.startswith('.')]
items = os.listdir('8_Annual_Reports')
companies_8 = [os.getcwd()+'/8_Annual_Reports/'+item for item in items if not item.startswith('.')]

In [14]:
Assessment_Summary_8 = pd.read_csv('Assessment_Summary_8.csv')
Scores_8 = pd.read_csv('Scores_8.csv')
Score_Gov = [56,37,49,57,47,62,None,57]
SP_Score = [59,47,55,57,51,56,None,57]
MSCI_Score = ['aa','b','a',None,'aa','aa','aa','a']
SUS_Score = [22,26.8,22.9,10.1,17.7,24.2,17.8,14.4]
Scores_8['Score_Gov'] = Score_Gov
Scores_8['SP_Score'] = SP_Score
Scores_8['MSCI_Score'] = MSCI_Score
Scores_8['SUS_Score'] = SUS_Score
merged_df = pd.merge(Assessment_Summary_8, Scores_8, on='Name', how='left')
map = {'aaa':95,'aa':80,'a':65,'bbb':50,'bb':35,'b':20,'ccc':5}
merged_df['MSCI_Score'] = merged_df['MSCI_Score'].map(map)

Example_8 = {}
Example_8['All10_MSCI'] = dict(zip(merged_df['All_flexi_10'], merged_df['MSCI_Score']))
Example_8['All10_SP'] = dict(zip(merged_df['All_flexi_10'], merged_df['SP_Score']))
Example_8['All10_SP_G'] = dict(zip(merged_df['All_flexi_10'], merged_df['Score_Gov']))
Example_8['All10_SUS'] = dict(zip(merged_df['All_flexi_10'], merged_df['SUS_Score']))
Example_8['All10_10'] = dict(zip(merged_df['All_flexi_10'], merged_df['Score_10']))

Example_8['All20_MSCI'] = dict(zip(merged_df['All_flexi_20'], merged_df['MSCI_Score']))
Example_8['All20_SP'] = dict(zip(merged_df['All_flexi_20'], merged_df['SP_Score']))
Example_8['All20_SP_G'] = dict(zip(merged_df['All_flexi_20'], merged_df['Score_Gov']))
Example_8['All20_SUS'] = dict(zip(merged_df['All_flexi_20'], merged_df['SUS_Score']))
Example_8['All20_20']= dict(zip(merged_df['All_flexi_20'], merged_df['Score_20']))

Example_8['Summary10_MSCI'] = dict(zip(merged_df['Summary_10'], merged_df['MSCI_Score']))
Example_8['Summary10_SP'] = dict(zip(merged_df['Summary_10'], merged_df['SP_Score']))
Example_8['Summary10_SP_G'] = dict(zip(merged_df['Summary_10'], merged_df['Score_Gov']))
Example_8['Summary10_SUS'] = dict(zip(merged_df['Summary_10'], merged_df['SUS_Score']))
Example_8['Summary10_10'] = dict(zip(merged_df['Summary_10'], merged_df['Score_10']))

Example_8['Summary20_MSCI'] = dict(zip(merged_df['Summary_20'], merged_df['MSCI_Score']))
Example_8['Summary20_SP'] = dict(zip(merged_df['Summary_20'], merged_df['SP_Score']))
Example_8['Summary20_SP_G'] = dict(zip(merged_df['Summary_20'], merged_df['Score_Gov']))
Example_8['Summary20_SUS'] = dict(zip(merged_df['Summary_20'], merged_df['SUS_Score']))
Example_8['Summary20_20']= dict(zip(merged_df['Summary_20'], merged_df['Score_20']))

for sub_dict in Example_8.values():
    keys_to_remove = [key for key, value in sub_dict.items() if value is None]
    for key in keys_to_remove:
        del sub_dict[key]

In [15]:
try:
    Assessment_Summary = pd.read_csv('Assessment_Summary_50_final.csv')
except:
    print("Start form zero")
output = []

In [114]:
for company in companies_50[20:]:
    name = company.split('/')[-1]
    print("Assessing {}:".format(name))
    company_output = {}
    company_output['Name'] = name
    
    if 'Assessment_Summary' in vars():
        if name in Assessment_Summary['Name'].values and not pd.isnull(Assessment_Summary.loc[Assessment_Summary['Name'] == name, 'All_flexi_20'].values[0]):
            print("Already assessed ",name)
            continue
    
    gfs = [item for item in os.listdir(company) if item.split('.')[0]=='Governance']
    if len(gfs) > 0:
        if len(gfs) > 1:
            gf = [gf for gf in gfs if gf.endswith('.pdf')][0]
        else:
            gf = gfs[0]
    else:
        print('No Governance Files Found in {}'.format(company)) 
        continue
    gf_path = os.path.join(company, gf) 
    
    responses = Key_Metrics_Assessment(gf_path,knowledge_dict) 
    all_KMs_assessment = str() 
    for i,aspect in zip(range(0,11,2),['Tax Transparency', 'Business Ethics', 'Accounting', 'Ownership & Control', 'Pay', 'Board']): 
        company_output[aspect] = responses.data[i].content[0].text.value 

    for aspect in ['Tax Transparency', 'Business Ethics', 'Accounting', 'Ownership & Control', 'Pay', 'Board']: 
        all_KMs_assessment = all_KMs_assessment+'\n\n'+aspect+':\n'+company_output[aspect] 
    company_output['All'] = all_KMs_assessment 
    
#     responses = Key_Metrics_Assessment_one(gf_path,km_df)
#     Key_Metrics = []
#     for i in responses:
#         Key_Metrics.append(i.data[0].content[0].text.value)
#     Key_Metrics_df = pd.DataFrame({
#     "Key_Issues": km_df['Group'],
#     "Key_Metrics": km_df['Key Metrics Name'],
#     "Assessments": Key_Metrics[::-1]
# })
#     Key_Issues_dict = Key_Metrics_df.groupby('Group')['Value'].agg(' '.join).reset_index().set_index('Group')['Value'].to_dict()
#     for aspect in ['Tax Transparency', 'Business Ethics', 'Accounting', 'Ownership & Control', 'Pay', 'Board']:
#         company_output[aspect+'_one'] = Key_Issues_dict[aspect]
#         all_KMs_assessment_one = all_KMs_assessment_agg+'\n\n'+aspect+':\n'+company_output[aspect+'_one'] 
#     company_output['All_one'] = all_KMs_assessment_one
    
    responses_10 = Key_Metrics_Assessment_flexi(gf_path,km_df)
    Key_Metrics_10 = []
    for i in range(0,len(responses_10),2):
        Key_Metrics_10.append(responses_10[i].content[0].text.value)
    all_KMs_assessment_10 = "\n\n".join(Key_Metrics_10)
    company_output['All_flexi_10'] = all_KMs_assessment_10

    responses_20 = Key_Metrics_Assessment_flexi(gf_path,km_df,20)
    Key_Metrics_20 = []
    for i in range(0,len(responses_20),2):
        Key_Metrics_20.append(responses_20[i].content[0].text.value)
    all_KMs_assessment_20 = "\n\n".join(Key_Metrics_20)
    company_output['All_flexi_20'] = all_KMs_assessment_20
    
    summary = Summarizer(all_KMs_assessment)
    company_output['Summary'] = summary
    # summary_agg = Summarizer(all_KMs_assessment_one)
    # company_output['Summary_agg'] = summary_one
    summary_flexi = Summarizer(all_KMs_assessment_10)
    company_output['Summary_10'] = summary_flexi
    summary_flexi = Summarizer(all_KMs_assessment_20)
    company_output['Summary_20'] = summary_flexi
    
    output.append(company_output)

Assessing Severn Trent Plc:
Already assessed  Severn Trent Plc
Assessing Admiral Group:
Already assessed  Admiral Group
Assessing Tesco plc:
Already assessed  Tesco plc
Assessing SSE plc:
Already assessed  SSE plc
Assessing Lloyds Banking Group plc:
Already assessed  Lloyds Banking Group plc
Assessing 3i Group Plc:
No Governance Files Found in /Users/mason_yu/Study/ATFC-MSc/Project/Model/Final/50_Annual_Reports/3i Group Plc
Assessing Centrica plc:
Assessing Board ......
Assessing Pay ......
Assessing Ownership & Control ......
Assessing Accounting ......
Assessing Business Ethics ......
Assessing Tax Transparency ......


Processing Key Metrics: 100%|██████████| 11/11 [03:26<00:00, 18.79s/it]
Processing Key Metrics: 100%|██████████| 6/6 [02:44<00:00, 27.33s/it]


Assessing Auto Trader Group plc:
Assessing Board ......
Assessing Pay ......
Assessing Ownership & Control ......
Assessing Accounting ......
Assessing Business Ethics ......
Assessing Tax Transparency ......


Processing Key Metrics: 100%|██████████| 11/11 [03:25<00:00, 18.68s/it]
Processing Key Metrics: 100%|██████████| 6/6 [02:39<00:00, 26.54s/it]


Assessing Coca-Cola HBC AG:
Assessing Board ......
Assessing Pay ......
Assessing Ownership & Control ......
Assessing Accounting ......
Assessing Business Ethics ......
Assessing Tax Transparency ......


Processing Key Metrics: 100%|██████████| 11/11 [03:51<00:00, 21.05s/it]
Processing Key Metrics: 100%|██████████| 6/6 [02:51<00:00, 28.61s/it]


Assessing Mondi plc:
Assessing Board ......
Assessing Pay ......
Assessing Ownership & Control ......
Assessing Accounting ......
Assessing Business Ethics ......
Assessing Tax Transparency ......


Processing Key Metrics: 100%|██████████| 11/11 [03:41<00:00, 20.10s/it]
Processing Key Metrics: 100%|██████████| 6/6 [02:43<00:00, 27.28s/it]


Assessing Capita plc:
Assessing Board ......
Assessing Pay ......
Assessing Ownership & Control ......
Assessing Accounting ......
Assessing Business Ethics ......
Assessing Tax Transparency ......


Processing Key Metrics: 100%|██████████| 11/11 [03:40<00:00, 20.02s/it]
Processing Key Metrics: 100%|██████████| 6/6 [03:14<00:00, 32.34s/it]


Assessing Experian Plc:
Assessing Board ......
Assessing Pay ......
Assessing Ownership & Control ......
Assessing Accounting ......
Assessing Business Ethics ......
Assessing Tax Transparency ......


Processing Key Metrics: 100%|██████████| 11/11 [03:47<00:00, 20.73s/it]
Processing Key Metrics: 100%|██████████| 6/6 [02:55<00:00, 29.29s/it]


Assessing Legal & General Group plc:
Assessing Board ......
Assessing Pay ......
Assessing Ownership & Control ......
Assessing Accounting ......
Assessing Business Ethics ......
Assessing Tax Transparency ......


Processing Key Metrics: 100%|██████████| 11/11 [03:54<00:00, 21.33s/it]
Processing Key Metrics: 100%|██████████| 6/6 [02:41<00:00, 26.90s/it]


Assessing Compass Group plc:
Assessing Board ......
Assessing Pay ......
Assessing Ownership & Control ......
Assessing Accounting ......
Assessing Business Ethics ......
Assessing Tax Transparency ......


Processing Key Metrics: 100%|██████████| 11/11 [03:48<00:00, 20.79s/it]
Processing Key Metrics: 100%|██████████| 6/6 [02:40<00:00, 26.83s/it]


Assessing Diageo plc:
Assessing Board ......
Assessing Pay ......
Assessing Ownership & Control ......
Assessing Accounting ......
Assessing Business Ethics ......
Assessing Tax Transparency ......


Processing Key Metrics: 100%|██████████| 11/11 [03:45<00:00, 20.46s/it]
Processing Key Metrics:  83%|████████▎ | 5/6 [02:49<00:32, 32.04s/it]

Error processing batch starting at index 100, retry 1/3: Error code: 500 - {'error': {'message': 'Failed to create vector store.', 'type': 'server_error', 'param': None, 'code': None}}
Error processing batch starting at index 100, retry 2/3: Error code: 500 - {'error': {'message': 'Failed to create vector store.', 'type': 'server_error', 'param': None, 'code': None}}
Error processing batch starting at index 100, retry 3/3: Error code: 500 - {'error': {'message': 'Failed to create vector store.', 'type': 'server_error', 'param': None, 'code': None}}


Processing Key Metrics: 100%|██████████| 6/6 [03:19<00:00, 33.26s/it]


Assessing Barratt Developments plc:
Assessing Board ......
Assessing Pay ......
Assessing Ownership & Control ......
Assessing Accounting ......
Assessing Business Ethics ......
Assessing Tax Transparency ......


Processing Key Metrics: 100%|██████████| 11/11 [03:41<00:00, 20.12s/it]
Processing Key Metrics: 100%|██████████| 6/6 [02:50<00:00, 28.42s/it]


Assessing InterContinental Hotels Group plc:
Assessing Board ......
Assessing Pay ......
Assessing Ownership & Control ......
Assessing Accounting ......
Assessing Business Ethics ......
Assessing Tax Transparency ......


Processing Key Metrics: 100%|██████████| 11/11 [03:41<00:00, 20.16s/it]
Processing Key Metrics: 100%|██████████| 6/6 [02:58<00:00, 29.78s/it]


Assessing Kingfisher:
Assessing Board ......
Assessing Pay ......
Assessing Ownership & Control ......
Assessing Accounting ......
Assessing Business Ethics ......
Assessing Tax Transparency ......


Processing Key Metrics: 100%|██████████| 11/11 [03:06<00:00, 17.00s/it]
Processing Key Metrics: 100%|██████████| 6/6 [02:24<00:00, 24.09s/it]


Assessing Burberry Group plc:
Assessing Board ......
Assessing Pay ......
Assessing Ownership & Control ......
Assessing Accounting ......
Assessing Business Ethics ......
Assessing Tax Transparency ......


Processing Key Metrics: 100%|██████████| 11/11 [03:51<00:00, 21.04s/it]
Processing Key Metrics: 100%|██████████| 6/6 [02:35<00:00, 26.00s/it]


Assessing Croda International plc:
Assessing Board ......
Assessing Pay ......
Assessing Ownership & Control ......
Assessing Accounting ......
Assessing Business Ethics ......
Assessing Tax Transparency ......


Processing Key Metrics: 100%|██████████| 11/11 [03:50<00:00, 20.97s/it]
Processing Key Metrics: 100%|██████████| 6/6 [02:56<00:00, 29.41s/it]


Assessing Alfa Financial Software Holdings Ltd:
Assessing Board ......
Assessing Pay ......
Assessing Ownership & Control ......
Assessing Accounting ......
Assessing Business Ethics ......
Assessing Tax Transparency ......


Processing Key Metrics: 100%|██████████| 11/11 [03:44<00:00, 20.43s/it]
Processing Key Metrics: 100%|██████████| 6/6 [03:03<00:00, 30.51s/it]


Assessing M&G plc:
Assessing Board ......
Assessing Pay ......
Assessing Ownership & Control ......
Assessing Accounting ......
Assessing Business Ethics ......
Assessing Tax Transparency ......


Processing Key Metrics: 100%|██████████| 11/11 [03:56<00:00, 21.48s/it]
Processing Key Metrics: 100%|██████████| 6/6 [03:16<00:00, 32.68s/it]


Assessing Schroders plc:
Assessing Board ......
Assessing Pay ......
Assessing Ownership & Control ......
Assessing Accounting ......
Assessing Business Ethics ......
Assessing Tax Transparency ......


Processing Key Metrics: 100%|██████████| 11/11 [03:46<00:00, 20.58s/it]
Processing Key Metrics: 100%|██████████| 6/6 [02:48<00:00, 28.02s/it]


Assessing Barclays plc:
Assessing Board ......
Assessing Pay ......
Assessing Ownership & Control ......
Assessing Accounting ......
Assessing Business Ethics ......
Assessing Tax Transparency ......


Processing Key Metrics: 100%|██████████| 11/11 [03:40<00:00, 20.05s/it]
Processing Key Metrics: 100%|██████████| 6/6 [02:58<00:00, 29.69s/it]


Assessing Associated British Foods plc:
Assessing Board ......
Assessing Pay ......
Assessing Ownership & Control ......
Assessing Accounting ......
Assessing Business Ethics ......
Assessing Tax Transparency ......


Processing Key Metrics: 100%|██████████| 11/11 [03:50<00:00, 20.91s/it]
Processing Key Metrics: 100%|██████████| 6/6 [02:54<00:00, 29.10s/it]


Assessing Segro Plc:
Assessing Board ......
Assessing Pay ......
Assessing Ownership & Control ......
Assessing Accounting ......
Assessing Business Ethics ......
Assessing Tax Transparency ......


Processing Key Metrics: 100%|██████████| 11/11 [03:56<00:00, 21.46s/it]
Processing Key Metrics: 100%|██████████| 6/6 [02:50<00:00, 28.50s/it]


Assessing Intertek Group plc:
Assessing Board ......
Assessing Pay ......
Assessing Ownership & Control ......
Assessing Accounting ......
Assessing Business Ethics ......
Assessing Tax Transparency ......


Processing Key Metrics: 100%|██████████| 11/11 [03:43<00:00, 20.35s/it]
Processing Key Metrics: 100%|██████████| 6/6 [03:12<00:00, 32.11s/it]


Assessing Pearson plc:
Assessing Board ......
Assessing Pay ......
Assessing Ownership & Control ......
Assessing Accounting ......
Assessing Business Ethics ......
Assessing Tax Transparency ......


Processing Key Metrics: 100%|██████████| 11/11 [03:28<00:00, 18.95s/it]
Processing Key Metrics: 100%|██████████| 6/6 [02:41<00:00, 26.87s/it]


Assessing Smiths Group Plc:
Assessing Board ......
Assessing Pay ......
Assessing Ownership & Control ......
Assessing Accounting ......
Assessing Business Ethics ......
Assessing Tax Transparency ......


Processing Key Metrics: 100%|██████████| 11/11 [03:50<00:00, 20.95s/it]
Processing Key Metrics: 100%|██████████| 6/6 [02:44<00:00, 27.34s/it]


Assessing Rentokil Initial Plc:
Assessing Board ......
Assessing Pay ......
Assessing Ownership & Control ......
Assessing Accounting ......
Assessing Business Ethics ......
Assessing Tax Transparency ......


Processing Key Metrics: 100%|██████████| 11/11 [03:30<00:00, 19.11s/it]
Processing Key Metrics: 100%|██████████| 6/6 [02:54<00:00, 29.05s/it]


In [115]:
this_time_df = pd.DataFrame(output)
if 'Assessment_Summary' in vars():
    output_df = pd.concat([Assessment_Summary,pd.DataFrame(output)])
    # Assessment_Summary = Assessment_Summary.drop(columns=['All_flexi_20','Summary_20',])
    # output_df = Assessment_Summary.merge(this_time_df,how='left',on='Name')
else:
    output_df = pd.DataFrame(output)
output_df.to_csv('Assessment_Summary_50_final.csv',index=False)

In [124]:
output_df

Unnamed: 0,Name,Tax Transparency,Business Ethics,Accounting,Ownership & Control,Pay,Board,All,All_flexi_10,All_flexi_20,Summary,Summary_10,Summary_20
0,Shell plc,Based on the provided Key Metrics related to T...,Based on the provided Key Metrics related to B...,Based on the provided Key Metrics related to A...,Based on the provided Key Metrics related to O...,Based on the provided Key Metrics related to e...,Based on the provided Key Metrics and the info...,\n\nTax Transparency:\nBased on the provided K...,Based on the provided key metrics and the info...,Here is the evaluation of the Key Metrics base...,**Governance Indicators Report: Evaluation Sum...,**Governance Indicators Assessment Report**\n\...,**Governance Indicators Report**\n\nThe assess...
1,NatWest Group plc,Based on the information extracted from the do...,Based on the information extracted from the do...,Based on the information extracted from the do...,Based on the information extracted from the do...,Based on the information extracted from the do...,Based on the information provided in the docum...,\n\nTax Transparency:\nBased on the informatio...,Here is the evaluation of the Key Metrics base...,Here is the evaluation of the Key Metrics for ...,**Tax Transparency Assessment Report**\n\nThe ...,### Governance Indicators Report Summary\n\nTh...,### Governance Indicators Report\n\nThe govern...
2,Reckitt Benckiser Group Plc,"Based on the provided governance document, her...","Based on the provided governance document, her...","Based on the provided accounting document, her...","Based on the provided governance document, her...","Based on the provided remuneration document, h...","Based on the provided governance document, her...",\n\nTax Transparency:\nBased on the provided g...,Here is the evaluation of the Key Metrics for ...,Here is the evaluation of the Key Metrics for ...,**Tax Transparency**\n\nIn the realm of tax tr...,**Governance Indicators Report Summary**\n\nTh...,**Governance Structure and Board Independence*...
3,DCC plc,Based on the provided Key Metrics related to T...,Based on the provided Key Metrics related to B...,Based on the provided Key Metrics related to A...,Based on the provided Key Metrics related to O...,Based on the provided Key Metrics related to P...,Based on the provided Key Metrics and the cont...,\n\nTax Transparency:\nBased on the provided K...,Here is the evaluation of the Key Metrics base...,Based on the provided key metrics and the info...,**Governance Indicators Report Summary**\n\nTh...,**Governance Indicators Report: Summary Assess...,**Governance Indicators Report: Summary of Ass...
4,International Consolidated Airlines Group SA,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics for ...,\n\nTax Transparency:\nHere is the evaluation ...,Here is the evaluation of the Key Metrics base...,Here is the evaluation of the Key Metrics for ...,**Tax Transparency Assessment** \nThe evaluat...,**Governance Indicators Report: Summary of Ass...,**Governance Indicators Report: Summary and Ev...
5,Standard Chartered plc,Based on the information extracted from the do...,Based on the information extracted from the do...,Based on the information extracted from the do...,Based on the information extracted from the do...,Based on the information extracted from the do...,Based on the provided information from the doc...,\n\nTax Transparency:\nBased on the informatio...,Here is the evaluation of the Key Metrics base...,Here is the evaluation of the Key Metrics for ...,**Governance Indicators Report: Summary Assess...,**Governance Indicators Report Summary**\n\nTh...,**Governance Indicators Report**\n\nThe govern...
6,Antofagasta plc,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics rela...,"Based on the provided document, here is the ev...",\n\nTax Transparency:\nHere is the evaluation ...,Here is the evaluation of the Key Metrics for ...,Here is the evaluation of the Key Metrics for ...,**Governance Indicators Report**\n\nThe assess...,**Governance Indicators Report**\n\nThe assess...,**Governance Indicators Report Summary**\n\nTh...
7,Taylor Wimpey plc,"Based on the provided document, here is the ev...","Based on the provided document, here is the ev...","Based on the provided document, here is the ev...","Based on the provided document, here is the ev...","Based on the provided document, here is the ev...","Based on the provided document, here is the ev...",\n\nTax Transparency:\nBased on the provided d...,Here is the evaluation of the Key Metrics base...,Here is the evaluation of the Key Metrics for ...,**Tax Transparency Report** \nThe evaluation ...,**Governance Indicators Report Summary**\n\nTh...,**Governance Structure Overview**\n\nThe gover...
8,Anglo American,Here is the evaluation of the Key Metric relat...,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics for ...,\n\nTax Transparency:\nHere is the evaluation ...,Here is the evaluation of the Key Metrics base...,Here is the evaluation of the Key Metrics base...,**Governance Indicators Report Summary**\n\nTh...,**Governance Indicators Report: Summary and Re...,**Governance Indicators Report Summary**\n\nTh...
9,Halma plc,Based on the information extracted from the do...,Based on the information extracted from the do...,Based on the information extracted from the do...,Based on the information extracted from the do...,Based on the information extracted from the do...,"Based on the provided document, here is the ev...",\n\nTax Transparency:\nBased on the informatio...,Here is the evaluation of the Key Metrics for ...,It seems that I couldn't find any relevant con...,**Tax Transparency Report for Halma plc**\n\nT...,**Governance Indicators Report Summary**\n\nTh...,**Governance Indicators Report Summary**\n\nTh...


In [39]:
try:
    Scores = pd.read_csv('Scores_20_less.csv')
except:
    print("Start from zero")

In [40]:
Assessment_Summary = pd.read_csv('Assessment_Summary_50_final.csv',nrows=20)

In [42]:
# Score_messages,score_df = Score_Rating(Assessment_Summary[['Name','Summary']],batch_size=10)
# Score_messages_10, score_df_10 = Score_Rating(Assessment_Summary[['Name', 'Summary_10']],batch_size=10)
# Score_messages_20, score_df_20 = Score_Rating(Assessment_Summary[['Name', 'Summary_20']],batch_size=10)
# score_df.rename(columns={'Score':'Score_10_b=10','Score_detail':'Score_detail_10_b=10'}, inplace=True)
# score_df_10.rename(columns={'Score':'Score_10_b=10','Score_detail':'Score_detail_10_b=10'}, inplace=True)
# score_df_20.rename(columns={'Score':'Score_20_b=10','Score_detail':'Score_detail_20_b=10'}, inplace=True)

Score_messages,score_df = Score_Rating(Assessment_Summary[['Name','All']],Assessment=True)
Score_messages_10, score_df_10 = Score_Rating(Assessment_Summary[['Name', 'All_flexi_10']],Assessment=True)
Score_messages_20, score_df_20 = Score_Rating(Assessment_Summary[['Name', 'All_flexi_20']],Assessment=True)
score_df.rename(columns={'Score':'Score_A','Score_detail':'Score_detail_A'}, inplace=True)
score_df_10.rename(columns={'Score':'Score_A_10','Score_detail':'Score_detail_A_10'}, inplace=True)
score_df_20.rename(columns={'Score':'Score_A_20','Score_detail':'Score_detail_A_20'}, inplace=True)

score_df_final = score_df.merge(score_df_10,how='left',on='Name').merge(score_df_20,how='left',on='Name')
# score_df_final = score_df_20

# score_df_final['Score_directly'] = Assessment_Summary['All'].apply(lambda x:100 - Rating_from_assessment(x))
# score_df_final['Score_directly_10'] = Assessment_Summary['All_flexi_10'].apply(lambda x:100 - Rating_from_assessment(x))
# score_df_final['Score_directly_20'] = Assessment_Summary['All_flexi_20'].apply(lambda x:100 - Rating_from_assessment(x))

Score removed
Rating for Shell plc......
Score removed
Rating for NatWest Group plc......
Score removed
Rating for Reckitt Benckiser Group Plc......
Score removed
Rating for DCC plc......
Score removed
Rating for International Consolidated Airlines Group SA......
Score removed
Rating for Standard Chartered plc......
Score removed
Rating for Antofagasta plc......
Score removed
Rating for Taylor Wimpey plc......
Score removed
Rating for Anglo American......
Score removed
Rating for Halma plc......
Score removed
Rating for AstraZeneca plc......
Score removed
Rating for Haleon plc......
Score removed
Rating for Land Securities Group plc......
Score removed
Rating for GSK plc......
Score removed
Rating for London Stock Exchange Group plc......
Score removed
Rating for Phoenix Group Holdings Plc......
Score removed
Rating for Bunzl plc......
Score removed
Rating for HSBC Holdings plc......
Score removed
Rating for Hikma Pharmaceuticals......
Score removed
Rating for RELX plc......
Score remo

In [45]:
score_df_final

Unnamed: 0,Name,Score_detail_A,Score_A,Score_detail_A_10,Score_A_10,Score_detail_A_20,Score_A_20
0,Shell plc,Based on the evaluations provided for each cat...,100,Based on the evaluations provided for each key...,40,Based on the evaluations of the key metrics pr...,0
1,NatWest Group plc,Based on the evaluations provided for each cat...,100,Based on the evaluations provided for each key...,100,Here is the evaluation of the Key Metrics for ...,0
2,Reckitt Benckiser Group Plc,Based on the evaluations provided for each cat...,100,Based on the evaluations provided for each key...,100,Here is the evaluation of the Key Metrics for ...,0
3,DCC plc,Based on the evaluations provided for each cat...,100,Based on the evaluations provided for each key...,80,Based on the evaluations of the key metrics pr...,0
4,International Consolidated Airlines Group SA,Based on the evaluations provided for each cat...,100,Based on the evaluations provided for each key...,80,Based on the evaluations of the key metrics pr...,0
5,Standard Chartered plc,Based on the evaluations provided for each cat...,100,Based on the evaluations provided for each key...,80,Based on the evaluations of the key metrics pr...,0
6,Antofagasta plc,Based on the evaluations provided for each cat...,100,Based on the evaluations provided for each key...,80,Based on the evaluations of the key metrics pr...,0
7,Taylor Wimpey plc,Based on the evaluations provided for each cat...,100,Based on the evaluations provided for each key...,80,Based on the evaluations of the key metrics pr...,0
8,Anglo American,Based on the evaluations provided for each cat...,100,Based on the evaluations provided for each key...,80,Based on the evaluations of the key metrics pr...,0
9,Halma plc,Based on the evaluations provided for each cat...,100,Based on the evaluations provided for each key...,80,Based on the evaluations of the key metrics pr...,0


In [21]:
Scores_temp = Scores.merge(score_df_final,how='left',on='Name')
Scores_temp.to_csv('Scores_50_ff.csv')

In [24]:
Scores = pd.read_csv('Scores_50_ff.csv')
Scores_final = Scores.copy(deep=True)

In [23]:
Scores_temp

Unnamed: 0,Name,Score_detail,Score,Score_detail_10,Score_10,Score_detail_20,Score_20,Score_directly,Score_directly_10,Score_directly_20,...,Score_detail_Summary20_SUS,Score_Summary20_SUS,Score_detail_Summary20_20,Score_Summary20_20,Score_detail_10_b=10_x,Score_10_b=10_x,Score_detail_10_b=10_y,Score_10_b=10_y,Score_detail_20_b=10,Score_20_b=10
0,Shell plc,"Based on the evaluation summary provided, here...",73,Based on the Governance Indicators Assessment ...,51,"Based on the governance indicators report, the...",65,91.375,91.627,95.45,...,Based on the detailed evaluation of the govern...,60,To rate the governance performance based on th...,55,"Based on the evaluation summary provided, here...",60,Based on the assessment of the company's gover...,30,Based on the governance performance summary pr...,63
1,NatWest Group plc,"Based on the evaluation summary provided, here...",96,Based on the Governance Indicators Report Summ...,64,"Based on the governance indicators report, the...",70,100.0,92.012,95.2,...,Based on the detailed evaluation of the govern...,58,To rate the governance performance based on th...,60,"Based on the evaluation summary provided, here...",96,Based on the assessment of the company's gover...,35,Based on the governance performance summary pr...,55
2,Reckitt Benckiser Group Plc,"Based on the evaluation summary provided, here...",83,Based on the Governance Indicators Report Summ...,64,"Based on the governance indicators report, the...",45,99.7,100.627,84.478,...,Based on the detailed evaluation of the govern...,55,To rate the governance performance based on th...,60,"Based on the evaluation summary provided, here...",84,Based on the assessment of the company's gover...,40,Based on the governance performance summary pr...,56
3,International Consolidated Airlines Group SA,"Based on the evaluation summary provided, here...",85,Based on the Governance Indicators Report Summ...,56,"Based on the governance indicators report, the...",30,101.369,90.479,99.719,...,Based on the detailed evaluation of the govern...,64,To rate the governance performance based on th...,60,"Based on the evaluation summary provided, here...",82,Based on the assessment of the company's gover...,35,Based on the governance performance summary pr...,55
4,Standard Chartered plc,"Based on the evaluation summary provided, here...",90,Based on the Governance Indicators Report Summ...,55,"Based on the governance indicators report, the...",35,99.4,73.802,88.048,...,Based on the detailed evaluation of the govern...,54,To rate the governance performance based on th...,65,"Based on the evaluation summary provided, here...",85,Based on the assessment of the company's gover...,35,Based on the governance performance summary pr...,56
5,Antofagasta plc,"Based on the evaluation summary provided, here...",87,"Based on the Governance Indicators Report, the...",58,Based on the governance indicators report summ...,30,96.9,92.521,83.338,...,Based on the detailed evaluation of the govern...,50,To rate the governance performance based on th...,60,"Based on the evaluation summary provided, here...",84,Based on the assessment of the company's gover...,20,Based on the governance performance summary pr...,51
6,Taylor Wimpey plc,"Based on the evaluation summary provided, here...",92,Based on the Governance Indicators Report Summ...,55,"Based on the governance structure overview, th...",15,99.1,87.202,93.8,...,Based on the detailed evaluation of the govern...,48,To rate the governance performance based on th...,60,"Based on the evaluation summary provided, here...",84,Based on the assessment of the company's gover...,30,Based on the governance performance summary pr...,49
7,Anglo American,"Based on the evaluation summary provided, here...",92,Based on the Governance Indicators Report Summ...,56,Based on the Governance Indicators Report summ...,25,97.7,95.509,90.907,...,Based on the detailed evaluation of the govern...,49,To rate the governance performance based on th...,50,"Based on the evaluation summary provided, here...",84,Based on the assessment of the company's gover...,40,Based on the governance performance summary pr...,51
8,Halma plc,Based on the evaluation summary provided for H...,91,Based on the Governance Indicators Report Summ...,57,Based on the Governance Indicators Report summ...,30,97.0,91.986,93.85,...,Based on the detailed evaluation of the govern...,54,To rate the governance performance based on th...,70,"Based on the evaluation summary provided, here...",82,Based on the assessment of the company's gover...,30,Based on the governance performance summary pr...,59
9,AstraZeneca plc,"Based on the evaluation summary provided, here...",94,Based on the Governance Indicators Report Summ...,57,Based on the Governance Indicators Report summ...,25,100.0,93.939,90.471,...,Based on the detailed evaluation of the govern...,51,To rate the governance performance based on th...,65,Based on the evaluation of the company's gover...,90,Based on the evaluation of AstraZeneca's gover...,65,Based on the governance performance summary pr...,60


In [None]:
Scores_final = Scores.copy(deep=True)
for Example_key in ['All10_MSCI', 'All10_SP', 'All10_SP_G', 'All10_SUS', 'All10_10']:
    example = Example_8[Example_key]
    sm,df = Score_Rating(Assessment_Summary[['Name', 'All_flexi_10']],Example=str(example))
    df.rename(columns={'Score':'Score_'+Example_key,'Score_detail':'Score_detail_'+Example_key}, inplace=True)
    Scores_final.merge(df,how='left',on='Name')

In [26]:
for Example_key in ['Summary10_MSCI', 'Summary10_SP', 'Summary10_SP_G', 'Summary10_SUS', 'Summary10_10']:
    if f'Score_{Example_key}_b=10' in Scores_final.columns:
        print(f"{Example_key} 已经处理过，跳过！")
        continue  # 跳过已经处理过的 Example_key
    example = Example_8[Example_key]
    sm,df = Score_Rating(Assessment_Summary[['Name', 'Summary_10']],Example=str(example),batch_size=10)
    df.rename(columns={'Score':'Score_'+Example_key+'_b=10','Score_detail':'Score_detail_'+Example_key+'_b=10'}, inplace=True)
    Scores_final = Scores_final.merge(df,how='left',on='Name')
    print(Example_key,', Done!')

Rating for Shell plc......
Rating for NatWest Group plc......
Rating for Reckitt Benckiser Group Plc......
Rating for DCC plc......
Rating for International Consolidated Airlines Group SA......
Rating for Standard Chartered plc......
Rating for Antofagasta plc......
Rating for Taylor Wimpey plc......
Rating for Anglo American......
Rating for Halma plc......
Rating for AstraZeneca plc......
Rating for Haleon plc......
Rating for Land Securities Group plc......
Rating for GSK plc......
Rating for London Stock Exchange Group plc......
Rating for Phoenix Group Holdings Plc......
Rating for Bunzl plc......
Rating for HSBC Holdings plc......
Rating for Hikma Pharmaceuticals......
Rating for RELX plc......
Rating for Severn Trent Plc......
Rating for Admiral Group......
Rating for Tesco plc......
Rating for SSE plc......
Rating for Lloyds Banking Group plc......
Rating for Centrica plc......
Rating for Auto Trader Group plc......
Rating for Coca-Cola HBC AG......
Rating for Mondi plc......
R

In [27]:
Scores_final

Unnamed: 0.1,Unnamed: 0,Name,Score_detail,Score,Score_detail_10,Score_10,Score_detail_20,Score_20,Score_directly,Score_directly_10,...,Score_detail_Summary10_MSCI_b=10,Score_Summary10_MSCI_b=10,Score_detail_Summary10_SP_b=10,Score_Summary10_SP_b=10,Score_detail_Summary10_SP_G_b=10,Score_Summary10_SP_G_b=10,Score_detail_Summary10_SUS_b=10,Score_Summary10_SUS_b=10,Score_detail_Summary10_10_b=10,Score_Summary10_10_b=10
0,0,Shell plc,"Based on the evaluation summary provided, here...",73,Based on the Governance Indicators Assessment ...,51,"Based on the governance indicators report, the...",65,91.375,91.627,...,Based on the governance performance evaluation...,60,Based on the governance performance evaluation...,50,Based on the governance performance evaluation...,0,Based on the analysis of the Governance Indica...,10,Based on the governance performance evaluation...,35
1,1,NatWest Group plc,"Based on the evaluation summary provided, here...",96,Based on the Governance Indicators Report Summ...,64,"Based on the governance indicators report, the...",70,100.0,92.012,...,Based on the governance performance evaluation...,75,Based on the governance performance evaluation...,20,Based on the governance performance evaluation...,0,Based on the evaluation of the Governance Indi...,55,Based on the governance performance evaluation...,30
2,2,Reckitt Benckiser Group Plc,"Based on the evaluation summary provided, here...",83,Based on the Governance Indicators Report Summ...,64,"Based on the governance indicators report, the...",45,99.7,100.627,...,Based on the governance performance evaluation...,70,Based on the governance performance evaluation...,50,Based on the governance performance evaluation...,0,Based on the evaluation of the Governance Indi...,80,Based on the governance performance evaluation...,45
3,3,International Consolidated Airlines Group SA,"Based on the evaluation summary provided, here...",85,Based on the Governance Indicators Report Summ...,56,"Based on the governance indicators report, the...",30,101.369,90.479,...,Based on the governance performance evaluation...,70,Based on the governance performance evaluation...,30,Based on the governance performance evaluation...,0,Based on the evaluation of the Governance Indi...,55,Based on the governance performance evaluation...,35
4,4,Standard Chartered plc,"Based on the evaluation summary provided, here...",90,Based on the Governance Indicators Report Summ...,55,"Based on the governance indicators report, the...",35,99.4,73.802,...,Based on the governance performance evaluation...,65,Based on the governance performance evaluation...,35,Based on the governance performance evaluation...,0,Based on the evaluation of the Governance Indi...,60,Based on the governance performance evaluation...,35
5,5,Antofagasta plc,"Based on the evaluation summary provided, here...",87,"Based on the Governance Indicators Report, the...",58,Based on the governance indicators report summ...,30,96.9,92.521,...,Based on the governance performance evaluation...,55,Based on the governance performance evaluation...,55,Based on the governance performance evaluation...,0,Based on the evaluation of the Governance Indi...,40,Based on the governance performance evaluation...,30
6,6,Taylor Wimpey plc,"Based on the evaluation summary provided, here...",92,Based on the Governance Indicators Report Summ...,55,"Based on the governance structure overview, th...",15,99.1,87.202,...,Based on the governance performance evaluation...,60,Based on the governance performance evaluation...,50,Based on the governance performance evaluation...,0,Based on the evaluation of the Governance Indi...,60,Based on the governance performance evaluation...,45
7,7,Anglo American,"Based on the evaluation summary provided, here...",92,Based on the Governance Indicators Report Summ...,56,Based on the Governance Indicators Report summ...,25,97.7,95.509,...,Based on the governance performance evaluation...,65,Based on the governance performance evaluation...,60,Based on the governance performance evaluation...,0,Based on the evaluation of the Governance Indi...,75,Based on the governance performance evaluation...,50
8,8,Halma plc,Based on the evaluation summary provided for H...,91,Based on the Governance Indicators Report Summ...,57,Based on the Governance Indicators Report summ...,30,97.0,91.986,...,Based on the governance performance evaluation...,60,Based on the governance performance evaluation...,35,Based on the governance performance evaluation...,0,Based on the evaluation of the Governance Indi...,55,Based on the governance performance evaluation...,55
9,9,AstraZeneca plc,"Based on the evaluation summary provided, here...",94,Based on the Governance Indicators Report Summ...,57,Based on the Governance Indicators Report summ...,25,100.0,93.939,...,Based on the governance performance evaluation...,0,Based on the governance performance evaluation...,45,Based on the governance performance evaluation...,0,Based on the governance performance evaluation...,0,Based on the governance performance evaluation...,45


In [30]:
Scores_final.to_csv('Scores_50_ff.csv',index=False)

In [None]:
Scores_final_csv = Scores_final[~Scores_final['Name'].isin(['Pearson plc','DCC plc'])]

In [38]:
Scores_final_csv.to_csv('Scores_final_50.csv',index=False)

In [25]:
if 'Scores' in vars():
    # score_df_final = pd.concat([Scores,score_df_final])
    # Scores_temp = Scores.drop(columns=['Score_directly_20','Score_20','Score_detail_20'])
    score_df_final = Scores.merge(score_df_final,how='left',on='Name')
score_df_final.to_csv('Scores.csv',index=False)

In [31]:
Scores_final.columns

Index(['Unnamed: 0', 'Name', 'Score_detail', 'Score', 'Score_detail_10',
       'Score_10', 'Score_detail_20', 'Score_20', 'Score_directly',
       'Score_directly_10', 'Score_directly_20', 'Score_detail_Summary10_MSCI',
       'Score_Summary10_MSCI', 'Score_detail_Summary10_SP',
       'Score_Summary10_SP', 'Score_detail_Summary10_SP_G',
       'Score_Summary10_SP_G', 'Score_detail_Summary10_SUS',
       'Score_Summary10_SUS', 'Score_detail_Summary10_10',
       'Score_Summary10_10', 'Score_detail_Summary20_MSCI',
       'Score_Summary20_MSCI', 'Score_detail_Summary20_SP',
       'Score_Summary20_SP', 'Score_detail_Summary20_SP_G',
       'Score_Summary20_SP_G', 'Score_detail_Summary20_SUS',
       'Score_Summary20_SUS', 'Score_detail_Summary20_20',
       'Score_Summary20_20', 'Score_detail_10_b=10_x', 'Score_10_b=10_x',
       'Score_detail_10_b=10_y', 'Score_10_b=10_y', 'Score_detail_20_b=10',
       'Score_20_b=10', 'Score_detail_Summary10_MSCI_b=10',
       'Score_Summary10_MSCI_

In [78]:
from PyPDF2 import PdfReader

def count_pages_in_file(gf_path):
    # Check if the file is a PDF or a TXT
    if gf_path.endswith('.txt'):
        with open(gf_path, 'r', encoding='utf-8') as file:
            content = file.read()
            # Count the number of page breaks, assuming they are represented by the form feed character ('\f')
            page_count = content.count('\f') + 1  # +1 because the last page won't have a delimiter
    elif gf_path.endswith('.pdf'):
        with open(gf_path, 'rb') as file:
            pdf_reader = PdfReader(file)
            page_count = len(pdf_reader.pages)
    else:
        raise ValueError("Unsupported file format")
    
    return page_count

In [79]:
for company in companies_50[:20]:
    name = company.split('/')[-1]
    company_output = {}
    company_output['Name'] = name
    
    gfs = [item for item in os.listdir(company) if item.split('.')[0]=='Governance']
    if len(gfs) > 0:
        if len(gfs) > 1:
            gf = [gf for gf in gfs if gf.endswith('.pdf')][0]
        else:
            gf = gfs[0]
    else:
        print('No Governance Files Found in {}'.format(company)) 
    gf_path = os.path.join(company, gf) 
    try:
        page_count = count_pages_in_file(gf_path)
        print("The file {} has {} pages.".format(gf_path, page_count))
    except Exception as e:
        print("An error occurred while counting pages in {}: {}".format(gf_path, str(e)))

The file /Users/mason_yu/Study/ATFC-MSc/Project/Model/Final/50_Annual_Reports/Shell plc/Governance.txt has 57 pages.
The file /Users/mason_yu/Study/ATFC-MSc/Project/Model/Final/50_Annual_Reports/NatWest Group plc/Governance.pdf has 52 pages.
The file /Users/mason_yu/Study/ATFC-MSc/Project/Model/Final/50_Annual_Reports/Reckitt Benckiser Group Plc/Governance.pdf has 52 pages.
The file /Users/mason_yu/Study/ATFC-MSc/Project/Model/Final/50_Annual_Reports/DCC plc/Governance.pdf has 18 pages.
The file /Users/mason_yu/Study/ATFC-MSc/Project/Model/Final/50_Annual_Reports/International Consolidated Airlines Group SA/Governance.pdf has 31 pages.
The file /Users/mason_yu/Study/ATFC-MSc/Project/Model/Final/50_Annual_Reports/Standard Chartered plc/Governance.pdf has 67 pages.
The file /Users/mason_yu/Study/ATFC-MSc/Project/Model/Final/50_Annual_Reports/Antofagasta plc/Governance.pdf has 78 pages.
The file /Users/mason_yu/Study/ATFC-MSc/Project/Model/Final/50_Annual_Reports/Taylor Wimpey plc/Governa

In [80]:
for company in companies_8:
    name = company.split('/')[-1]
    company_output = {}
    company_output['Name'] = name
    
    gfs = [item for item in os.listdir(company) if item.split('.')[0]=='Governance']
    if len(gfs) > 0:
        if len(gfs) > 1:
            gf = [gf for gf in gfs if gf.endswith('.pdf')][0]
        else:
            gf = gfs[0]
    else:
        print('No Governance Files Found in {}'.format(company)) 
    gf_path = os.path.join(company, gf) 
    try:
        page_count = count_pages_in_file(gf_path)
        print("The file {} has {} pages.".format(gf_path, page_count))
    except Exception as e:
        print("An error occurred while counting pages in {}: {}".format(gf_path, str(e)))

The file /Users/mason_yu/Study/ATFC-MSc/Project/Model/Final/8_Annual_Reports/Barclays PLC (UK)/Governance.pdf has 58 pages.
The file /Users/mason_yu/Study/ATFC-MSc/Project/Model/Final/8_Annual_Reports/Volkswagen AG (Europe)/Governance.txt has 52 pages.
The file /Users/mason_yu/Study/ATFC-MSc/Project/Model/Final/8_Annual_Reports/Deutsche Bank (Europe)/Governance.txt has 30 pages.
The file /Users/mason_yu/Study/ATFC-MSc/Project/Model/Final/8_Annual_Reports/Aroundtown SA (Europe)/Governance.txt has 19 pages.
The file /Users/mason_yu/Study/ATFC-MSc/Project/Model/Final/8_Annual_Reports/Natwest Group PLC (UK)/Governance.pdf has 66 pages.
The file /Users/mason_yu/Study/ATFC-MSc/Project/Model/Final/8_Annual_Reports/HSBC Holdings PLC (UK)/Governance.pdf has 56 pages.
The file /Users/mason_yu/Study/ATFC-MSc/Project/Model/Final/8_Annual_Reports/Societe Generale (Europe)/Governance.pdf has 58 pages.
The file /Users/mason_yu/Study/ATFC-MSc/Project/Model/Final/8_Annual_Reports/Vodafone Group PLC (UK

In [36]:
Scores = pd.read_csv('Scores_50_ff.csv')

In [38]:
Scores.columns

Index(['Name', 'Score_detail', 'Score', 'Score_detail_10', 'Score_10',
       'Score_detail_20', 'Score_20', 'Score_directly', 'Score_directly_10',
       'Score_directly_20', 'Score_detail_Summary10_MSCI',
       'Score_Summary10_MSCI', 'Score_detail_Summary10_SP',
       'Score_Summary10_SP', 'Score_detail_Summary10_SP_G',
       'Score_Summary10_SP_G', 'Score_detail_Summary10_SUS',
       'Score_Summary10_SUS', 'Score_detail_Summary10_10',
       'Score_Summary10_10', 'Score_detail_Summary20_MSCI',
       'Score_Summary20_MSCI', 'Score_detail_Summary20_SP',
       'Score_Summary20_SP', 'Score_detail_Summary20_SP_G',
       'Score_Summary20_SP_G', 'Score_detail_Summary20_SUS',
       'Score_Summary20_SUS', 'Score_detail_Summary20_20',
       'Score_Summary20_20', 'Score_detail_10_b=10', 'Score_10_b=10',
       'Score_detail_20_b=10', 'Score_20_b=10',
       'Score_detail_Summary10_MSCI_b=10', 'Score_Summary10_MSCI_b=10',
       'Score_detail_Summary10_SP_b=10', 'Score_Summary10_SP_b=

In [50]:
Assessment_Summary

Unnamed: 0,Name,Tax Transparency,Business Ethics,Accounting,Ownership & Control,Pay,Board,All,All_flexi_10,All_flexi_20,Summary,Summary_10,Summary_20
0,Shell plc,Based on the provided Key Metrics related to T...,Based on the provided Key Metrics related to B...,Based on the provided Key Metrics related to A...,Based on the provided Key Metrics related to O...,Based on the provided Key Metrics related to e...,Based on the provided Key Metrics and the info...,\n\nTax Transparency:\nBased on the provided K...,Based on the provided key metrics and the info...,Here is the evaluation of the Key Metrics base...,**Governance Indicators Report: Evaluation Sum...,**Governance Indicators Assessment Report**\n\...,**Governance Indicators Report**\n\nThe assess...
1,NatWest Group plc,Based on the information extracted from the do...,Based on the information extracted from the do...,Based on the information extracted from the do...,Based on the information extracted from the do...,Based on the information extracted from the do...,Based on the information provided in the docum...,\n\nTax Transparency:\nBased on the informatio...,Here is the evaluation of the Key Metrics base...,Here is the evaluation of the Key Metrics for ...,**Tax Transparency Assessment Report**\n\nThe ...,### Governance Indicators Report Summary\n\nTh...,### Governance Indicators Report\n\nThe govern...
2,Reckitt Benckiser Group Plc,"Based on the provided governance document, her...","Based on the provided governance document, her...","Based on the provided accounting document, her...","Based on the provided governance document, her...","Based on the provided remuneration document, h...","Based on the provided governance document, her...",\n\nTax Transparency:\nBased on the provided g...,Here is the evaluation of the Key Metrics for ...,Here is the evaluation of the Key Metrics for ...,**Tax Transparency**\n\nIn the realm of tax tr...,**Governance Indicators Report Summary**\n\nTh...,**Governance Structure and Board Independence*...
3,DCC plc,Based on the provided Key Metrics related to T...,Based on the provided Key Metrics related to B...,Based on the provided Key Metrics related to A...,Based on the provided Key Metrics related to O...,Based on the provided Key Metrics related to P...,Based on the provided Key Metrics and the cont...,\n\nTax Transparency:\nBased on the provided K...,Here is the evaluation of the Key Metrics base...,Based on the provided key metrics and the info...,**Governance Indicators Report Summary**\n\nTh...,**Governance Indicators Report: Summary Assess...,**Governance Indicators Report: Summary of Ass...
4,International Consolidated Airlines Group SA,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics for ...,\n\nTax Transparency:\nHere is the evaluation ...,Here is the evaluation of the Key Metrics base...,Here is the evaluation of the Key Metrics for ...,**Tax Transparency Assessment** \nThe evaluat...,**Governance Indicators Report: Summary of Ass...,**Governance Indicators Report: Summary and Ev...
5,Standard Chartered plc,Based on the information extracted from the do...,Based on the information extracted from the do...,Based on the information extracted from the do...,Based on the information extracted from the do...,Based on the information extracted from the do...,Based on the provided information from the doc...,\n\nTax Transparency:\nBased on the informatio...,Here is the evaluation of the Key Metrics base...,Here is the evaluation of the Key Metrics for ...,**Governance Indicators Report: Summary Assess...,**Governance Indicators Report Summary**\n\nTh...,**Governance Indicators Report**\n\nThe govern...
6,Antofagasta plc,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics rela...,"Based on the provided document, here is the ev...",\n\nTax Transparency:\nHere is the evaluation ...,Here is the evaluation of the Key Metrics for ...,Here is the evaluation of the Key Metrics for ...,**Governance Indicators Report**\n\nThe assess...,**Governance Indicators Report**\n\nThe assess...,**Governance Indicators Report Summary**\n\nTh...
7,Taylor Wimpey plc,"Based on the provided document, here is the ev...","Based on the provided document, here is the ev...","Based on the provided document, here is the ev...","Based on the provided document, here is the ev...","Based on the provided document, here is the ev...","Based on the provided document, here is the ev...",\n\nTax Transparency:\nBased on the provided d...,Here is the evaluation of the Key Metrics base...,Here is the evaluation of the Key Metrics for ...,**Tax Transparency Report** \nThe evaluation ...,**Governance Indicators Report Summary**\n\nTh...,**Governance Structure Overview**\n\nThe gover...
8,Anglo American,Here is the evaluation of the Key Metric relat...,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics rela...,Here is the evaluation of the Key Metrics for ...,\n\nTax Transparency:\nHere is the evaluation ...,Here is the evaluation of the Key Metrics base...,Here is the evaluation of the Key Metrics base...,**Governance Indicators Report Summary**\n\nTh...,**Governance Indicators Report: Summary and Re...,**Governance Indicators Report Summary**\n\nTh...
9,Halma plc,Based on the information extracted from the do...,Based on the information extracted from the do...,Based on the information extracted from the do...,Based on the information extracted from the do...,Based on the information extracted from the do...,"Based on the provided document, here is the ev...",\n\nTax Transparency:\nBased on the informatio...,Here is the evaluation of the Key Metrics for ...,It seems that I couldn't find any relevant con...,**Tax Transparency Report for Halma plc**\n\nT...,**Governance Indicators Report Summary**\n\nTh...,**Governance Indicators Report Summary**\n\nTh...


In [48]:
import tiktoken

def count_tokens(text):
    # 选择一个模型，确保与使用的模型一致，比如 "gpt-3.5-turbo" 或 "text-davinci-003"
    model = "gpt-3.5-turbo"
    
    # 加载对应模型的 tokenizer
    encoding = tiktoken.encoding_for_model(model)
    
    # 计算 token 数量
    tokens = encoding.encode(text)
    num_tokens = len(tokens)
    return num_tokens

In [49]:
count_tokens(Assessment_Summary['All'][0])

8617

In [51]:
count_tokens(Assessment_Summary['Summary'][0])

721