### Data Cleaning
##### Stop words removal, lemmatization, and tokenization

In [1]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

In [2]:
# English stop-word list as a set, used for membership tests when filtering tokens.
# NOTE: needs the NLTK "stopwords" corpus to be available locally (nltk.download('stopwords')).
stops = set(stopwords.words('english'))
# Shared WordNet lemmatizer instance, reused for every token.
# NOTE: lemmatizing needs the NLTK "wordnet" corpus (nltk.download('wordnet')).
wordnet_lemmatizer = WordNetLemmatizer()

In [3]:
# List the names of the extracted chapter text files (.txt) in the project folder
import os
directory = "/Users/jianiwang/Desktop/6502_text_analysis_using_python/6502_Final_Project/extracted_text_by_chapter"
# os.listdir returns bare file names (no directory part) in arbitrary order
extracted_txt_folder = list(filter(lambda name: name.endswith('.txt'), os.listdir(directory)))

In [5]:
extracted_txt_folder

['chapter_Energy_System.txt',
 'chapter_AFLOU.txt',
 'chapter_Industry.txt',
 'chapter_innovation.txt',
 'chapter_Transport.txt',
 'chapter_mitigation.txt',
 'chapter_Cross_sectoral.txt',
 'chapter_national_policies.txt',
 'chapter_intl_coop.txt',
 'chapter_Building.txt',
 'chapter_intro.txt',
 'chapter_policymaker.txt',
 'chapter_Urban_System.txt',
 'chapter_Sus_Dev.txt',
 'chapter_Investment.txt']

##### Input all the txt files and save them into a dataframe so that we can run the cleaning process.

In [15]:
import glob
import pandas as pd

# Get a list of all the file paths that match the pattern "*.txt"
file_paths = glob.glob("/Users/jianiwang/Desktop/6502_text_analysis_using_python/6502_Final_Project/extracted_text_by_chapter/*.txt")

# Raw text of each chapter: one string per file, in the same order as file_paths
data = []

# Loop through each file path and read the whole file into a single string
for file_path in file_paths:
    # The chapters contain non-ASCII characters (e.g. "°C", "–"), so decode them
    # explicitly as UTF-8 instead of relying on the platform's default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        data.append(file.read())

In [18]:
# One row per chapter text; the single column gets the default integer label 0
data_csv = pd.DataFrame(data)

In [21]:
# Give the default column label 0 a descriptive name
data_csv = data_csv.rename(columns = {0: 'text'})

In [22]:
data_csv.head()

Unnamed: 0,text
0,\nWarming cannot be limited to well below 2°C ...
1,"\nThe Agriculture, Forestry and Other Land Use..."
2,\nIndustry Chapter 1111\nExecutive Summary\nT...
3,\nExecutive Summary\nInnovation in climate mit...
4,\nChapter 10 Transport10Executive Summary\nMee...


##### Stop word removal, lemmatization, tokenization

In [25]:
## The 15 chapter texts all need the same clean-up, so it is wrapped in one function
## that can be applied to the whole dataframe column at once.

def process_text(text):
    """Lower-case, tokenize, drop English stop words and lemmatize a text.

    Parameters
    ----------
    text : str
        Raw text of one chapter.

    Returns
    -------
    str
        The remaining lemmatized tokens joined by single spaces.
        Only stop words are removed; punctuation and number tokens are kept.
    """
    cleaned_tokens = []
    for token in word_tokenize(text.lower()):
        # skip stop words (the text was lower-cased before tokenizing)
        if token in stops:
            continue
        # lemmatize with the lemmatizer's default settings
        cleaned_tokens.append(wordnet_lemmatizer.lemmatize(token))

    return ' '.join(cleaned_tokens)

# `ipcc_processed_csv` is the working dataframe used by every cell from here on, but no
# visible cell creates it, so a fresh "Restart & Run All" would stop here with a NameError.
# Bind it to `data_csv`: the later `data_csv.head()` output shows the columns that are
# added through `ipcc_processed_csv`, i.e. both names refer to the same dataframe.
ipcc_processed_csv = data_csv

# apply the function to the dataframe
ipcc_processed_csv['processed_text'] = ipcc_processed_csv['text'].apply(process_text)

# double-checking
ipcc_processed_csv.head()

Unnamed: 0,text,processed_text
0,\nWarming cannot be limited to well below 2°C ...,warming limited well 2°c without rapid deep re...
1,"\nThe Agriculture, Forestry and Other Land Use...","agriculture , forestry land use1 ( afolu ) sec..."
2,\nIndustry Chapter 1111\nExecutive Summary\nT...,industry chapter 1111 executive summary paris ...
3,\nExecutive Summary\nInnovation in climate mit...,executive summary innovation climate mitigatio...
4,\nChapter 10 Transport10Executive Summary\nMee...,chapter 10 transport10executive summary meetin...


In [37]:
# Label every row with the file its text was read from. The names are derived from
# `file_paths` (the list the texts were loaded from), so each name is guaranteed to sit
# next to its own text. A separate os.listdir() listing can differ from the glob result
# in length or order (glob skips dot-files, for example).
ipcc_processed_csv['file_name'] = [os.path.basename(path) for path in file_paths]
ipcc_processed_csv.head()

Unnamed: 0,text,processed_text,file_name
0,"\nWarming cannot be limited to well below 2°C without rapid \nand deep reductions in energy system carbon dioxide (CO 2) \nand greenhouse gas (GHG) emissions. In scenarios limiting \nwarming to 1.5°C (>50%) with no or limited overshoot (2°C (>67%) \nwith action starting in 2020), net energy system CO 2 emissions \n(interquartile range) fall by 87–97% (60–79%) in 2050. In 2030, \nin scenarios ...","warming limited well 2°c without rapid deep reduction energy system carbon dioxide ( co 2 ) greenhouse gas ( ghg ) emission . scenario limiting warming 1.5°c ( > 50 % ) limited overshoot ( 2°c ( > 67 % ) action starting 2020 ) , net energy system co 2 emission ( interquartile range ) fall 87–97 % ( 60–79 % ) 2050. 2030 , scenario limiting warming 1.5°c ( > 50 % ) limited overshoot , net co 2 g...",chapter_Energy_System.txt
1,"\nThe Agriculture, Forestry and Other Land Use1 (AFOLU) sector \nencompasses managed ecosystems and offers significant \nmitigation opportunities while delivering food, wood and \nother renewable resources as well as biodiversity conservation, \nprovided the sector adapts to climate change. Land-based \nmitigation measures represent some of the most important options \ncurrently available. The...","agriculture , forestry land use1 ( afolu ) sector encompasses managed ecosystem offer significant mitigation opportunity delivering food , wood renewable resource well biodiversity conservation , provided sector adapts climate change . land-based mitigation measure represent important option currently available . deliver carbon dioxide removal ( cdr ) substitute fossil fuel , thereby enabling ...",chapter_AFLOU.txt
2,"\nIndustry Chapter 1111\nExecutive Summary\nThe Paris Agreement, the Sustainable Development Goals \n(SDGs) and the COVID-19 pandemic provide a new context \nfor the evolution of industry and the mitigation of industry \ngreenhouse gas (GHG) emissions (high confidence). This chapter \nis focused on what is new since AR5. It emphasises the energy and \nemissions intensive basic materials indu...","industry chapter 1111 executive summary paris agreement , sustainable development goal ( sdgs ) covid-19 pandemic provide new context evolution industry mitigation industry greenhouse gas ( ghg ) emission ( high confidence ) . chapter focused new since ar5 . emphasis energy emission intensive basic material industry key strategy reaching net zero emission . { 11.1.1 } net zero co 2 emission in...",chapter_Industry.txt
3,"\nExecutive Summary\nInnovation in climate mitigation technologies has seen \nenormous activity and significant progress in recent years. \nInnovation has also led to, and exacerbated, trade-offs \nin relation to sustainable development (high confidence). \nInnovation can leverage action to mitigate climate change by \nreinforcing other interventions. In conjunction with other enabling \ncondi...","executive summary innovation climate mitigation technology seen enormous activity significant progress recent year . innovation also led , exacerbated , trade-off relation sustainable development ( high confidence ) . innovation leverage action mitigate climate change reinforcing intervention . conjunction enabling condition , innovation support system transition limit warming help shift devel...",chapter_innovation.txt
4,"\nChapter 10 Transport10Executive Summary\nMeeting climate mitigation goals would require transformative \nchanges in the transport sector (high confidence). In 2019, \ndirect greenhouse gas (GHG) emissions from the transport sector \nwere 8.7 GtCO 2-eq (up from 5.0 GtCO 2-eq in 1990) and accounted \nfor 23% of global energy-related CO 2 emissions. 70% of direct \ntransport emissions came fro...","chapter 10 transport10executive summary meeting climate mitigation goal would require transformative change transport sector ( high confidence ) . 2019 , direct greenhouse gas ( ghg ) emission transport sector 8.7 gtco 2-eq ( 5.0 gtco 2-eq 1990 ) accounted 23 % global energy-related co 2 emission . 70 % direct transport emission came road vehicle , 1 % , 11 % , 12 % came rail , shipping , avia...",chapter_Transport.txt


### Data Analysis

##### Named Entity Recognition: after running NER, I realized that it does not provide any insightful information for our research topic, due to the nature of a science-based report.

In [33]:
import spacy
from spacy import displacy
# Widen the notebook display so long entity lists stay readable:
# show up to 600 rows and up to 400 characters per cell.
pd.set_option("display.max_rows", 600)
pd.set_option("display.max_colwidth", 400)

# Load spaCy's small English pipeline (the "en_core_web_sm" model must be installed)
nlp = spacy.load("en_core_web_sm")
# Raise the maximum number of characters the pipeline accepts for a single text
# (presumably because whole chapters exceed spaCy's default limit — TODO confirm)
nlp.max_length = 2500000

In [40]:
# Run NER on the raw text of every chapter and collect, per chapter, the entity labels
# and the matching entity strings as two parallel lists.
all_entities = []
# Columns are addressed by label rather than by position, so the loop keeps reading the
# right data even if the column order of the dataframe changes.
for raw_text, file_name in zip(ipcc_processed_csv['text'], ipcc_processed_csv['file_name']):
    doc = nlp(raw_text)
    # Materialize the entities once so both lists are built from the same sequence
    entities = list(doc.ents)
    all_entities.append({
        'file_name': file_name,
        'Entity_type': [ent.label_ for ent in entities],
        'Entity_identified': [ent.text for ent in entities],
    })

In [42]:
# One row per chapter (list-valued entity columns), sorted by file name in ascending order
df_NER = pd.DataFrame(all_entities).sort_values(by='file_name', ascending=True)
df_NER 

Unnamed: 0,file_name,Entity_type,Entity_identified
1,chapter_AFLOU.txt,"[ORG, ORG, ORG, CARDINAL, ORG, ORG, DATE, CARDINAL, ORG, PERCENT, ORG, CARDINAL, ORG, CARDINAL, ORG, DATE, ORG, ORG, CARDINAL, ORG, CARDINAL, WORK_OF_ART, CARDINAL, CARDINAL, CARDINAL, ORG, QUANTITY, CARDINAL, ORG, CARDINAL, CARDINAL, DATE, CARDINAL, CARDINAL, ORG, ORG, LAW, CARDINAL, CARDINAL, CARDINAL, CARDINAL, CARDINAL, CARDINAL, QUANTITY, DATE, CARDINAL, CARDINAL, ORG, CARDINAL, ORG, CARD...","[The Agriculture, Forestry and Other Land Use1, CDR, AFOLU, 1.5, AFOLU, WGI, 7.1, 7.6, AFOLU, 13–21%, GHG, around one third, AFOLU, 4.1, GtCO, between 2010 and 2019, FAOSTAT, GHG, 2, AFOLU, 0.0, GtCO 2, 2, 12.5, 3.2, GtCO, 6.6 ±, 5.2, GtCO, 2, 7.2, 7.2.2.5, 7.1, about half, GHG, GHG, CO 2 LULUCF, 4, 157, 47.1, 4, 6.6, 4.0, 4.2 ±, 1.3, 1.8, 1.1, GtCO, 2, IPCC, 4, between 2010 and 2019, 4, GHG, ..."
9,chapter_Building.txt,"[ORG, ORG, DATE, CARDINAL, ORG, PRODUCT, PERCENT, ORG, DATE, PERCENT, PERCENT, PERCENT, PERCENT, CARDINAL, PERCENT, PERCENT, ORG, CARDINAL, PERCENT, CARDINAL, DATE, CARDINAL, PERCENT, PERCENT, PERCENT, DATE, CARDINAL, PERCENT, PERCENT, PERCENT, CARDINAL, ORG, PERSON, ORG, PERSON, ORG, PERCENT, DATE, PERSON, DATE, ORG, ORG, CARDINAL, DATE, DATE, DATE, CARDINAL, CARDINAL, PERCENT, LOC, LOC, PERC...","[Buildings Chapter 99, GHG, 2019, 12, GtCO, 2-eq, 21%, GHG, that year, 57%, 24%, 18%, More than 95%, 4, 0.08%, 3%, GHG, 2, 31%, 128.8, 2019, 43, 31%, 18%, 70%, the period \n1990–2019, 2, 50%, 38%, 161%, 9.3, GHG, Sufficiency, GHG, Sufficiency, SER, 17%, 2050, Sufficiency, season, ICT, GHG, 9.2, 9.3, 9.4, 9.5, 9.6, 9.9, up to 85%, Europe, North America, up to 45%, Australia, Japan, New Zealand..."
6,chapter_Cross_sectoral.txt,"[ORG, LAW, DATE, CARDINAL, DATE, CARDINAL, CARDINAL, CARDINAL, CARDINAL, NORP, ORG, CARDINAL, ORG, ORG, CARDINAL, PERCENT, DATE, ORG, ORG, PERCENT, CARDINAL, PERCENT, CARDINAL, ORG, CARDINAL, CARDINAL, ORG, LAW, DATE, CARDINAL, ORG, CARDINAL, LAW, ORG, CARDINAL, LAW, DATE, DATE, ORG, ORG, ORG, CARDINAL, CARDINAL, ORG, DATE, CARDINAL, LAW, CARDINAL, CARDINAL, DATE, CARDINAL, PERCENT, ORG, ORG, ...","[Perspectives, Chapter 1212, the year \n2030, half, 2019, 2–1, more than half, 12.2, 12.3, Carbon, CDR, zero, CO 2, GHG, 2, 67%, 2100, CDR, CDR, 5–95%, 2, 67%, 2, AFOLU, 328, 168–763, CO 2, GtCO 2, annual, 2.75, GtCO 2, 2.98, GtCO 2, AFOLU, 0.02, GtCO 2, 2050, 12.3, Cross-Chapter Box 8, CDR, DACCS, 5–40, 2–1, DACCS, 2–4, 1 to about 100, GtCO 2, USD50 to 200, 2–1, 2–1, 12.3, Some 23–42%, GHG, G..."
0,chapter_Energy_System.txt,"[CARDINAL, ORG, CARDINAL, PERCENT, CARDINAL, PERCENT, DATE, CARDINAL, PERCENT, PERCENT, DATE, DATE, CARDINAL, PERCENT, ORG, PERCENT, PERCENT, CARDINAL, PERCENT, CARDINAL, PERCENT, PRODUCT, CARDINAL, DATE, DATE, CARDINAL, CARDINAL, CARDINAL, DATE, CARDINAL, ORG, PERCENT, DATE, CARDINAL, PERCENT, PERCENT, DATE, CARDINAL, PERCENT, DATE, CARDINAL, PERCENT, CARDINAL, PERCENT, DATE, PERCENT, PERCENT...","[2, GHG, 1.5, 50%, 2, 67%, 2020, 2, 87–97%, 60–79%, 2050, 2030, 1.5, 50%, GHG, 35–51%, 38–52%, 1.5, 50%, 2, 67%, CO 2, zero, between 2045 and 2055, 2050, 2080, 6.7, 2, the next 30 years, zero, CCS, 67–82%, 2030, 1.5, 50%, 93–97%, 2050, 2, 67%, 2020, 1.5, 50%, 2, 67%, 2020, 48–58%, 36–47%, 2050, 20%, 2019, 6.7, Net-zero, today, today, 6.6, Energy, 2015 to 2019, 6.6%, 2, 4.6%, GHG, 2.7%, 18%, GH..."
2,chapter_Industry.txt,"[ORG, ORG, ORG, ORG, ORG, CARDINAL, CARDINAL, CARDINAL, ORG, ORG, ORG, ORG, DATE, CARDINAL, DATE, CARDINAL, DATE, CARDINAL, DATE, ORG, CARDINAL, ORG, PERCENT, DATE, ORDINAL, ORG, CARDINAL, PERCENT, DATE, DATE, PERCENT, DATE, CARDINAL, CARDINAL, DATE, PERCENT, CARDINAL, ORG, CARDINAL, ORG, DATE, CARDINAL, CARDINAL, LOC, DATE, ORG, ORG, CARDINAL, CARDINAL, CARDINAL, ORG, ORG, ORG, CARDINAL, CARD...","[Industry Chapter 1111, The Paris Agreement, the Sustainable Development Goals, COVID-19, GHG, zero, zero, zero, GHG, CCU, CO 2, GHG, the last 30 years, zero, mid-century, 11.2, 11.3, 11.4, 2000, GHG, 14.1, GtCO, 24%, 2019, second, GHG, 7, 50%, 2019, 2000, 3.4%, 2000–2019, 11.2.1, 11.3.1, 1970, 99%, zero, GHG, zero, GHG, 2050, 11.3, 11.4, Key, recent years, CCS, CCS, 11.4.2.1, 11.3.5, zero, C..."
14,chapter_Investment.txt,"[ORG, ORG, PERSON, CARDINAL, ORG, CARDINAL, CARDINAL, ORG, CARDINAL, CARDINAL, DATE, CARDINAL, PERCENT, CARDINAL, CARDINAL, CARDINAL, CARDINAL, CARDINAL, CARDINAL, CARDINAL, DATE, CARDINAL, ORG, CARDINAL, CARDINAL, DATE, DATE, DATE, CARDINAL, DATE, CARDINAL, DATE, DATE, ORG, DATE, DATE, DATE, CARDINAL, DATE, DATE, DATE, GPE, CARDINAL, GPE, ORG, LAW, ORG, ORG, DATE, ORG, ORG, ORG, DATE, DATE, D...","[GHG, COVID-19, bias2, 15.2, the Paris Agreement, 15.2, 15.6, GHG, 15.3, 15.3, 15.2, 15.3, 90%, 3, 6, 4, 7, 15.4, 15.5, only one, 15.4.1, 15.5.2, the Paris Agreement, 15.6, 1.5, 15.6.1, 15.2, 15.3.2.3,, 15.6.1, 15.6.7, the next decade, 15.2, 15.6.1, the next decade, Climate, 15.2, 15.3.2.3, 15.6.1, 15.6.7, 15.2, 15.3.3, 15.6.1, 15.6.2, 15.6.3}Innovative, Paris, ESG, the Fifth Assessment Report..."
13,chapter_Sus_Dev.txt,"[DATE, CARDINAL, CARDINAL, ORG, ORG, ORG, CARDINAL, CARDINAL, CARDINAL, CARDINAL, DATE, DATE, DATE, CARDINAL, ORG, ORG, CARDINAL, ORG, DATE, DATE, ORG, DATE, CARDINAL, CARDINAL, CARDINAL, CARDINAL, CARDINAL, ORG, CARDINAL, CARDINAL, ORG, ORG, ORG, ORG, ORG, DATE, LAW, FAC, DATE, CARDINAL, ORG, GPE, NORP, DATE, ORG, CARDINAL, DATE, PERSON, DATE, CARDINAL, CARDINAL, DATE, PERSON, ORG, DATE, ORG,...","[decades, 17.1.1.2, 17.1.1.2, Response, Economics, Views, 17.2, 17.2, 17.3, 17.4, 17.3.3.6, 17.3.3, 17.3.3.1, 17.3.3.3, Digitalisation, GHG, 17.3.3, Response, 17.1.1.1, 17.3.2, 17.3.2.3, GHG, several decades, 17.4, 17.4.2, 17.4.6, One, 17.1, the Intergovernmental Panel, 17.2, 17.3, IPCC, the First Assessment Report, The Second Assessment Report, Banuri et al. 2001, UNFCCC, 2015, Article 3.4, T..."
4,chapter_Transport.txt,"[LAW, DATE, ORG, CARDINAL, ORG, CARDINAL, ORG, DATE, PERCENT, CARDINAL, PERCENT, PERCENT, PERCENT, PERCENT, LOC, LOC, DATE, CARDINAL, DATE, CARDINAL, ORG, ORG, ORG, ORG, ORG, LAW, DATE, CARDINAL, DATE, CARDINAL, PERCENT, LAW, LAW, DATE, CARDINAL, ORG, ORG, DATE, DATE, CARDINAL, DATE, DATE, DATE, CARDINAL, ORG, ORG, DATE, DATE, DATE, DATE, CARDINAL, CARDINAL, LAW, ORG, CARDINAL, PERCENT, DATE, ...","[Chapter 10, 2019, GHG, 8.7, GtCO, 5.0, GtCO, 1990, 23%, 2, 70%, 1%, 11%, 12%, Europe, North America, coming decades, 10.1, 10.5, 10.6, IPCC, Fifth Assessment Report, COVID-19, GHG, COVID-19, {Chapter 5, 10.2, 10.3, 10.4, 10.8, around 25%, Chapter 1, Chapter 5, 10.2, 10.8, GHG, GHG, 10.3, 10.4, 10.8, 2030, 10.3, 10.4, 10.8, CO 2, DAC, 10.2, 10.3, 10.4, 10.5, 10.6, 10.8, Chapter 1010, Scenarios..."
12,chapter_Urban_System.txt,"[ORG, LAW, CARDINAL, DATE, ORG, ORG, ORG, ORG, CARDINAL, PERSON, DATE, CARDINAL, ORG, CARDINAL, DATE, CARDINAL, PERCENT, DATE, CARDINAL, PERCENT, CARDINAL, PERCENT, DATE, CARDINAL, ORG, CARDINAL, PERCENT, DATE, PERCENT, DATE, DATE, PERCENT, LOC, PERCENT, LOC, LOC, PERCENT, PERCENT, LOC, LOC, LOC, PERCENT, LOC, LOC, PERCENT, LOC, DATE, CARDINAL, ORG, DATE, DATE, ORG, CARDINAL, DATE, ORG, CARDIN...","[Urban Systems, Chapter 88, 8.3.3, 8.4, GHG, GHG, GHG, GHG, 8.3, Box 8.1, 8.4, 8.6, GHG, 4, 2015, 25, about 62%, 2020, 29, 67–72%, About 100, approximately 18%, 8.1.6, 8.3.3, GHG, 6, 56%, 2000, 62%, 2015, 2000 to 2015, 28% to 38%, Africa, 46% to 54%, Asia, Pacific, 62% to 72%, 57% to 62%, Eastern Europe, West-Central, Asia, 55% to 66%, Latin America, Caribbean, 68% to 69%, the Middle East, 8.1..."
3,chapter_innovation.txt,"[DATE, ORG, CARDINAL, DATE, CARDINAL, CARDINAL, GPE, CARDINAL, DATE, LOC, CARDINAL, CARDINAL, CARDINAL, DATE, CARDINAL, CARDINAL, ORG, CARDINAL, FAC, DATE, DATE, CARDINAL, CARDINAL, NORP, GPE, CARDINAL, CARDINAL, DATE, LAW, PERSON, CARDINAL, ORG, ORG, DATE, CARDINAL, ORG, ORG, DATE, CARDINAL, CARDINAL, CARDINAL, CARDINAL, ORG, ORG, ORG, CARDINAL, ORG, PERSON, ORG, CARDINAL, CARDINAL, CARDINAL,...","[recent years, Sustainable Development Goals, 16.1, 16.2, 16.3, 16.4, 16.5.1, 16.6, the last decade, Latin America, 16.2, 16.2.4, 16.3, 16.3.4, 16.5, 16.7, Box 16.3, 16.10, Small Island Developing States, 16.4, 16.4.4.3, 16.4.4.4, 16.5, 16.7, Indian, India, 16.3, 16.5, 16.6, Chapter Box 12, Box 16.2, 16.6, IPR, IPR, 16.2.3.3, 16.5, Paris Agreement, the Clean Development Mechanism, the mid-\n20..."


In [43]:
# Unpack the per-chapter lists so that every entity gets its own row:
# file_name is moved into the index first so that only the two list columns are exploded,
# then it is restored as a regular column by reset_index().
df_NER = (
    df_NER
    .set_index(['file_name'])
    .apply(pd.Series.explode)
    .reset_index()
)
df_NER[:25]

Unnamed: 0,file_name,Entity_type,Entity_identified
0,chapter_AFLOU.txt,ORG,"The Agriculture, Forestry and Other Land Use1"
1,chapter_AFLOU.txt,ORG,CDR
2,chapter_AFLOU.txt,ORG,AFOLU
3,chapter_AFLOU.txt,CARDINAL,1.5
4,chapter_AFLOU.txt,ORG,AFOLU
5,chapter_AFLOU.txt,ORG,WGI
6,chapter_AFLOU.txt,DATE,7.1
7,chapter_AFLOU.txt,CARDINAL,7.6
8,chapter_AFLOU.txt,ORG,AFOLU
9,chapter_AFLOU.txt,PERCENT,13–21%


In [47]:
df_NER[df_NER['Entity_type'] == 'NORP'][:15]

Unnamed: 0,file_name,Entity_type,Entity_identified
119,chapter_AFLOU.txt,NORP,Integrated
269,chapter_AFLOU.txt,NORP,UNEP
425,chapter_AFLOU.txt,NORP,Estimates
436,chapter_AFLOU.txt,NORP,Prosperi
582,chapter_AFLOU.txt,NORP,Prosperi
583,chapter_AFLOU.txt,NORP,Tubiello
594,chapter_AFLOU.txt,NORP,Potapov
687,chapter_AFLOU.txt,NORP,Anthropogenic
699,chapter_AFLOU.txt,NORP,Tubiello
701,chapter_AFLOU.txt,NORP,Prosperi


In [49]:
## Count how often each named-entity label occurs in one chapter.
## NOTE(review): the original note said this targets the AFLOU file, but the cell reads
## row 0 / column 1, which in the tables shown above is the `processed_text` of
## chapter_Energy_System.txt — confirm which chapter (and which column) was intended.
doc = nlp(ipcc_processed_csv.iloc[0, 1])

# Map each entity label to the number of times it occurs in the document
entity_freq = {}

for ent in doc.ents:
    entity_freq[ent.label_] = entity_freq.get(ent.label_, 0) + 1

print(entity_freq)

{'CARDINAL': 1339, 'PERCENT': 456, 'DATE': 1781, 'GPE': 769, 'MONEY': 15, 'LOC': 129, 'LAW': 132, 'ORDINAL': 29, 'PERSON': 524, 'TIME': 12, 'ORG': 140, 'QUANTITY': 31, 'NORP': 70, 'EVENT': 2, 'PRODUCT': 9, 'FAC': 5, 'WORK_OF_ART': 1}


In [51]:
data_csv.head()

Unnamed: 0,text,processed_text,file_name
0,"\nWarming cannot be limited to well below 2°C without rapid \nand deep reductions in energy system carbon dioxide (CO 2) \nand greenhouse gas (GHG) emissions. In scenarios limiting \nwarming to 1.5°C (>50%) with no or limited overshoot (2°C (>67%) \nwith action starting in 2020), net energy system CO 2 emissions \n(interquartile range) fall by 87–97% (60–79%) in 2050. In 2030, \nin scenarios ...","warming limited well 2°c without rapid deep reduction energy system carbon dioxide ( co 2 ) greenhouse gas ( ghg ) emission . scenario limiting warming 1.5°c ( > 50 % ) limited overshoot ( 2°c ( > 67 % ) action starting 2020 ) , net energy system co 2 emission ( interquartile range ) fall 87–97 % ( 60–79 % ) 2050. 2030 , scenario limiting warming 1.5°c ( > 50 % ) limited overshoot , net co 2 g...",chapter_Energy_System.txt
1,"\nThe Agriculture, Forestry and Other Land Use1 (AFOLU) sector \nencompasses managed ecosystems and offers significant \nmitigation opportunities while delivering food, wood and \nother renewable resources as well as biodiversity conservation, \nprovided the sector adapts to climate change. Land-based \nmitigation measures represent some of the most important options \ncurrently available. The...","agriculture , forestry land use1 ( afolu ) sector encompasses managed ecosystem offer significant mitigation opportunity delivering food , wood renewable resource well biodiversity conservation , provided sector adapts climate change . land-based mitigation measure represent important option currently available . deliver carbon dioxide removal ( cdr ) substitute fossil fuel , thereby enabling ...",chapter_AFLOU.txt
2,"\nIndustry Chapter 1111\nExecutive Summary\nThe Paris Agreement, the Sustainable Development Goals \n(SDGs) and the COVID-19 pandemic provide a new context \nfor the evolution of industry and the mitigation of industry \ngreenhouse gas (GHG) emissions (high confidence). This chapter \nis focused on what is new since AR5. It emphasises the energy and \nemissions intensive basic materials indu...","industry chapter 1111 executive summary paris agreement , sustainable development goal ( sdgs ) covid-19 pandemic provide new context evolution industry mitigation industry greenhouse gas ( ghg ) emission ( high confidence ) . chapter focused new since ar5 . emphasis energy emission intensive basic material industry key strategy reaching net zero emission . { 11.1.1 } net zero co 2 emission in...",chapter_Industry.txt
3,"\nExecutive Summary\nInnovation in climate mitigation technologies has seen \nenormous activity and significant progress in recent years. \nInnovation has also led to, and exacerbated, trade-offs \nin relation to sustainable development (high confidence). \nInnovation can leverage action to mitigate climate change by \nreinforcing other interventions. In conjunction with other enabling \ncondi...","executive summary innovation climate mitigation technology seen enormous activity significant progress recent year . innovation also led , exacerbated , trade-off relation sustainable development ( high confidence ) . innovation leverage action mitigate climate change reinforcing intervention . conjunction enabling condition , innovation support system transition limit warming help shift devel...",chapter_innovation.txt
4,"\nChapter 10 Transport10Executive Summary\nMeeting climate mitigation goals would require transformative \nchanges in the transport sector (high confidence). In 2019, \ndirect greenhouse gas (GHG) emissions from the transport sector \nwere 8.7 GtCO 2-eq (up from 5.0 GtCO 2-eq in 1990) and accounted \nfor 23% of global energy-related CO 2 emissions. 70% of direct \ntransport emissions came fro...","chapter 10 transport10executive summary meeting climate mitigation goal would require transformative change transport sector ( high confidence ) . 2019 , direct greenhouse gas ( ghg ) emission transport sector 8.7 gtco 2-eq ( 5.0 gtco 2-eq 1990 ) accounted 23 % global energy-related co 2 emission . 70 % direct transport emission came road vehicle , 1 % , 11 % , 12 % came rail , shipping , avia...",chapter_Transport.txt


### Extract data associated with the confidence level

In [99]:
## Count how many times each confidence level is mentioned in each section
import re

phrases = ['high confidence', 'medium confidence', 'low confidence']

def find_freq(text):
    """Count the occurrences of each confidence phrase in a text.

    Parameters
    ----------
    text : str
        Text to search; matching is case-sensitive.

    Returns
    -------
    dict
        Maps every entry of `phrases` to its number of non-overlapping matches in `text`.
    """
    # each phrase is passed to re.findall as the pattern
    return {phrase: len(re.findall(phrase, text)) for phrase in phrases}

# Count the confidence phrases in the cleaned (lower-cased) text of every chapter; each cell holds a dict
ipcc_processed_csv['confidence_freq'] = ipcc_processed_csv['processed_text'].apply(find_freq)

In [100]:
ipcc_processed_csv.head()

Unnamed: 0,text,processed_text,file_name,confidence_freq
0,"\nWarming cannot be limited to well below 2°C without rapid \nand deep reductions in energy system carbon dioxide (CO 2) \nand greenhouse gas (GHG) emissions. In scenarios limiting \nwarming to 1.5°C (>50%) with no or limited overshoot (2°C (>67%) \nwith action starting in 2020), net energy system CO 2 emissions \n(interquartile range) fall by 87–97% (60–79%) in 2050. In 2030, \nin scenarios ...","warming limited well 2°c without rapid deep reduction energy system carbon dioxide ( co 2 ) greenhouse gas ( ghg ) emission . scenario limiting warming 1.5°c ( > 50 % ) limited overshoot ( 2°c ( > 67 % ) action starting 2020 ) , net energy system co 2 emission ( interquartile range ) fall 87–97 % ( 60–79 % ) 2050. 2030 , scenario limiting warming 1.5°c ( > 50 % ) limited overshoot , net co 2 g...",chapter_Energy_System.txt,"{'high confidence': 140, 'medium confidence': 26, 'low confidence': 4}"
1,"\nThe Agriculture, Forestry and Other Land Use1 (AFOLU) sector \nencompasses managed ecosystems and offers significant \nmitigation opportunities while delivering food, wood and \nother renewable resources as well as biodiversity conservation, \nprovided the sector adapts to climate change. Land-based \nmitigation measures represent some of the most important options \ncurrently available. The...","agriculture , forestry land use1 ( afolu ) sector encompasses managed ecosystem offer significant mitigation opportunity delivering food , wood renewable resource well biodiversity conservation , provided sector adapts climate change . land-based mitigation measure represent important option currently available . deliver carbon dioxide removal ( cdr ) substitute fossil fuel , thereby enabling ...",chapter_AFLOU.txt,"{'high confidence': 62, 'medium confidence': 65, 'low confidence': 7}"
2,"\nIndustry Chapter 1111\nExecutive Summary\nThe Paris Agreement, the Sustainable Development Goals \n(SDGs) and the COVID-19 pandemic provide a new context \nfor the evolution of industry and the mitigation of industry \ngreenhouse gas (GHG) emissions (high confidence). This chapter \nis focused on what is new since AR5. It emphasises the energy and \nemissions intensive basic materials indu...","industry chapter 1111 executive summary paris agreement , sustainable development goal ( sdgs ) covid-19 pandemic provide new context evolution industry mitigation industry greenhouse gas ( ghg ) emission ( high confidence ) . chapter focused new since ar5 . emphasis energy emission intensive basic material industry key strategy reaching net zero emission . { 11.1.1 } net zero co 2 emission in...",chapter_Industry.txt,"{'high confidence': 14, 'medium confidence': 4, 'low confidence': 0}"
3,"\nExecutive Summary\nInnovation in climate mitigation technologies has seen \nenormous activity and significant progress in recent years. \nInnovation has also led to, and exacerbated, trade-offs \nin relation to sustainable development (high confidence). \nInnovation can leverage action to mitigate climate change by \nreinforcing other interventions. In conjunction with other enabling \ncondi...","executive summary innovation climate mitigation technology seen enormous activity significant progress recent year . innovation also led , exacerbated , trade-off relation sustainable development ( high confidence ) . innovation leverage action mitigate climate change reinforcing intervention . conjunction enabling condition , innovation support system transition limit warming help shift devel...",chapter_innovation.txt,"{'high confidence': 20, 'medium confidence': 7, 'low confidence': 0}"
4,"\nChapter 10 Transport10Executive Summary\nMeeting climate mitigation goals would require transformative \nchanges in the transport sector (high confidence). In 2019, \ndirect greenhouse gas (GHG) emissions from the transport sector \nwere 8.7 GtCO 2-eq (up from 5.0 GtCO 2-eq in 1990) and accounted \nfor 23% of global energy-related CO 2 emissions. 70% of direct \ntransport emissions came fro...","chapter 10 transport10executive summary meeting climate mitigation goal would require transformative change transport sector ( high confidence ) . 2019 , direct greenhouse gas ( ghg ) emission transport sector 8.7 gtco 2-eq ( 5.0 gtco 2-eq 1990 ) accounted 23 % global energy-related co 2 emission . 70 % direct transport emission came road vehicle , 1 % , 11 % , 12 % came rail , shipping , avia...",chapter_Transport.txt,"{'high confidence': 15, 'medium confidence': 11, 'low confidence': 4}"


In [165]:
display(ipcc_processed_csv)

Unnamed: 0,text,processed_text,file_name,confidence_freq,high confidence_text,medium confidence_text,low confidence_text,high_tfidf,medium_tfidf,low_tfidf,overall_sentiment,sentiment_high,sentiment_medium,sentiment_low
0,"\nWarming cannot be limited to well below 2°C without rapid \nand deep reductions in energy system carbon dioxide (CO 2) \nand greenhouse gas (GHG) emissions. In scenarios limiting \nwarming to 1.5°C (>50%) with no or limited overshoot (2°C (>67%) \nwith action starting in 2020), net energy system CO 2 emissions \n(interquartile range) fall by 87–97% (60–79%) in 2050. In 2030, \nin scenarios ...","warming limited well 2°c without rapid deep reduction energy system carbon dioxide ( co 2 ) greenhouse gas ( ghg ) emission . scenario limiting warming 1.5°c ( > 50 % ) limited overshoot ( 2°c ( > 67 % ) action starting 2020 ) , net energy system co 2 emission ( interquartile range ) fall 87–97 % ( 60–79 % ) 2050. 2030 , scenario limiting warming 1.5°c ( > 50 % ) limited overshoot , net co 2 g...",chapter_Energy_System.txt,"{'high confidence': 140, 'medium confidence': 26, 'low confidence': 4}","scenario limiting warming 1.5°c ( > 50 % ) limited overshoot ( 2°c ( > 67 % ) ) , net electricity sector co 2 emission reach zero globally 2045 2055 ( 2050 2080 ) . ( high confidence ) { 6.7 } limiting warming well 2°c require substantial energy system change next 30 year . includes reduced fossil fuel consumption , increased production low- zero-carbon energy source , increased use electricit...","advance low-carbon energy resource carrier next-generation biofuels , hydrogen produced electrolysis , synthetic fuel , carbon-neutral ammonia would substantially improve economics net-zero energy system . ( medium confidence ) { 6.4 , 6.7 } global energy system largest source co 2 emission ( chapter 2 ) . reducing energy sector emission therefore essential limit warming . 2020 ) , fundamental...","assessment method explained annex ii.11 . 
geophysical environmental-ecological technological economic socio-cultural institutionalphysical potential geophysical resource land use air pollution toxic waste , ecotoxicity eutrophication water quantity quality biodiversity simplicity technological scalability maturity technology readiness cost 2030 long term effect employment economic growth publi...","[(energy, 0.4676206229823656), (confidence, 0.39468896618695076), (high, 0.39468896618695076), (system, 0.24668060386684423), (cost, 0.16302370342504488), (al, 0.15229845977866036), (et, 0.15015341104938346), (electricity, 0.14157321613227583), (carbon, 0.1308479724858913), (low, 0.12870292375661438), (fossil, 0.10939748519312223), (fuel, 0.10939748519312223), (zero, 0.09867224154673769), (win...","[(confidence, 0.3911657266912561), (medium, 0.2825085803881294), (energy, 0.26077715112750405), (warming, 0.22818000723656606), (electricity, 0.2064485779759407), (al, 0.19558286334562805), (et, 0.19558286334562805), (cost, 0.16298571945469004), (low, 0.15212000482437738), (2020, 0.1412542901940647), (high, 0.1412542901940647), (use, 0.13038857556375202), (zero, 0.13038857556375202), (carbon, ...","[(confidence, 0.3232299675777271), (energy, 0.3232299675777271), (low, 0.2770542579237661), (wind, 0.2770542579237661), (climate, 0.18470283861584408), (effect, 0.18470283861584408), (high, 0.18470283861584408), (al, 0.13852712896188304), (carbon, 0.13852712896188304), (change, 0.13852712896188304), (enablers, 0.13852712896188304), (et, 0.13852712896188304), (impact, 0.13852712896188304), (res...",1.0,1.0,0.9997,0.9906
1,"\nThe Agriculture, Forestry and Other Land Use1 (AFOLU) sector \nencompasses managed ecosystems and offers significant \nmitigation opportunities while delivering food, wood and \nother renewable resources as well as biodiversity conservation, \nprovided the sector adapts to climate change. Land-based \nmitigation measures represent some of the most important options \ncurrently available. The...","agriculture , forestry land use1 ( afolu ) sector encompasses managed ecosystem offer significant mitigation opportunity delivering food , wood renewable resource well biodiversity conservation , provided sector adapts climate change . land-based mitigation measure represent important option currently available . deliver carbon dioxide removal ( cdr ) substitute fossil fuel , thereby enabling ...",chapter_AFLOU.txt,"{'high confidence': 62, 'medium confidence': 65, 'low confidence': 7}","deliver carbon dioxide removal ( cdr ) substitute fossil fuel , thereby enabling emission reduction sector . rapid deployment afolu measure essential pathway staying within limit remaining budget 1.5°c target ( high confidence ) . carefully appropriately implemented , afolu mitigation measure uniquely positioned deliver substantial co-benefits help address many wider challenge associated land ...","time capacity land support function may threatened climate change ( high confidence ) . { ipcc ar6 wgi , figure spm.7 ; ipcc ar6 wgii , 7.1 , 7.6 } afolu ( managed land ) sector , average , accounted 13–21 % global total anthropogenic greenhouse gas ( ghg ) emission period 2010–2019 ( medium confidence ) . time managed natural terrestrial ecosystem carbon sink , absorbing around one third anth...","2015 ) . 
low confidence remains proposing specific change land condition achieve desired impact local , regional global climate due : poor relationship change surface albedo change surface temperature ( davin de noblet-ducoudré 2010 ) , compensation feedback among biophysical process ( bonan 2016 ; kalliokoski et al . 2020 ) , climate seasonal dependency biophysical effect ( bonan 2016 ) , omi...","[(confidence, 0.42545103103257703), (high, 0.3403608248260616), (emission, 0.30632474234345547), (mitigation, 0.26377963924019776), (yr, 0.22123453613694005), (afolu, 0.19570747427498544), (potential, 0.13614432993042466), (climate, 0.12763530930977313), (land, 0.12763530930977313), (gtco, 0.12338079899944734), (measure, 0.11912628868912158), (2020, 0.11487177837879581), (net, 0.11061726806847...","[(confidence, 0.36968858181893005), (yr, 0.3245044218188386), (medium, 0.2875355636369456), (gtco, 0.26288965818235027), (emission, 0.2546743563641518), (potential, 0.23824375272775491), (eq, 0.1971672436367627), (mitigation, 0.18484429090946503), (al, 0.1643060363639689), (et, 0.1643060363639689), (co, 0.13555248000027434), (land, 0.12322952727297667), (global, 0.11912187636387746), (net, 0.1...","[(confidence, 0.35874800166708765), (carbon, 0.25112360116696136), (low, 0.25112360116696136), (potential, 0.21524880100025257), (al, 0.17937400083354382), (change, 0.17937400083354382), (et, 0.17937400083354382), (mitigation, 0.17937400083354382), (yr, 0.17937400083354382), (climate, 0.14349920066683505), (2010, 0.10762440050012628), (2050, 0.10762440050012628), (coastal, 0.10762440050012628)...",1.0,0.9999,0.9999,0.962
2,"\nIndustry Chapter 1111\nExecutive Summary\nThe Paris Agreement, the Sustainable Development Goals \n(SDGs) and the COVID-19 pandemic provide a new context \nfor the evolution of industry and the mitigation of industry \ngreenhouse gas (GHG) emissions (high confidence). This chapter \nis focused on what is new since AR5. It emphasises the energy and \nemissions intensive basic materials indu...","industry chapter 1111 executive summary paris agreement , sustainable development goal ( sdgs ) covid-19 pandemic provide new context evolution industry mitigation industry greenhouse gas ( ghg ) emission ( high confidence ) . chapter focused new since ar5 . emphasis energy emission intensive basic material industry key strategy reaching net zero emission . { 11.1.1 } net zero co 2 emission in...",chapter_Industry.txt,"{'high confidence': 14, 'medium confidence': 4, 'low confidence': 0}","industry chapter 1111 executive summary paris agreement , sustainable development goal ( sdgs ) covid-19 pandemic provide new context evolution industry mitigation industry greenhouse gas ( ghg ) emission ( high confidence ) . chapter focused new since ar5 . emphasis energy emission intensive basic material industry key strategy reaching net zero emission . { 11.1.1 } net zero co 2 emission i...","{ 11.4.1.3 } scenario analysis show significant cut global ghg emission even close net zero emission ghg intensive industry ( e.g. , steel , plastic , ammonia , cement ) achieved 2050 deploying multiple available emerging option ( medium confidence ) . cutting industry emission significantly requires reorientation historic focus important incremental improvement ( e.g. 
, biomethane methanol ) ...",,"[(11, 0.3818730893140701), (emission, 0.3818730893140701), (high, 0.2603680154414114), (material, 0.2603680154414114), (confidence, 0.24301014774531735), (energy, 0.17357867696094095), (low, 0.17357867696094095), (carbon, 0.13886294156875276), (zero, 0.13886294156875276), (efficiency, 0.12150507387265867), (ghg, 0.12150507387265867), (industry, 0.12150507387265867), (use, 0.12150507387265867),...","[(emission, 0.4740454631399772), (11, 0.2844272778839863), (industry, 0.2844272778839863), (carbon, 0.1896181852559909), (confidence, 0.1896181852559909), (cost, 0.1896181852559909), (low, 0.1896181852559909), (medium, 0.1896181852559909), (cement, 0.14221363894199315), (production, 0.14221363894199315), (source, 0.14221363894199315), (zero, 0.14221363894199315), (basic, 0.09480909262799544), ...",[],1.0,0.9986,0.9682,0.0
3,"\nExecutive Summary\nInnovation in climate mitigation technologies has seen \nenormous activity and significant progress in recent years. \nInnovation has also led to, and exacerbated, trade-offs \nin relation to sustainable development (high confidence). \nInnovation can leverage action to mitigate climate change by \nreinforcing other interventions. In conjunction with other enabling \ncondi...","executive summary innovation climate mitigation technology seen enormous activity significant progress recent year . innovation also led , exacerbated , trade-off relation sustainable development ( high confidence ) . innovation leverage action mitigate climate change reinforcing intervention . conjunction enabling condition , innovation support system transition limit warming help shift devel...",chapter_innovation.txt,"{'high confidence': 20, 'medium confidence': 7, 'low confidence': 0}","executive summary innovation climate mitigation technology seen enormous activity significant progress recent year . innovation also led , exacerbated , trade-off relation sustainable development ( high confidence ) . innovation leverage action mitigate climate change reinforcing intervention . conjunction enabling condition , innovation support system transition limit warming help shift devel...","suggests development planning innovation capability remains necessary , especially least- developed country sids . international diffusion low-emission technology also facilitated knowledge spillover region engaged clean r & ( medium confidence ) . { 16.6 } evidence role intellectual property right ( ipr ) innovation mixed . 
{ 16.6 } evidence role intellectual property right ( ipr ) innovation...",,"[(innovation, 0.4776659519992759), (16, 0.41536169739067474), (high, 0.28036914573870547), (confidence, 0.25960106086917173), (technology, 0.21806489113010424), (development, 0.1661446789562699), (system, 0.14537659408673614), (technological, 0.1349925516519693), (box, 0.12460850921720242), (change, 0.12460850921720242), (climate, 0.10384042434766869), (country, 0.10384042434766869), (emission...","[(confidence, 0.2980099775410749), (technology, 0.2980099775410749), (medium, 0.26820897978696745), (change, 0.20860698427875243), (country, 0.17880598652464494), (mitigation, 0.17880598652464494), (also, 0.14900498877053744), (climate, 0.14900498877053744), (development, 0.14900498877053744), (emission, 0.14900498877053744), (innovation, 0.14900498877053744), (16, 0.11920399101642996), (chapt...",[],1.0,0.9999,0.9987,0.0
4,"\nChapter 10 Transport10Executive Summary\nMeeting climate mitigation goals would require transformative \nchanges in the transport sector (high confidence). In 2019, \ndirect greenhouse gas (GHG) emissions from the transport sector \nwere 8.7 GtCO 2-eq (up from 5.0 GtCO 2-eq in 1990) and accounted \nfor 23% of global energy-related CO 2 emissions. 70% of direct \ntransport emissions came fro...","chapter 10 transport10executive summary meeting climate mitigation goal would require transformative change transport sector ( high confidence ) . 2019 , direct greenhouse gas ( ghg ) emission transport sector 8.7 gtco 2-eq ( 5.0 gtco 2-eq 1990 ) accounted 23 % global energy-related co 2 emission . 70 % direct transport emission came road vehicle , 1 % , 11 % , 12 % came rail , shipping , avia...",chapter_Transport.txt,"{'high confidence': 15, 'medium confidence': 11, 'low confidence': 4}","chapter 10 transport10executive summary meeting climate mitigation goal would require transformative change transport sector ( high confidence ) . 2019 , direct greenhouse gas ( ghg ) emission transport sector 8.7 gtco 2-eq ( 5.0 gtco 2-eq 1990 ) accounted 23 % global energy-related co 2 emission . emission shipping aviation continue grow rapidly . transport- related emission developing regio...","electromobility powered low-carbon electricity potential rapidly reduce transport ghg applied multiple co-benefits developing world ’ growing city ( high confidence ) . { 10.3 , 10.4 , 10.8 } land-based , long-range , heavy-duty truck decarbonised battery electric haulage ( including use electric road system ) , complemented hydrogen- biofuel- based fuel context ( medium confidence ) . technol...",advanced biofuels could provide low-carbon jet fuel ( medium confidence ) . 
production synthetic fuel using low-carbon hydrogen co 2 captured direct air capture ( dac ) bioenergy carbon capture storage ( beccs ) could provide jet marine fuel option still require demonstration scale ( low confidence ) . ammonia produced low-carbon hydrogen could also serve marine fuel ( medium confidence ) . 20...,"[(10, 0.3903352467758598), (confidence, 0.35130172209827376), (transport, 0.35130172209827376), (high, 0.32527937231321646), (fuel, 0.1951676233879299), (emission, 0.16914527360287257), (hydrogen, 0.1561340987103439), (carbon, 0.1301117489252866), (demand, 0.11710057403275793), (low, 0.11710057403275793), (sector, 0.11710057403275793), (aviation, 0.10408939914022927), (based, 0.104089399140229...","[(10, 0.4219146387646126), (confidence, 0.38816146766344356), (medium, 0.28690195435993654), (fuel, 0.25314878325876755), (could, 0.18564244105642955), (carbon, 0.16876585550584502), (low, 0.16876585550584502), (electric, 0.15188926995526053), (hydrogen, 0.15188926995526053), (based, 0.13501268440467604), (system, 0.13501268440467604), (transport, 0.13501268440467604), (decarbonisation, 0.1181...","[(confidence, 0.26594532430676177), (low, 0.26594532430676177), (aviation, 0.2216211035889681), (fuel, 0.2216211035889681), (land, 0.2216211035889681), (shipping, 0.2216211035889681), (transport, 0.2216211035889681), (carbon, 0.1772968828711745), (effect, 0.1772968828711745), (contrail, 0.13297266215338088), (could, 0.13297266215338088), (enablers, 0.13297266215338088), (hydrogen, 0.1329726621...",1.0,0.9996,0.9987,0.9826
5,"\nExecutive Summary\nGlobal net anthropogenic greenhouse gas (GHG) emissions \nduring the last decade (2010–2019) were higher than at any \nprevious time in human history (high confidence). Since 2010, \nGHG emissions have continued to grow, reaching 59 ± 6.6 GtCO 2-eq \nin 2019,1 but the average annual growth in the last decade \n(1.3%, 2010–2019) was lower than in the previous decade (2.1%,...","executive summary global net anthropogenic greenhouse gas ( ghg ) emission last decade ( 2010–2019 ) higher previous time human history ( high confidence ) . since 2010 , ghg emission continued grow , reaching 59 ± 6.6 gtco 2-eq 2019,1 average annual growth last decade ( 1.3 % , 2010–2019 ) lower previous decade ( 2.1 % , 2000–2009 ) ( high confidence ) . average annual ghg emission 9.1 gtco 2...",chapter_mitigation.txt,"{'high confidence': 178, 'medium confidence': 54, 'low confidence': 5}","executive summary global net anthropogenic greenhouse gas ( ghg ) emission last decade ( 2010–2019 ) higher previous time human history ( high confidence ) . since 2010 , ghg emission continued grow , reaching 59 ± 6.6 gtco 2-eq 2019,1 average annual growth last decade ( 1.3 % , 2010–2019 ) lower previous decade ( 2.1 % , 2000–2009 ) ( high confidence ) . executive summary global net anthropo...","{ 2.4.1 , figure 2.16 } global covid-19 pandemic led steep drop co 2 emission fossil fuel industry ( high confidence ) . global co 2-ffi emission dropped 2020 5.8 % ( 5.1–6.3 % ) 2.2 ( 1.9–2.4 ) gtco 2 compared 2019. emission , however , rebounded globally end december 2020 ( medium confidence ) . { 2.2.2 , figure 2.6 } commonly used wide part literature climate change mitigation required repo...","ghg emission reached co2 emission ffi 38 ( ±3.0 ) gt , co 2 lulucf 6.6 ± 4.6 gt , ch 4 11 ± 3.2 gtco 2-eq , n 2o 2.7 ± 1.6 gtco 2-eq f-gases 1.4 ± 0.41 gtco 2-eq . 
high confidence average annual ghg emission last decade ( 2010–2019 ) highest record term aggregate co 2-eq emission , low confidence annual emission 2019 uncertainty large considering size composition observed increase recent year ...","[(high, 0.4049498520337298), (confidence, 0.39272495084025866), (emission, 0.3591064725582132), (mitigation, 0.22463255943003124), (pathway, 0.17573295465614688), (al, 0.16045182816430803), (et, 0.16045182816430803), (warming, 0.14975503962002082), (co, 0.14669881432165305), (change, 0.13600202577736584), (carbon, 0.12072089928552698), (climate, 0.12072089928552698), (sector, 0.119192786636343...","[(confidence, 0.4056464329257463), (medium, 0.3095722777591222), (emission, 0.29355991856468483), (mitigation, 0.21349812259249806), (warming, 0.19214831033324825), (change, 0.1868108572684358), (energy, 0.1547861388795611), (high, 0.1547861388795611), (pathway, 0.1547861388795611), (co, 0.1334363266203113), (global, 0.1334363266203113), (climate, 0.12809887355549884), (al, 0.12276142049068639...","[(emission, 0.3916103699798974), (eq, 0.28480754180356177), (confidence, 0.24920659907811654), (gtco, 0.24920659907811654), (low, 0.24920659907811654), (2019, 0.1780047136272261), (aggregate, 0.14240377090178089), (carbon, 0.14240377090178089), (change, 0.14240377090178089), (co, 0.14240377090178089), (economic, 0.14240377090178089), (high, 0.14240377090178089), (2010, 0.10680282817633566), (a...",1.0,1.0,1.0,0.9859
6,"\nCross-sectoral Perspectives Chapter 1212\nExecutive Summary\nThe total emission mitigation potential achievable by the year \n2030, calculated based on sectoral assessments, is sufficient \nto reduce global greenhouse gas emissions to half of the \ncurrent (2019) level or less (robust evidence, high agreement). \nThis potential (32–44 GtCO 2-eq) requires implementation of a wide \nrange of ...","cross-sectoral perspective chapter 1212 executive summary total emission mitigation potential achievable year 2030 , calculated based sectoral assessment , sufficient reduce global greenhouse gas emission half current ( 2019 ) level le ( robust evidence , high agreement ) . potential ( 32–44 gtco 2-eq ) requires implementation wide range mitigation option . option mitigation cost lower usd20 t...",chapter_Cross_sectoral.txt,"{'high confidence': 13, 'medium confidence': 2, 'low confidence': 2}","{ 3.2.5 , 3.4.6 , 12.5 } chapter 12 cross-sectoral perspectives12bio-based product part circular bioeconomy potential support adaptation mitigation . key maximising benefit managing trade-off sectoral integration , transparent governance , stakeholder involvement ( high confidence ) . sustainable bioeconomy relying biomass resource need supported technology innovation international cooperation...","key maximising benefit managing trade-off sectoral integration , transparent governance , stakeholder involvement ( high confidence ) . sustainable bioeconomy relying biomass resource need supported technology innovation international cooperation governance global trade disincentivise environmental social externality ( medium confidence ) . { 12.5 , cross-working group box 3 chapter } coordina...",", small versus large ) , includes line sight assessment based . assessment methodology explained annex ii , part iv , section 11. 
geophysical environmental-ecological technological economic socio-cultural institutionalphysical potential geophysical resource land use air pollution toxic waste , ecotoxicity eutrophication water quantity quality biodiversity simplicity technological scalability m...","[(12, 0.41691385343599274), (section, 0.3188164761569356), (land, 0.2534182246375642), (confidence, 0.17984519167827137), (high, 0.17984519167827137), (use, 0.17984519167827137), (production, 0.16349562879842852), (united, 0.14714606591858567), (food, 0.13079650303874282), (biomass, 0.12262172159882138), (impact, 0.12262172159882138), (soil, 0.12262172159882138), (based, 0.11444694015889996), ...","[(confidence, 0.33407655239053047), (high, 0.2672612419124244), (trade, 0.2672612419124244), (biomass, 0.2004459314343183), (governance, 0.2004459314343183), (off, 0.2004459314343183), (technology, 0.2004459314343183), (based, 0.1336306209562122), (climate, 0.1336306209562122), (coordinated, 0.1336306209562122), (cross, 0.1336306209562122), (energy, 0.1336306209562122), (innovation, 0.13363062...","[(effect, 0.2349781349963872), (enablers, 0.2349781349963872), (acceptance, 0.15665208999759148), (air, 0.15665208999759148), (al, 0.15665208999759148), (assessment, 0.15665208999759148), (barrier, 0.15665208999759148), (capacity, 0.15665208999759148), (carbon, 0.15665208999759148), (confidence, 0.15665208999759148), (economic, 0.15665208999759148), (et, 0.15665208999759148), (geophysical, 0.1...",1.0,0.9997,0.994,0.9325
7,"\nChapter 13 National and Sub-national Policies and Institutions13Executive Summary\nLong-term deep emission reductions, including the reduction \nof emissions to net zero, is best achieved through institutions \nand governance that nurture new mitigation policies, \nwhile at the same time reconsidering existing policies that \nsupport continued Greenhouse Gas (GHG) emissions (robust \nevidenc...","chapter 13 national sub-national policy institutions13executive summary long-term deep emission reduction , including reduction emission net zero , best achieved institution governance nurture new mitigation policy , time reconsidering existing policy support continued greenhouse gas ( ghg ) emission ( robust evidence , high agreement ) . effectively , scope climate governance include direct e...",chapter_national_policies.txt,"{'high confidence': 1, 'medium confidence': 1, 'low confidence': 0}","designing overlap interaction among mitigation policy enhances effectiveness ( robust evidence , high agreement ) . { 13.6 } removing fossil fuel subsidy would reduce emission , improve public revenue macroeconomic performance , yield environmental sustainable development benefit ; subsidy removal may adverse distributional impact especially economically vulnerable group , case mitigated measu...","designing overlap interaction among mitigation policy enhances effectiveness ( robust evidence , high agreement ) . 
{ 13.6 } removing fossil fuel subsidy would reduce emission , improve public revenue macroeconomic performance , yield environmental sustainable development benefit ; subsidy removal may adverse distributional impact especially economically vulnerable group , case mitigated measu...",,"[(emission, 0.25630729731502827), (high, 0.25630729731502827), (mitigation, 0.25630729731502827), (subsidy, 0.25630729731502827), (13, 0.1708715315433522), (agreement, 0.1708715315433522), (confidence, 0.1708715315433522), (evidence, 0.1708715315433522), (fossil, 0.1708715315433522), (fuel, 0.1708715315433522), (medium, 0.1708715315433522), (national, 0.1708715315433522), (policy, 0.1708715315...","[(emission, 0.25630729731502827), (high, 0.25630729731502827), (mitigation, 0.25630729731502827), (subsidy, 0.25630729731502827), (13, 0.1708715315433522), (agreement, 0.1708715315433522), (confidence, 0.1708715315433522), (evidence, 0.1708715315433522), (fossil, 0.1708715315433522), (fuel, 0.1708715315433522), (medium, 0.1708715315433522), (national, 0.1708715315433522), (policy, 0.1708715315...",[],1.0,0.9687,0.9687,0.0
8,"\nInternational cooperation is having positive and measurable \nresults (high confidence). The Kyoto Protocol led to measurable \nand substantial avoided emissions, including in 20 countries with \nKyoto first commitment period targets that have experienced \na decade of declining absolute emissions. It also built national \ncapacity for greenhouse gas (GHG) accounting, catalysed the \ncreati...","international cooperation positive measurable result ( high confidence ) . kyoto protocol led measurable substantial avoided emission , including 20 country kyoto first commitment period target experienced decade declining absolute emission . also built national capacity greenhouse gas ( ghg ) accounting , catalysed creation ghg market , increased investment low-carbon technology ( medium conf...",chapter_intl_coop.txt,"{'high confidence': 18, 'medium confidence': 12, 'low confidence': 3}","international cooperation positive measurable result ( high confidence ) . kyoto protocol led measurable substantial avoided emission , including 20 country kyoto first commitment period target experienced decade declining absolute emission . { 14.3 , 14.5 , 14.6 } new form international cooperation emerged since intergovernmental panel climate change ’ fifth assessment report ( ipcc ar5 ) li...","kyoto protocol led measurable substantial avoided emission , including 20 country kyoto first commitment period target experienced decade declining absolute emission . also built national capacity greenhouse gas ( ghg ) accounting , catalysed creation ghg market , increased investment low-carbon technology ( medium confidence ) . international agreement institution led avoided carbon dioxide (...","regional scale seasonal timescale could considerable residual climate change and/or overcompensating change ( e.g. 
, cooling , wetting drying ’ needed offset warming , drying wetting due anthropogenic greenhouse gas emission ) , low confidence understanding climate response srm regional scale ( ar6 wgi , chapter 4 ) . sai implemented partially offset warming ( e.g. study used scenario sai depl...","[(14, 0.387513082812802), (confidence, 0.37731537010720195), (high, 0.3161290938736016), (climate, 0.24474510493440127), (agreement, 0.23454739222880122), (cooperation, 0.23454739222880122), (mitigation, 0.23454739222880122), (international, 0.193756541406401), (country, 0.13257026517280068), (support, 0.12237255246720063), (development, 0.11217483976160057), (emission, 0.11217483976160057), (...","[(confidence, 0.39966024133170247), (medium, 0.2960446232086685), (agreement, 0.2812423920482351), (level, 0.2516379297273682), (international, 0.20723123624606796), (ndcs, 0.1924290050856345), (paris, 0.1924290050856345), (14, 0.16282454276476768), (ambition, 0.16282454276476768), (cooperation, 0.14802231160433424), (mitigation, 0.14802231160433424), (country, 0.13322008044390082), (emission,...","[(srm, 0.3735436838188142), (emission, 0.24902912254587614), (confidence, 0.1867718419094071), (impact, 0.1867718419094071), (low, 0.1867718419094071), (reduction, 0.1867718419094071), (scenario, 0.1867718419094071), (study, 0.1867718419094071), (surface, 0.1867718419094071), (ar6, 0.12451456127293807), (change, 0.12451456127293807), (chapter, 0.12451456127293807), (climate, 0.1245145612729380...",1.0,0.9999,0.9997,0.7906
9,"\nBuildings Chapter 99\nExecutive Summary\nGlobal greenhouse gas (GHG) emissions from buildings were \nin 2019 at 12 GtCO 2-eq, equivalent to 21% of global GHG \nemissions that year, out of which 57% were indirect emissions \nfrom offsite generation of electricity and heat, 24% direct \nemissions produced onsite and 18% were embodied emissions \nfrom the use of cement and steel (high evidenc...","building chapter 99 executive summary global greenhouse gas ( ghg ) emission building 2019 12 gtco 2-eq , equivalent 21 % global ghg emission year , 57 % indirect emission offsite generation electricity heat , 24 % direct emission produced onsite 18 % embodied emission use cement steel ( high evidence , high agreement ) . 95 % emission building co2 emission , ch 4 n 2o represented 0.08 % , emi...",chapter_Building.txt,"{'high confidence': 1, 'medium confidence': 0, 'low confidence': 2}","addressing new need residential building may , necessarily mean constructing new building , especially global north . repurposing existing non-residential building , longer use due expected spread teleworking triggered health crisis enabled digitalisation , could way overcome new need officetelschool building triggered health crisis ( low evidence , high confidence ) { 9.1 , 9.2 } . building c...",,"assessment method explained annex ii.11 . 
geophysical environmental-ecological technological economic socio-cultural institutionalphysical potential geophysical resourses land use air pollution toxic waste , ecotoxicity eutrophication water quantity quality biodiversity simplicity technological scalability maturity technology readiness cost 2030 long term effect employment economic growth publ...","[(building, 0.543305367994433), (emission, 0.3104602102825331), (new, 0.23284515771189984), (co, 0.15523010514126656), (crisis, 0.15523010514126656), (ghg, 0.15523010514126656), (global, 0.15523010514126656), (health, 0.15523010514126656), (need, 0.15523010514126656), (residential, 0.15523010514126656), (triggered, 0.15523010514126656), (12, 0.07761505257063328), (18, 0.07761505257063328), (20...",[],"[(effect, 0.24743582965269675), (enablers, 0.24743582965269675), (acceptance, 0.1649572197684645), (air, 0.1649572197684645), (barrier, 0.1649572197684645), (building, 0.1649572197684645), (capacity, 0.1649572197684645), (change, 0.1649572197684645), (confidence, 0.1649572197684645), (construction, 0.1649572197684645), (economic, 0.1649572197684645), (geophysical, 0.1649572197684645), (level, ...",1.0,-0.765,0.0,0.9657


In [104]:
def extract(text, phrase):
    """Return a one-sentence context window around every sentence containing ``phrase``.

    The text is split into sentences with ``nltk.sent_tokenize``. For each
    sentence in which ``phrase`` occurs (plain, case-sensitive substring
    test), a three-item list ``[before, containing, after]`` is produced.

    Parameters
    ----------
    text : str
        Text to split into sentences and search.
    phrase : str
        Substring that marks a sentence as a match.

    Returns
    -------
    list of list of str
        One ``[before, containing, after]`` list per matching sentence, in
        document order. ``before`` is ``""`` when the match is the first
        sentence and ``after`` is ``""`` when it is the last one.

    Notes
    -----
    Each window is built independently, so when neighbouring sentences both
    match, the same sentence appears in more than one window.
    """
    sentences = nltk.sent_tokenize(text)
    final_pos = len(sentences) - 1

    return [
        [
            # Neighbour on the left, or "" at the very start of the text.
            sentences[pos - 1] if pos > 0 else "",
            current,
            # Neighbour on the right, or "" at the very end of the text.
            sentences[pos + 1] if pos < final_pos else "",
        ]
        for pos, current in enumerate(sentences)
        if phrase in current
    ]

In [105]:
# Confidence qualifiers whose surrounding sentences are pulled out of each row's text.
phrases = ['high confidence', 'medium confidence', 'low confidence']

# Add one column per qualifier, named '<phrase>_text'. Each cell holds the list of
# [before, containing, after] sentence windows that extract() finds in that row's
# 'processed_text'. Series.apply forwards the `phrase` keyword to extract().
for phrase in phrases:
    ipcc_processed_csv[f'{phrase}_text'] = ipcc_processed_csv['processed_text'].apply(extract, phrase=phrase)

In [125]:
# clean the last three columns

def clean(text):
    """Flatten a list of lists into one space-separated string.

    Parameters
    ----------
    text : list of lists, or str
        Nested list whose inner lists hold the items to merge (for example
        the ``[before, containing, after]`` sentence groups stored in the
        ``*_text`` columns). A value that is already a string is accepted
        and returned unchanged.

    Returns
    -------
    str
        Every inner item converted with ``str`` and joined by single spaces,
        in order; ``""`` for an empty list.
    """
    # Re-running the cell that applies this function feeds the already-joined
    # strings back in. Iterating a string yields its characters, so without
    # this guard "low carbon" would be rewritten as "l o w   c a r b o n".
    # Returning the string untouched makes the cell safe to execute again.
    if isinstance(text, str):
        return text

    # Flatten the nested lists and join everything into a single string; the
    # lists had to be merged because TF-IDF could not be run on them directly.
    return ' '.join(str(item) for group in text for item in group)

# Replace each of the three context columns with its flattened, single-string form.
for text_column in ('high confidence_text', 'medium confidence_text', 'low confidence_text'):
    ipcc_processed_csv[text_column] = ipcc_processed_csv[text_column].apply(clean)

In [126]:
# Preview the first rows to inspect the three '<phrase>_text' columns alongside the source text.
ipcc_processed_csv.head()

Unnamed: 0,text,processed_text,file_name,confidence_freq,high confidence_text,medium confidence_text,low confidence_text
0,"\nWarming cannot be limited to well below 2°C without rapid \nand deep reductions in energy system carbon dioxide (CO 2) \nand greenhouse gas (GHG) emissions. In scenarios limiting \nwarming to 1.5°C (>50%) with no or limited overshoot (2°C (>67%) \nwith action starting in 2020), net energy system CO 2 emissions \n(interquartile range) fall by 87–97% (60–79%) in 2050. In 2030, \nin scenarios ...","warming limited well 2°c without rapid deep reduction energy system carbon dioxide ( co 2 ) greenhouse gas ( ghg ) emission . scenario limiting warming 1.5°c ( > 50 % ) limited overshoot ( 2°c ( > 67 % ) action starting 2020 ) , net energy system co 2 emission ( interquartile range ) fall 87–97 % ( 60–79 % ) 2050. 2030 , scenario limiting warming 1.5°c ( > 50 % ) limited overshoot , net co 2 g...",chapter_Energy_System.txt,"{'high confidence': 140, 'medium confidence': 26, 'low confidence': 4}","scenario limiting warming 1.5°c ( > 50 % ) limited overshoot ( 2°c ( > 67 % ) ) , net electricity sector co 2 emission reach zero globally 2045 2055 ( 2050 2080 ) . ( high confidence ) { 6.7 } limiting warming well 2°c require substantial energy system change next 30 year . includes reduced fossil fuel consumption , increased production low- zero-carbon energy source , increased use electricit...","advance low-carbon energy resource carrier next-generation biofuels , hydrogen produced electrolysis , synthetic fuel , carbon-neutral ammonia would substantially improve economics net-zero energy system . ( medium confidence ) { 6.4 , 6.7 } global energy system largest source co 2 emission ( chapter 2 ) . reducing energy sector emission therefore essential limit warming . 2020 ) , fundamental...","assessment method explained annex ii.11 . 
geophysical environmental-ecological technological economic socio-cultural institutionalphysical potential geophysical resource land use air pollution toxic waste , ecotoxicity eutrophication water quantity quality biodiversity simplicity technological scalability maturity technology readiness cost 2030 long term effect employment economic growth publi..."
1,"\nThe Agriculture, Forestry and Other Land Use1 (AFOLU) sector \nencompasses managed ecosystems and offers significant \nmitigation opportunities while delivering food, wood and \nother renewable resources as well as biodiversity conservation, \nprovided the sector adapts to climate change. Land-based \nmitigation measures represent some of the most important options \ncurrently available. The...","agriculture , forestry land use1 ( afolu ) sector encompasses managed ecosystem offer significant mitigation opportunity delivering food , wood renewable resource well biodiversity conservation , provided sector adapts climate change . land-based mitigation measure represent important option currently available . deliver carbon dioxide removal ( cdr ) substitute fossil fuel , thereby enabling ...",chapter_AFLOU.txt,"{'high confidence': 62, 'medium confidence': 65, 'low confidence': 7}","deliver carbon dioxide removal ( cdr ) substitute fossil fuel , thereby enabling emission reduction sector . rapid deployment afolu measure essential pathway staying within limit remaining budget 1.5°c target ( high confidence ) . carefully appropriately implemented , afolu mitigation measure uniquely positioned deliver substantial co-benefits help address many wider challenge associated land ...","time capacity land support function may threatened climate change ( high confidence ) . { ipcc ar6 wgi , figure spm.7 ; ipcc ar6 wgii , 7.1 , 7.6 } afolu ( managed land ) sector , average , accounted 13–21 % global total anthropogenic greenhouse gas ( ghg ) emission period 2010–2019 ( medium confidence ) . time managed natural terrestrial ecosystem carbon sink , absorbing around one third anth...","2015 ) . 
low confidence remains proposing specific change land condition achieve desired impact local , regional global climate due : poor relationship change surface albedo change surface temperature ( davin de noblet-ducoudré 2010 ) , compensation feedback among biophysical process ( bonan 2016 ; kalliokoski et al . 2020 ) , climate seasonal dependency biophysical effect ( bonan 2016 ) , omi..."
2,"\nIndustry Chapter 1111\nExecutive Summary\nThe Paris Agreement, the Sustainable Development Goals \n(SDGs) and the COVID-19 pandemic provide a new context \nfor the evolution of industry and the mitigation of industry \ngreenhouse gas (GHG) emissions (high confidence). This chapter \nis focused on what is new since AR5. It emphasises the energy and \nemissions intensive basic materials indu...","industry chapter 1111 executive summary paris agreement , sustainable development goal ( sdgs ) covid-19 pandemic provide new context evolution industry mitigation industry greenhouse gas ( ghg ) emission ( high confidence ) . chapter focused new since ar5 . emphasis energy emission intensive basic material industry key strategy reaching net zero emission . { 11.1.1 } net zero co 2 emission in...",chapter_Industry.txt,"{'high confidence': 14, 'medium confidence': 4, 'low confidence': 0}","industry chapter 1111 executive summary paris agreement , sustainable development goal ( sdgs ) covid-19 pandemic provide new context evolution industry mitigation industry greenhouse gas ( ghg ) emission ( high confidence ) . chapter focused new since ar5 . emphasis energy emission intensive basic material industry key strategy reaching net zero emission . { 11.1.1 } net zero co 2 emission i...","{ 11.4.1.3 } scenario analysis show significant cut global ghg emission even close net zero emission ghg intensive industry ( e.g. , steel , plastic , ammonia , cement ) achieved 2050 deploying multiple available emerging option ( medium confidence ) . cutting industry emission significantly requires reorientation historic focus important incremental improvement ( e.g. , biomethane methanol ) ...",
3,"\nExecutive Summary\nInnovation in climate mitigation technologies has seen \nenormous activity and significant progress in recent years. \nInnovation has also led to, and exacerbated, trade-offs \nin relation to sustainable development (high confidence). \nInnovation can leverage action to mitigate climate change by \nreinforcing other interventions. In conjunction with other enabling \ncondi...","executive summary innovation climate mitigation technology seen enormous activity significant progress recent year . innovation also led , exacerbated , trade-off relation sustainable development ( high confidence ) . innovation leverage action mitigate climate change reinforcing intervention . conjunction enabling condition , innovation support system transition limit warming help shift devel...",chapter_innovation.txt,"{'high confidence': 20, 'medium confidence': 7, 'low confidence': 0}","executive summary innovation climate mitigation technology seen enormous activity significant progress recent year . innovation also led , exacerbated , trade-off relation sustainable development ( high confidence ) . innovation leverage action mitigate climate change reinforcing intervention . conjunction enabling condition , innovation support system transition limit warming help shift devel...","suggests development planning innovation capability remains necessary , especially least- developed country sids . international diffusion low-emission technology also facilitated knowledge spillover region engaged clean r & ( medium confidence ) . { 16.6 } evidence role intellectual property right ( ipr ) innovation mixed . { 16.6 } evidence role intellectual property right ( ipr ) innovation...",
4,"\nChapter 10 Transport10Executive Summary\nMeeting climate mitigation goals would require transformative \nchanges in the transport sector (high confidence). In 2019, \ndirect greenhouse gas (GHG) emissions from the transport sector \nwere 8.7 GtCO 2-eq (up from 5.0 GtCO 2-eq in 1990) and accounted \nfor 23% of global energy-related CO 2 emissions. 70% of direct \ntransport emissions came fro...","chapter 10 transport10executive summary meeting climate mitigation goal would require transformative change transport sector ( high confidence ) . 2019 , direct greenhouse gas ( ghg ) emission transport sector 8.7 gtco 2-eq ( 5.0 gtco 2-eq 1990 ) accounted 23 % global energy-related co 2 emission . 70 % direct transport emission came road vehicle , 1 % , 11 % , 12 % came rail , shipping , avia...",chapter_Transport.txt,"{'high confidence': 15, 'medium confidence': 11, 'low confidence': 4}","chapter 10 transport10executive summary meeting climate mitigation goal would require transformative change transport sector ( high confidence ) . 2019 , direct greenhouse gas ( ghg ) emission transport sector 8.7 gtco 2-eq ( 5.0 gtco 2-eq 1990 ) accounted 23 % global energy-related co 2 emission . emission shipping aviation continue grow rapidly . transport- related emission developing regio...","electromobility powered low-carbon electricity potential rapidly reduce transport ghg applied multiple co-benefits developing world ’ growing city ( high confidence ) . { 10.3 , 10.4 , 10.8 } land-based , long-range , heavy-duty truck decarbonised battery electric haulage ( including use electric road system ) , complemented hydrogen- biofuel- based fuel context ( medium confidence ) . technol...",advanced biofuels could provide low-carbon jet fuel ( medium confidence ) . 
production synthetic fuel using low-carbon hydrogen co 2 captured direct air capture ( dac ) bioenergy carbon capture storage ( beccs ) could provide jet marine fuel option still require demonstration scale ( low confidence ) . ammonia produced low-carbon hydrogen could also serve marine fuel ( medium confidence ) . 20...


##### TF-IDF

In [127]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer

In [154]:
def get_tfidf(text, top_n=30):
    """Return the highest-weighted TF-IDF terms of a single text.

    A fresh ``TfidfVectorizer`` is fitted on ``text`` alone, i.e. on a
    corpus that contains exactly one document.

    NOTE(review): with a single document every vocabulary term occurs in
    the same number of documents (one), so the ranking is driven by term
    frequency only. Confirm whether fitting on the whole corpus was
    intended.

    Parameters
    ----------
    text : str
        Text to score. Anything that is not a non-blank string (for
        example a missing value) yields an empty list.
    top_n : int or None, default 30
        Maximum number of ``(term, score)`` pairs to return. ``None``
        returns every term.

    Returns
    -------
    list of tuple
        ``(term, score)`` pairs sorted by descending score. Terms with
        equal scores keep the order in which the vectorizer lists them.
        Empty when there is nothing to vectorize.
    """
    # Missing values and blank strings cannot be vectorized; keep the
    # original best-effort contract of returning an empty list for them.
    if not isinstance(text, str) or not text.strip():
        return []

    vectorizer = TfidfVectorizer()

    try:
        tfidf = vectorizer.fit_transform([text])
    except ValueError:
        # Raised when no token survives tokenization (empty vocabulary).
        # Only this expected failure is swallowed; a bare ``except`` here
        # used to hide real errors and turn every row into [].
        return []

    # ``get_feature_names`` no longer exists in recent scikit-learn
    # releases; prefer the replacement and fall back for old versions.
    name_getter = getattr(vectorizer, "get_feature_names_out", None)
    if name_getter is None:
        name_getter = vectorizer.get_feature_names
    feature_names = name_getter()
    tfidf_scores = tfidf.toarray()[0]

    # Stable sort: highest score first, ties stay in vocabulary order.
    ranked = sorted(
        zip(feature_names, tfidf_scores),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Keep only the top_n strongest terms.
    return ranked[:top_n]
    
        

# Build one TF-IDF column per confidence level: each target column holds
# the get_tfidf() result for the matching "<level> confidence_text" column.
tfidf_targets = {
    'high_tfidf': 'high confidence_text',
    'medium_tfidf': 'medium confidence_text',
    'low_tfidf': 'low confidence_text',
}
for tfidf_column, text_column in tfidf_targets.items():
    ipcc_processed_csv[tfidf_column] = ipcc_processed_csv[text_column].apply(get_tfidf)



In [155]:
# Preview the first rows to check the newly added *_tfidf columns.
ipcc_processed_csv.head()

Unnamed: 0,text,processed_text,file_name,confidence_freq,high confidence_text,medium confidence_text,low confidence_text,high_tfidf,medium_tfidf,low_tfidf
0,"\nWarming cannot be limited to well below 2°C without rapid \nand deep reductions in energy system carbon dioxide (CO 2) \nand greenhouse gas (GHG) emissions. In scenarios limiting \nwarming to 1.5°C (>50%) with no or limited overshoot (2°C (>67%) \nwith action starting in 2020), net energy system CO 2 emissions \n(interquartile range) fall by 87–97% (60–79%) in 2050. In 2030, \nin scenarios ...","warming limited well 2°c without rapid deep reduction energy system carbon dioxide ( co 2 ) greenhouse gas ( ghg ) emission . scenario limiting warming 1.5°c ( > 50 % ) limited overshoot ( 2°c ( > 67 % ) action starting 2020 ) , net energy system co 2 emission ( interquartile range ) fall 87–97 % ( 60–79 % ) 2050. 2030 , scenario limiting warming 1.5°c ( > 50 % ) limited overshoot , net co 2 g...",chapter_Energy_System.txt,"{'high confidence': 140, 'medium confidence': 26, 'low confidence': 4}","scenario limiting warming 1.5°c ( > 50 % ) limited overshoot ( 2°c ( > 67 % ) ) , net electricity sector co 2 emission reach zero globally 2045 2055 ( 2050 2080 ) . ( high confidence ) { 6.7 } limiting warming well 2°c require substantial energy system change next 30 year . includes reduced fossil fuel consumption , increased production low- zero-carbon energy source , increased use electricit...","advance low-carbon energy resource carrier next-generation biofuels , hydrogen produced electrolysis , synthetic fuel , carbon-neutral ammonia would substantially improve economics net-zero energy system . ( medium confidence ) { 6.4 , 6.7 } global energy system largest source co 2 emission ( chapter 2 ) . reducing energy sector emission therefore essential limit warming . 2020 ) , fundamental...","assessment method explained annex ii.11 . 
geophysical environmental-ecological technological economic socio-cultural institutionalphysical potential geophysical resource land use air pollution toxic waste , ecotoxicity eutrophication water quantity quality biodiversity simplicity technological scalability maturity technology readiness cost 2030 long term effect employment economic growth publi...","[(energy, 0.4676206229823656), (confidence, 0.39468896618695076), (high, 0.39468896618695076), (system, 0.24668060386684423), (cost, 0.16302370342504488), (al, 0.15229845977866036), (et, 0.15015341104938346), (electricity, 0.14157321613227583), (carbon, 0.1308479724858913), (low, 0.12870292375661438), (fossil, 0.10939748519312223), (fuel, 0.10939748519312223), (zero, 0.09867224154673769), (win...","[(confidence, 0.3911657266912561), (medium, 0.2825085803881294), (energy, 0.26077715112750405), (warming, 0.22818000723656606), (electricity, 0.2064485779759407), (al, 0.19558286334562805), (et, 0.19558286334562805), (cost, 0.16298571945469004), (low, 0.15212000482437738), (2020, 0.1412542901940647), (high, 0.1412542901940647), (use, 0.13038857556375202), (zero, 0.13038857556375202), (carbon, ...","[(confidence, 0.3232299675777271), (energy, 0.3232299675777271), (low, 0.2770542579237661), (wind, 0.2770542579237661), (climate, 0.18470283861584408), (effect, 0.18470283861584408), (high, 0.18470283861584408), (al, 0.13852712896188304), (carbon, 0.13852712896188304), (change, 0.13852712896188304), (enablers, 0.13852712896188304), (et, 0.13852712896188304), (impact, 0.13852712896188304), (res..."
1,"\nThe Agriculture, Forestry and Other Land Use1 (AFOLU) sector \nencompasses managed ecosystems and offers significant \nmitigation opportunities while delivering food, wood and \nother renewable resources as well as biodiversity conservation, \nprovided the sector adapts to climate change. Land-based \nmitigation measures represent some of the most important options \ncurrently available. The...","agriculture , forestry land use1 ( afolu ) sector encompasses managed ecosystem offer significant mitigation opportunity delivering food , wood renewable resource well biodiversity conservation , provided sector adapts climate change . land-based mitigation measure represent important option currently available . deliver carbon dioxide removal ( cdr ) substitute fossil fuel , thereby enabling ...",chapter_AFLOU.txt,"{'high confidence': 62, 'medium confidence': 65, 'low confidence': 7}","deliver carbon dioxide removal ( cdr ) substitute fossil fuel , thereby enabling emission reduction sector . rapid deployment afolu measure essential pathway staying within limit remaining budget 1.5°c target ( high confidence ) . carefully appropriately implemented , afolu mitigation measure uniquely positioned deliver substantial co-benefits help address many wider challenge associated land ...","time capacity land support function may threatened climate change ( high confidence ) . { ipcc ar6 wgi , figure spm.7 ; ipcc ar6 wgii , 7.1 , 7.6 } afolu ( managed land ) sector , average , accounted 13–21 % global total anthropogenic greenhouse gas ( ghg ) emission period 2010–2019 ( medium confidence ) . time managed natural terrestrial ecosystem carbon sink , absorbing around one third anth...","2015 ) . 
low confidence remains proposing specific change land condition achieve desired impact local , regional global climate due : poor relationship change surface albedo change surface temperature ( davin de noblet-ducoudré 2010 ) , compensation feedback among biophysical process ( bonan 2016 ; kalliokoski et al . 2020 ) , climate seasonal dependency biophysical effect ( bonan 2016 ) , omi...","[(confidence, 0.42545103103257703), (high, 0.3403608248260616), (emission, 0.30632474234345547), (mitigation, 0.26377963924019776), (yr, 0.22123453613694005), (afolu, 0.19570747427498544), (potential, 0.13614432993042466), (climate, 0.12763530930977313), (land, 0.12763530930977313), (gtco, 0.12338079899944734), (measure, 0.11912628868912158), (2020, 0.11487177837879581), (net, 0.11061726806847...","[(confidence, 0.36968858181893005), (yr, 0.3245044218188386), (medium, 0.2875355636369456), (gtco, 0.26288965818235027), (emission, 0.2546743563641518), (potential, 0.23824375272775491), (eq, 0.1971672436367627), (mitigation, 0.18484429090946503), (al, 0.1643060363639689), (et, 0.1643060363639689), (co, 0.13555248000027434), (land, 0.12322952727297667), (global, 0.11912187636387746), (net, 0.1...","[(confidence, 0.35874800166708765), (carbon, 0.25112360116696136), (low, 0.25112360116696136), (potential, 0.21524880100025257), (al, 0.17937400083354382), (change, 0.17937400083354382), (et, 0.17937400083354382), (mitigation, 0.17937400083354382), (yr, 0.17937400083354382), (climate, 0.14349920066683505), (2010, 0.10762440050012628), (2050, 0.10762440050012628), (coastal, 0.10762440050012628)..."
2,"\nIndustry Chapter 1111\nExecutive Summary\nThe Paris Agreement, the Sustainable Development Goals \n(SDGs) and the COVID-19 pandemic provide a new context \nfor the evolution of industry and the mitigation of industry \ngreenhouse gas (GHG) emissions (high confidence). This chapter \nis focused on what is new since AR5. It emphasises the energy and \nemissions intensive basic materials indu...","industry chapter 1111 executive summary paris agreement , sustainable development goal ( sdgs ) covid-19 pandemic provide new context evolution industry mitigation industry greenhouse gas ( ghg ) emission ( high confidence ) . chapter focused new since ar5 . emphasis energy emission intensive basic material industry key strategy reaching net zero emission . { 11.1.1 } net zero co 2 emission in...",chapter_Industry.txt,"{'high confidence': 14, 'medium confidence': 4, 'low confidence': 0}","industry chapter 1111 executive summary paris agreement , sustainable development goal ( sdgs ) covid-19 pandemic provide new context evolution industry mitigation industry greenhouse gas ( ghg ) emission ( high confidence ) . chapter focused new since ar5 . emphasis energy emission intensive basic material industry key strategy reaching net zero emission . { 11.1.1 } net zero co 2 emission i...","{ 11.4.1.3 } scenario analysis show significant cut global ghg emission even close net zero emission ghg intensive industry ( e.g. , steel , plastic , ammonia , cement ) achieved 2050 deploying multiple available emerging option ( medium confidence ) . cutting industry emission significantly requires reorientation historic focus important incremental improvement ( e.g. 
, biomethane methanol ) ...",,"[(11, 0.3818730893140701), (emission, 0.3818730893140701), (high, 0.2603680154414114), (material, 0.2603680154414114), (confidence, 0.24301014774531735), (energy, 0.17357867696094095), (low, 0.17357867696094095), (carbon, 0.13886294156875276), (zero, 0.13886294156875276), (efficiency, 0.12150507387265867), (ghg, 0.12150507387265867), (industry, 0.12150507387265867), (use, 0.12150507387265867),...","[(emission, 0.4740454631399772), (11, 0.2844272778839863), (industry, 0.2844272778839863), (carbon, 0.1896181852559909), (confidence, 0.1896181852559909), (cost, 0.1896181852559909), (low, 0.1896181852559909), (medium, 0.1896181852559909), (cement, 0.14221363894199315), (production, 0.14221363894199315), (source, 0.14221363894199315), (zero, 0.14221363894199315), (basic, 0.09480909262799544), ...",[]
3,"\nExecutive Summary\nInnovation in climate mitigation technologies has seen \nenormous activity and significant progress in recent years. \nInnovation has also led to, and exacerbated, trade-offs \nin relation to sustainable development (high confidence). \nInnovation can leverage action to mitigate climate change by \nreinforcing other interventions. In conjunction with other enabling \ncondi...","executive summary innovation climate mitigation technology seen enormous activity significant progress recent year . innovation also led , exacerbated , trade-off relation sustainable development ( high confidence ) . innovation leverage action mitigate climate change reinforcing intervention . conjunction enabling condition , innovation support system transition limit warming help shift devel...",chapter_innovation.txt,"{'high confidence': 20, 'medium confidence': 7, 'low confidence': 0}","executive summary innovation climate mitigation technology seen enormous activity significant progress recent year . innovation also led , exacerbated , trade-off relation sustainable development ( high confidence ) . innovation leverage action mitigate climate change reinforcing intervention . conjunction enabling condition , innovation support system transition limit warming help shift devel...","suggests development planning innovation capability remains necessary , especially least- developed country sids . international diffusion low-emission technology also facilitated knowledge spillover region engaged clean r & ( medium confidence ) . { 16.6 } evidence role intellectual property right ( ipr ) innovation mixed . 
{ 16.6 } evidence role intellectual property right ( ipr ) innovation...",,"[(innovation, 0.4776659519992759), (16, 0.41536169739067474), (high, 0.28036914573870547), (confidence, 0.25960106086917173), (technology, 0.21806489113010424), (development, 0.1661446789562699), (system, 0.14537659408673614), (technological, 0.1349925516519693), (box, 0.12460850921720242), (change, 0.12460850921720242), (climate, 0.10384042434766869), (country, 0.10384042434766869), (emission...","[(confidence, 0.2980099775410749), (technology, 0.2980099775410749), (medium, 0.26820897978696745), (change, 0.20860698427875243), (country, 0.17880598652464494), (mitigation, 0.17880598652464494), (also, 0.14900498877053744), (climate, 0.14900498877053744), (development, 0.14900498877053744), (emission, 0.14900498877053744), (innovation, 0.14900498877053744), (16, 0.11920399101642996), (chapt...",[]
4,"\nChapter 10 Transport10Executive Summary\nMeeting climate mitigation goals would require transformative \nchanges in the transport sector (high confidence). In 2019, \ndirect greenhouse gas (GHG) emissions from the transport sector \nwere 8.7 GtCO 2-eq (up from 5.0 GtCO 2-eq in 1990) and accounted \nfor 23% of global energy-related CO 2 emissions. 70% of direct \ntransport emissions came fro...","chapter 10 transport10executive summary meeting climate mitigation goal would require transformative change transport sector ( high confidence ) . 2019 , direct greenhouse gas ( ghg ) emission transport sector 8.7 gtco 2-eq ( 5.0 gtco 2-eq 1990 ) accounted 23 % global energy-related co 2 emission . 70 % direct transport emission came road vehicle , 1 % , 11 % , 12 % came rail , shipping , avia...",chapter_Transport.txt,"{'high confidence': 15, 'medium confidence': 11, 'low confidence': 4}","chapter 10 transport10executive summary meeting climate mitigation goal would require transformative change transport sector ( high confidence ) . 2019 , direct greenhouse gas ( ghg ) emission transport sector 8.7 gtco 2-eq ( 5.0 gtco 2-eq 1990 ) accounted 23 % global energy-related co 2 emission . emission shipping aviation continue grow rapidly . transport- related emission developing regio...","electromobility powered low-carbon electricity potential rapidly reduce transport ghg applied multiple co-benefits developing world ’ growing city ( high confidence ) . { 10.3 , 10.4 , 10.8 } land-based , long-range , heavy-duty truck decarbonised battery electric haulage ( including use electric road system ) , complemented hydrogen- biofuel- based fuel context ( medium confidence ) . technol...",advanced biofuels could provide low-carbon jet fuel ( medium confidence ) . 
production synthetic fuel using low-carbon hydrogen co 2 captured direct air capture ( dac ) bioenergy carbon capture storage ( beccs ) could provide jet marine fuel option still require demonstration scale ( low confidence ) . ammonia produced low-carbon hydrogen could also serve marine fuel ( medium confidence ) . 20...,"[(10, 0.3903352467758598), (confidence, 0.35130172209827376), (transport, 0.35130172209827376), (high, 0.32527937231321646), (fuel, 0.1951676233879299), (emission, 0.16914527360287257), (hydrogen, 0.1561340987103439), (carbon, 0.1301117489252866), (demand, 0.11710057403275793), (low, 0.11710057403275793), (sector, 0.11710057403275793), (aviation, 0.10408939914022927), (based, 0.104089399140229...","[(10, 0.4219146387646126), (confidence, 0.38816146766344356), (medium, 0.28690195435993654), (fuel, 0.25314878325876755), (could, 0.18564244105642955), (carbon, 0.16876585550584502), (low, 0.16876585550584502), (electric, 0.15188926995526053), (hydrogen, 0.15188926995526053), (based, 0.13501268440467604), (system, 0.13501268440467604), (transport, 0.13501268440467604), (decarbonisation, 0.1181...","[(confidence, 0.26594532430676177), (low, 0.26594532430676177), (aviation, 0.2216211035889681), (fuel, 0.2216211035889681), (land, 0.2216211035889681), (shipping, 0.2216211035889681), (transport, 0.2216211035889681), (carbon, 0.1772968828711745), (effect, 0.1772968828711745), (contrail, 0.13297266215338088), (could, 0.13297266215338088), (enablers, 0.13297266215338088), (hydrogen, 0.1329726621..."


In [156]:
# Show the complete medium-confidence TF-IDF list for the row labelled 0.
# Read it from ipcc_processed_csv, the frame the *_tfidf columns were added
# to, so the cell does not rely on a second name for the same data.
print(ipcc_processed_csv.loc[0, 'medium_tfidf'])

[('confidence', 0.3911657266912561), ('medium', 0.2825085803881294), ('energy', 0.26077715112750405), ('warming', 0.22818000723656606), ('electricity', 0.2064485779759407), ('al', 0.19558286334562805), ('et', 0.19558286334562805), ('cost', 0.16298571945469004), ('low', 0.15212000482437738), ('2020', 0.1412542901940647), ('high', 0.1412542901940647), ('use', 0.13038857556375202), ('zero', 0.13038857556375202), ('carbon', 0.11952286093343936), ('limited', 0.11952286093343936), ('scenario', 0.11952286093343936), ('emission', 0.1086571463031267), ('limiting', 0.1086571463031267), ('fuel', 0.09779143167281402), ('system', 0.09779143167281402), ('67', 0.08692571704250135), ('electrification', 0.08692571704250135), ('net', 0.08692571704250135), ('range', 0.08692571704250135), ('resource', 0.08692571704250135), ('source', 0.08692571704250135), ('2050', 0.07606000241218869), ('50', 0.07606000241218869), ('co', 0.07606000241218869), ('gas', 0.07606000241218869)]


### Sentiment

In [158]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Create one VADER analyser at notebook level; calculate_sentiment() reuses
# this instance for every call.
sentimentAnalyser = SentimentIntensityAnalyzer()

In [159]:
def calculate_sentiment(text):
    """Return the VADER ``compound`` score for ``text``.

    The score is read from the ``'compound'`` entry of the dictionary
    produced by the notebook-level ``sentimentAnalyser``.

    NOTE(review): the scores displayed in this notebook for chapter-length
    inputs are almost all at or near 1.0, so they barely separate the
    chapters. Consider scoring sentence by sentence and aggregating.

    Parameters
    ----------
    text : str or missing value
        Text to score. ``None`` and ``NaN`` (an empty cell in the frame)
        are treated as "no text".

    Returns
    -------
    float
        The compound score, or ``0.0`` when there is no text. This is the
        same value the notebook output shows for rows whose text is empty.
    """
    # NaN is the only float that is not equal to itself; this avoids
    # handing a missing cell to the analyser, which expects a string.
    if text is None or (isinstance(text, float) and text != text):
        return 0.0

    return sentimentAnalyser.polarity_scores(text)['compound']

In [161]:
# Score each row's full processed_text with calculate_sentiment().
ipcc_processed_csv['overall_sentiment'] = ipcc_processed_csv['processed_text'].apply(calculate_sentiment)

In [163]:
# Score the text of each confidence level separately: every target column
# holds calculate_sentiment() applied to the matching text column.
sentiment_targets = {
    'sentiment_high': 'high confidence_text',
    'sentiment_medium': 'medium confidence_text',
    'sentiment_low': 'low confidence_text',
}
for sentiment_column, text_column in sentiment_targets.items():
    ipcc_processed_csv[sentiment_column] = ipcc_processed_csv[text_column].apply(calculate_sentiment)

In [164]:
# Preview the first rows to check the newly added sentiment columns.
ipcc_processed_csv.head()

Unnamed: 0,text,processed_text,file_name,confidence_freq,high confidence_text,medium confidence_text,low confidence_text,high_tfidf,medium_tfidf,low_tfidf,overall_sentiment,sentiment_high,sentiment_medium,sentiment_low
0,"\nWarming cannot be limited to well below 2°C without rapid \nand deep reductions in energy system carbon dioxide (CO 2) \nand greenhouse gas (GHG) emissions. In scenarios limiting \nwarming to 1.5°C (>50%) with no or limited overshoot (2°C (>67%) \nwith action starting in 2020), net energy system CO 2 emissions \n(interquartile range) fall by 87–97% (60–79%) in 2050. In 2030, \nin scenarios ...","warming limited well 2°c without rapid deep reduction energy system carbon dioxide ( co 2 ) greenhouse gas ( ghg ) emission . scenario limiting warming 1.5°c ( > 50 % ) limited overshoot ( 2°c ( > 67 % ) action starting 2020 ) , net energy system co 2 emission ( interquartile range ) fall 87–97 % ( 60–79 % ) 2050. 2030 , scenario limiting warming 1.5°c ( > 50 % ) limited overshoot , net co 2 g...",chapter_Energy_System.txt,"{'high confidence': 140, 'medium confidence': 26, 'low confidence': 4}","scenario limiting warming 1.5°c ( > 50 % ) limited overshoot ( 2°c ( > 67 % ) ) , net electricity sector co 2 emission reach zero globally 2045 2055 ( 2050 2080 ) . ( high confidence ) { 6.7 } limiting warming well 2°c require substantial energy system change next 30 year . includes reduced fossil fuel consumption , increased production low- zero-carbon energy source , increased use electricit...","advance low-carbon energy resource carrier next-generation biofuels , hydrogen produced electrolysis , synthetic fuel , carbon-neutral ammonia would substantially improve economics net-zero energy system . ( medium confidence ) { 6.4 , 6.7 } global energy system largest source co 2 emission ( chapter 2 ) . reducing energy sector emission therefore essential limit warming . 2020 ) , fundamental...","assessment method explained annex ii.11 . 
geophysical environmental-ecological technological economic socio-cultural institutionalphysical potential geophysical resource land use air pollution toxic waste , ecotoxicity eutrophication water quantity quality biodiversity simplicity technological scalability maturity technology readiness cost 2030 long term effect employment economic growth publi...","[(energy, 0.4676206229823656), (confidence, 0.39468896618695076), (high, 0.39468896618695076), (system, 0.24668060386684423), (cost, 0.16302370342504488), (al, 0.15229845977866036), (et, 0.15015341104938346), (electricity, 0.14157321613227583), (carbon, 0.1308479724858913), (low, 0.12870292375661438), (fossil, 0.10939748519312223), (fuel, 0.10939748519312223), (zero, 0.09867224154673769), (win...","[(confidence, 0.3911657266912561), (medium, 0.2825085803881294), (energy, 0.26077715112750405), (warming, 0.22818000723656606), (electricity, 0.2064485779759407), (al, 0.19558286334562805), (et, 0.19558286334562805), (cost, 0.16298571945469004), (low, 0.15212000482437738), (2020, 0.1412542901940647), (high, 0.1412542901940647), (use, 0.13038857556375202), (zero, 0.13038857556375202), (carbon, ...","[(confidence, 0.3232299675777271), (energy, 0.3232299675777271), (low, 0.2770542579237661), (wind, 0.2770542579237661), (climate, 0.18470283861584408), (effect, 0.18470283861584408), (high, 0.18470283861584408), (al, 0.13852712896188304), (carbon, 0.13852712896188304), (change, 0.13852712896188304), (enablers, 0.13852712896188304), (et, 0.13852712896188304), (impact, 0.13852712896188304), (res...",1.0,1.0,0.9997,0.9906
1,"\nThe Agriculture, Forestry and Other Land Use1 (AFOLU) sector \nencompasses managed ecosystems and offers significant \nmitigation opportunities while delivering food, wood and \nother renewable resources as well as biodiversity conservation, \nprovided the sector adapts to climate change. Land-based \nmitigation measures represent some of the most important options \ncurrently available. The...","agriculture , forestry land use1 ( afolu ) sector encompasses managed ecosystem offer significant mitigation opportunity delivering food , wood renewable resource well biodiversity conservation , provided sector adapts climate change . land-based mitigation measure represent important option currently available . deliver carbon dioxide removal ( cdr ) substitute fossil fuel , thereby enabling ...",chapter_AFLOU.txt,"{'high confidence': 62, 'medium confidence': 65, 'low confidence': 7}","deliver carbon dioxide removal ( cdr ) substitute fossil fuel , thereby enabling emission reduction sector . rapid deployment afolu measure essential pathway staying within limit remaining budget 1.5°c target ( high confidence ) . carefully appropriately implemented , afolu mitigation measure uniquely positioned deliver substantial co-benefits help address many wider challenge associated land ...","time capacity land support function may threatened climate change ( high confidence ) . { ipcc ar6 wgi , figure spm.7 ; ipcc ar6 wgii , 7.1 , 7.6 } afolu ( managed land ) sector , average , accounted 13–21 % global total anthropogenic greenhouse gas ( ghg ) emission period 2010–2019 ( medium confidence ) . time managed natural terrestrial ecosystem carbon sink , absorbing around one third anth...","2015 ) . 
low confidence remains proposing specific change land condition achieve desired impact local , regional global climate due : poor relationship change surface albedo change surface temperature ( davin de noblet-ducoudré 2010 ) , compensation feedback among biophysical process ( bonan 2016 ; kalliokoski et al . 2020 ) , climate seasonal dependency biophysical effect ( bonan 2016 ) , omi...","[(confidence, 0.42545103103257703), (high, 0.3403608248260616), (emission, 0.30632474234345547), (mitigation, 0.26377963924019776), (yr, 0.22123453613694005), (afolu, 0.19570747427498544), (potential, 0.13614432993042466), (climate, 0.12763530930977313), (land, 0.12763530930977313), (gtco, 0.12338079899944734), (measure, 0.11912628868912158), (2020, 0.11487177837879581), (net, 0.11061726806847...","[(confidence, 0.36968858181893005), (yr, 0.3245044218188386), (medium, 0.2875355636369456), (gtco, 0.26288965818235027), (emission, 0.2546743563641518), (potential, 0.23824375272775491), (eq, 0.1971672436367627), (mitigation, 0.18484429090946503), (al, 0.1643060363639689), (et, 0.1643060363639689), (co, 0.13555248000027434), (land, 0.12322952727297667), (global, 0.11912187636387746), (net, 0.1...","[(confidence, 0.35874800166708765), (carbon, 0.25112360116696136), (low, 0.25112360116696136), (potential, 0.21524880100025257), (al, 0.17937400083354382), (change, 0.17937400083354382), (et, 0.17937400083354382), (mitigation, 0.17937400083354382), (yr, 0.17937400083354382), (climate, 0.14349920066683505), (2010, 0.10762440050012628), (2050, 0.10762440050012628), (coastal, 0.10762440050012628)...",1.0,0.9999,0.9999,0.962
2,"\nIndustry Chapter 1111\nExecutive Summary\nThe Paris Agreement, the Sustainable Development Goals \n(SDGs) and the COVID-19 pandemic provide a new context \nfor the evolution of industry and the mitigation of industry \ngreenhouse gas (GHG) emissions (high confidence). This chapter \nis focused on what is new since AR5. It emphasises the energy and \nemissions intensive basic materials indu...","industry chapter 1111 executive summary paris agreement , sustainable development goal ( sdgs ) covid-19 pandemic provide new context evolution industry mitigation industry greenhouse gas ( ghg ) emission ( high confidence ) . chapter focused new since ar5 . emphasis energy emission intensive basic material industry key strategy reaching net zero emission . { 11.1.1 } net zero co 2 emission in...",chapter_Industry.txt,"{'high confidence': 14, 'medium confidence': 4, 'low confidence': 0}","industry chapter 1111 executive summary paris agreement , sustainable development goal ( sdgs ) covid-19 pandemic provide new context evolution industry mitigation industry greenhouse gas ( ghg ) emission ( high confidence ) . chapter focused new since ar5 . emphasis energy emission intensive basic material industry key strategy reaching net zero emission . { 11.1.1 } net zero co 2 emission i...","{ 11.4.1.3 } scenario analysis show significant cut global ghg emission even close net zero emission ghg intensive industry ( e.g. , steel , plastic , ammonia , cement ) achieved 2050 deploying multiple available emerging option ( medium confidence ) . cutting industry emission significantly requires reorientation historic focus important incremental improvement ( e.g. 
, biomethane methanol ) ...",,"[(11, 0.3818730893140701), (emission, 0.3818730893140701), (high, 0.2603680154414114), (material, 0.2603680154414114), (confidence, 0.24301014774531735), (energy, 0.17357867696094095), (low, 0.17357867696094095), (carbon, 0.13886294156875276), (zero, 0.13886294156875276), (efficiency, 0.12150507387265867), (ghg, 0.12150507387265867), (industry, 0.12150507387265867), (use, 0.12150507387265867),...","[(emission, 0.4740454631399772), (11, 0.2844272778839863), (industry, 0.2844272778839863), (carbon, 0.1896181852559909), (confidence, 0.1896181852559909), (cost, 0.1896181852559909), (low, 0.1896181852559909), (medium, 0.1896181852559909), (cement, 0.14221363894199315), (production, 0.14221363894199315), (source, 0.14221363894199315), (zero, 0.14221363894199315), (basic, 0.09480909262799544), ...",[],1.0,0.9986,0.9682,0.0
3,"\nExecutive Summary\nInnovation in climate mitigation technologies has seen \nenormous activity and significant progress in recent years. \nInnovation has also led to, and exacerbated, trade-offs \nin relation to sustainable development (high confidence). \nInnovation can leverage action to mitigate climate change by \nreinforcing other interventions. In conjunction with other enabling \ncondi...","executive summary innovation climate mitigation technology seen enormous activity significant progress recent year . innovation also led , exacerbated , trade-off relation sustainable development ( high confidence ) . innovation leverage action mitigate climate change reinforcing intervention . conjunction enabling condition , innovation support system transition limit warming help shift devel...",chapter_innovation.txt,"{'high confidence': 20, 'medium confidence': 7, 'low confidence': 0}","executive summary innovation climate mitigation technology seen enormous activity significant progress recent year . innovation also led , exacerbated , trade-off relation sustainable development ( high confidence ) . innovation leverage action mitigate climate change reinforcing intervention . conjunction enabling condition , innovation support system transition limit warming help shift devel...","suggests development planning innovation capability remains necessary , especially least- developed country sids . international diffusion low-emission technology also facilitated knowledge spillover region engaged clean r & ( medium confidence ) . { 16.6 } evidence role intellectual property right ( ipr ) innovation mixed . 
{ 16.6 } evidence role intellectual property right ( ipr ) innovation...",,"[(innovation, 0.4776659519992759), (16, 0.41536169739067474), (high, 0.28036914573870547), (confidence, 0.25960106086917173), (technology, 0.21806489113010424), (development, 0.1661446789562699), (system, 0.14537659408673614), (technological, 0.1349925516519693), (box, 0.12460850921720242), (change, 0.12460850921720242), (climate, 0.10384042434766869), (country, 0.10384042434766869), (emission...","[(confidence, 0.2980099775410749), (technology, 0.2980099775410749), (medium, 0.26820897978696745), (change, 0.20860698427875243), (country, 0.17880598652464494), (mitigation, 0.17880598652464494), (also, 0.14900498877053744), (climate, 0.14900498877053744), (development, 0.14900498877053744), (emission, 0.14900498877053744), (innovation, 0.14900498877053744), (16, 0.11920399101642996), (chapt...",[],1.0,0.9999,0.9987,0.0
4,"\nChapter 10 Transport10Executive Summary\nMeeting climate mitigation goals would require transformative \nchanges in the transport sector (high confidence). In 2019, \ndirect greenhouse gas (GHG) emissions from the transport sector \nwere 8.7 GtCO 2-eq (up from 5.0 GtCO 2-eq in 1990) and accounted \nfor 23% of global energy-related CO 2 emissions. 70% of direct \ntransport emissions came fro...","chapter 10 transport10executive summary meeting climate mitigation goal would require transformative change transport sector ( high confidence ) . 2019 , direct greenhouse gas ( ghg ) emission transport sector 8.7 gtco 2-eq ( 5.0 gtco 2-eq 1990 ) accounted 23 % global energy-related co 2 emission . 70 % direct transport emission came road vehicle , 1 % , 11 % , 12 % came rail , shipping , avia...",chapter_Transport.txt,"{'high confidence': 15, 'medium confidence': 11, 'low confidence': 4}","chapter 10 transport10executive summary meeting climate mitigation goal would require transformative change transport sector ( high confidence ) . 2019 , direct greenhouse gas ( ghg ) emission transport sector 8.7 gtco 2-eq ( 5.0 gtco 2-eq 1990 ) accounted 23 % global energy-related co 2 emission . emission shipping aviation continue grow rapidly . transport- related emission developing regio...","electromobility powered low-carbon electricity potential rapidly reduce transport ghg applied multiple co-benefits developing world ’ growing city ( high confidence ) . { 10.3 , 10.4 , 10.8 } land-based , long-range , heavy-duty truck decarbonised battery electric haulage ( including use electric road system ) , complemented hydrogen- biofuel- based fuel context ( medium confidence ) . technol...",advanced biofuels could provide low-carbon jet fuel ( medium confidence ) . 
production synthetic fuel using low-carbon hydrogen co 2 captured direct air capture ( dac ) bioenergy carbon capture storage ( beccs ) could provide jet marine fuel option still require demonstration scale ( low confidence ) . ammonia produced low-carbon hydrogen could also serve marine fuel ( medium confidence ) . 20...,"[(10, 0.3903352467758598), (confidence, 0.35130172209827376), (transport, 0.35130172209827376), (high, 0.32527937231321646), (fuel, 0.1951676233879299), (emission, 0.16914527360287257), (hydrogen, 0.1561340987103439), (carbon, 0.1301117489252866), (demand, 0.11710057403275793), (low, 0.11710057403275793), (sector, 0.11710057403275793), (aviation, 0.10408939914022927), (based, 0.104089399140229...","[(10, 0.4219146387646126), (confidence, 0.38816146766344356), (medium, 0.28690195435993654), (fuel, 0.25314878325876755), (could, 0.18564244105642955), (carbon, 0.16876585550584502), (low, 0.16876585550584502), (electric, 0.15188926995526053), (hydrogen, 0.15188926995526053), (based, 0.13501268440467604), (system, 0.13501268440467604), (transport, 0.13501268440467604), (decarbonisation, 0.1181...","[(confidence, 0.26594532430676177), (low, 0.26594532430676177), (aviation, 0.2216211035889681), (fuel, 0.2216211035889681), (land, 0.2216211035889681), (shipping, 0.2216211035889681), (transport, 0.2216211035889681), (carbon, 0.1772968828711745), (effect, 0.1772968828711745), (contrail, 0.13297266215338088), (could, 0.13297266215338088), (enablers, 0.13297266215338088), (hydrogen, 0.1329726621...",1.0,0.9996,0.9987,0.9826


In [167]:
# Show the ipcc_processed_csv frame as this cell's rich output. No row or column
# slice is applied, so every row and every column is rendered.
# NOTE(review): the long text columns make this output very large; consider a
# narrower preview (selected columns or a row slice) if the full dump is not needed.
display(ipcc_processed_csv)

Unnamed: 0,text,processed_text,file_name,confidence_freq,high confidence_text,medium confidence_text,low confidence_text,high_tfidf,medium_tfidf,low_tfidf,overall_sentiment,sentiment_high,sentiment_medium,sentiment_low
0,"\nWarming cannot be limited to well below 2°C without rapid \nand deep reductions in energy system carbon dioxide (CO 2) \nand greenhouse gas (GHG) emissions. In scenarios limiting \nwarming to 1.5°C (>50%) with no or limited overshoot (2°C (>67%) \nwith action starting in 2020), net energy system CO 2 emissions \n(interquartile range) fall by 87–97% (60–79%) in 2050. In 2030, \nin scenarios ...","warming limited well 2°c without rapid deep reduction energy system carbon dioxide ( co 2 ) greenhouse gas ( ghg ) emission . scenario limiting warming 1.5°c ( > 50 % ) limited overshoot ( 2°c ( > 67 % ) action starting 2020 ) , net energy system co 2 emission ( interquartile range ) fall 87–97 % ( 60–79 % ) 2050. 2030 , scenario limiting warming 1.5°c ( > 50 % ) limited overshoot , net co 2 g...",chapter_Energy_System.txt,"{'high confidence': 140, 'medium confidence': 26, 'low confidence': 4}","scenario limiting warming 1.5°c ( > 50 % ) limited overshoot ( 2°c ( > 67 % ) ) , net electricity sector co 2 emission reach zero globally 2045 2055 ( 2050 2080 ) . ( high confidence ) { 6.7 } limiting warming well 2°c require substantial energy system change next 30 year . includes reduced fossil fuel consumption , increased production low- zero-carbon energy source , increased use electricit...","advance low-carbon energy resource carrier next-generation biofuels , hydrogen produced electrolysis , synthetic fuel , carbon-neutral ammonia would substantially improve economics net-zero energy system . ( medium confidence ) { 6.4 , 6.7 } global energy system largest source co 2 emission ( chapter 2 ) . reducing energy sector emission therefore essential limit warming . 2020 ) , fundamental...","assessment method explained annex ii.11 . 
geophysical environmental-ecological technological economic socio-cultural institutionalphysical potential geophysical resource land use air pollution toxic waste , ecotoxicity eutrophication water quantity quality biodiversity simplicity technological scalability maturity technology readiness cost 2030 long term effect employment economic growth publi...","[(energy, 0.4676206229823656), (confidence, 0.39468896618695076), (high, 0.39468896618695076), (system, 0.24668060386684423), (cost, 0.16302370342504488), (al, 0.15229845977866036), (et, 0.15015341104938346), (electricity, 0.14157321613227583), (carbon, 0.1308479724858913), (low, 0.12870292375661438), (fossil, 0.10939748519312223), (fuel, 0.10939748519312223), (zero, 0.09867224154673769), (win...","[(confidence, 0.3911657266912561), (medium, 0.2825085803881294), (energy, 0.26077715112750405), (warming, 0.22818000723656606), (electricity, 0.2064485779759407), (al, 0.19558286334562805), (et, 0.19558286334562805), (cost, 0.16298571945469004), (low, 0.15212000482437738), (2020, 0.1412542901940647), (high, 0.1412542901940647), (use, 0.13038857556375202), (zero, 0.13038857556375202), (carbon, ...","[(confidence, 0.3232299675777271), (energy, 0.3232299675777271), (low, 0.2770542579237661), (wind, 0.2770542579237661), (climate, 0.18470283861584408), (effect, 0.18470283861584408), (high, 0.18470283861584408), (al, 0.13852712896188304), (carbon, 0.13852712896188304), (change, 0.13852712896188304), (enablers, 0.13852712896188304), (et, 0.13852712896188304), (impact, 0.13852712896188304), (res...",1.0,1.0,0.9997,0.9906
1,"\nThe Agriculture, Forestry and Other Land Use1 (AFOLU) sector \nencompasses managed ecosystems and offers significant \nmitigation opportunities while delivering food, wood and \nother renewable resources as well as biodiversity conservation, \nprovided the sector adapts to climate change. Land-based \nmitigation measures represent some of the most important options \ncurrently available. The...","agriculture , forestry land use1 ( afolu ) sector encompasses managed ecosystem offer significant mitigation opportunity delivering food , wood renewable resource well biodiversity conservation , provided sector adapts climate change . land-based mitigation measure represent important option currently available . deliver carbon dioxide removal ( cdr ) substitute fossil fuel , thereby enabling ...",chapter_AFLOU.txt,"{'high confidence': 62, 'medium confidence': 65, 'low confidence': 7}","deliver carbon dioxide removal ( cdr ) substitute fossil fuel , thereby enabling emission reduction sector . rapid deployment afolu measure essential pathway staying within limit remaining budget 1.5°c target ( high confidence ) . carefully appropriately implemented , afolu mitigation measure uniquely positioned deliver substantial co-benefits help address many wider challenge associated land ...","time capacity land support function may threatened climate change ( high confidence ) . { ipcc ar6 wgi , figure spm.7 ; ipcc ar6 wgii , 7.1 , 7.6 } afolu ( managed land ) sector , average , accounted 13–21 % global total anthropogenic greenhouse gas ( ghg ) emission period 2010–2019 ( medium confidence ) . time managed natural terrestrial ecosystem carbon sink , absorbing around one third anth...","2015 ) . 
low confidence remains proposing specific change land condition achieve desired impact local , regional global climate due : poor relationship change surface albedo change surface temperature ( davin de noblet-ducoudré 2010 ) , compensation feedback among biophysical process ( bonan 2016 ; kalliokoski et al . 2020 ) , climate seasonal dependency biophysical effect ( bonan 2016 ) , omi...","[(confidence, 0.42545103103257703), (high, 0.3403608248260616), (emission, 0.30632474234345547), (mitigation, 0.26377963924019776), (yr, 0.22123453613694005), (afolu, 0.19570747427498544), (potential, 0.13614432993042466), (climate, 0.12763530930977313), (land, 0.12763530930977313), (gtco, 0.12338079899944734), (measure, 0.11912628868912158), (2020, 0.11487177837879581), (net, 0.11061726806847...","[(confidence, 0.36968858181893005), (yr, 0.3245044218188386), (medium, 0.2875355636369456), (gtco, 0.26288965818235027), (emission, 0.2546743563641518), (potential, 0.23824375272775491), (eq, 0.1971672436367627), (mitigation, 0.18484429090946503), (al, 0.1643060363639689), (et, 0.1643060363639689), (co, 0.13555248000027434), (land, 0.12322952727297667), (global, 0.11912187636387746), (net, 0.1...","[(confidence, 0.35874800166708765), (carbon, 0.25112360116696136), (low, 0.25112360116696136), (potential, 0.21524880100025257), (al, 0.17937400083354382), (change, 0.17937400083354382), (et, 0.17937400083354382), (mitigation, 0.17937400083354382), (yr, 0.17937400083354382), (climate, 0.14349920066683505), (2010, 0.10762440050012628), (2050, 0.10762440050012628), (coastal, 0.10762440050012628)...",1.0,0.9999,0.9999,0.962
2,"\nIndustry Chapter 1111\nExecutive Summary\nThe Paris Agreement, the Sustainable Development Goals \n(SDGs) and the COVID-19 pandemic provide a new context \nfor the evolution of industry and the mitigation of industry \ngreenhouse gas (GHG) emissions (high confidence). This chapter \nis focused on what is new since AR5. It emphasises the energy and \nemissions intensive basic materials indu...","industry chapter 1111 executive summary paris agreement , sustainable development goal ( sdgs ) covid-19 pandemic provide new context evolution industry mitigation industry greenhouse gas ( ghg ) emission ( high confidence ) . chapter focused new since ar5 . emphasis energy emission intensive basic material industry key strategy reaching net zero emission . { 11.1.1 } net zero co 2 emission in...",chapter_Industry.txt,"{'high confidence': 14, 'medium confidence': 4, 'low confidence': 0}","industry chapter 1111 executive summary paris agreement , sustainable development goal ( sdgs ) covid-19 pandemic provide new context evolution industry mitigation industry greenhouse gas ( ghg ) emission ( high confidence ) . chapter focused new since ar5 . emphasis energy emission intensive basic material industry key strategy reaching net zero emission . { 11.1.1 } net zero co 2 emission i...","{ 11.4.1.3 } scenario analysis show significant cut global ghg emission even close net zero emission ghg intensive industry ( e.g. , steel , plastic , ammonia , cement ) achieved 2050 deploying multiple available emerging option ( medium confidence ) . cutting industry emission significantly requires reorientation historic focus important incremental improvement ( e.g. 
, biomethane methanol ) ...",,"[(11, 0.3818730893140701), (emission, 0.3818730893140701), (high, 0.2603680154414114), (material, 0.2603680154414114), (confidence, 0.24301014774531735), (energy, 0.17357867696094095), (low, 0.17357867696094095), (carbon, 0.13886294156875276), (zero, 0.13886294156875276), (efficiency, 0.12150507387265867), (ghg, 0.12150507387265867), (industry, 0.12150507387265867), (use, 0.12150507387265867),...","[(emission, 0.4740454631399772), (11, 0.2844272778839863), (industry, 0.2844272778839863), (carbon, 0.1896181852559909), (confidence, 0.1896181852559909), (cost, 0.1896181852559909), (low, 0.1896181852559909), (medium, 0.1896181852559909), (cement, 0.14221363894199315), (production, 0.14221363894199315), (source, 0.14221363894199315), (zero, 0.14221363894199315), (basic, 0.09480909262799544), ...",[],1.0,0.9986,0.9682,0.0
3,"\nExecutive Summary\nInnovation in climate mitigation technologies has seen \nenormous activity and significant progress in recent years. \nInnovation has also led to, and exacerbated, trade-offs \nin relation to sustainable development (high confidence). \nInnovation can leverage action to mitigate climate change by \nreinforcing other interventions. In conjunction with other enabling \ncondi...","executive summary innovation climate mitigation technology seen enormous activity significant progress recent year . innovation also led , exacerbated , trade-off relation sustainable development ( high confidence ) . innovation leverage action mitigate climate change reinforcing intervention . conjunction enabling condition , innovation support system transition limit warming help shift devel...",chapter_innovation.txt,"{'high confidence': 20, 'medium confidence': 7, 'low confidence': 0}","executive summary innovation climate mitigation technology seen enormous activity significant progress recent year . innovation also led , exacerbated , trade-off relation sustainable development ( high confidence ) . innovation leverage action mitigate climate change reinforcing intervention . conjunction enabling condition , innovation support system transition limit warming help shift devel...","suggests development planning innovation capability remains necessary , especially least- developed country sids . international diffusion low-emission technology also facilitated knowledge spillover region engaged clean r & ( medium confidence ) . { 16.6 } evidence role intellectual property right ( ipr ) innovation mixed . 
{ 16.6 } evidence role intellectual property right ( ipr ) innovation...",,"[(innovation, 0.4776659519992759), (16, 0.41536169739067474), (high, 0.28036914573870547), (confidence, 0.25960106086917173), (technology, 0.21806489113010424), (development, 0.1661446789562699), (system, 0.14537659408673614), (technological, 0.1349925516519693), (box, 0.12460850921720242), (change, 0.12460850921720242), (climate, 0.10384042434766869), (country, 0.10384042434766869), (emission...","[(confidence, 0.2980099775410749), (technology, 0.2980099775410749), (medium, 0.26820897978696745), (change, 0.20860698427875243), (country, 0.17880598652464494), (mitigation, 0.17880598652464494), (also, 0.14900498877053744), (climate, 0.14900498877053744), (development, 0.14900498877053744), (emission, 0.14900498877053744), (innovation, 0.14900498877053744), (16, 0.11920399101642996), (chapt...",[],1.0,0.9999,0.9987,0.0
4,"\nChapter 10 Transport10Executive Summary\nMeeting climate mitigation goals would require transformative \nchanges in the transport sector (high confidence). In 2019, \ndirect greenhouse gas (GHG) emissions from the transport sector \nwere 8.7 GtCO 2-eq (up from 5.0 GtCO 2-eq in 1990) and accounted \nfor 23% of global energy-related CO 2 emissions. 70% of direct \ntransport emissions came fro...","chapter 10 transport10executive summary meeting climate mitigation goal would require transformative change transport sector ( high confidence ) . 2019 , direct greenhouse gas ( ghg ) emission transport sector 8.7 gtco 2-eq ( 5.0 gtco 2-eq 1990 ) accounted 23 % global energy-related co 2 emission . 70 % direct transport emission came road vehicle , 1 % , 11 % , 12 % came rail , shipping , avia...",chapter_Transport.txt,"{'high confidence': 15, 'medium confidence': 11, 'low confidence': 4}","chapter 10 transport10executive summary meeting climate mitigation goal would require transformative change transport sector ( high confidence ) . 2019 , direct greenhouse gas ( ghg ) emission transport sector 8.7 gtco 2-eq ( 5.0 gtco 2-eq 1990 ) accounted 23 % global energy-related co 2 emission . emission shipping aviation continue grow rapidly . transport- related emission developing regio...","electromobility powered low-carbon electricity potential rapidly reduce transport ghg applied multiple co-benefits developing world ’ growing city ( high confidence ) . { 10.3 , 10.4 , 10.8 } land-based , long-range , heavy-duty truck decarbonised battery electric haulage ( including use electric road system ) , complemented hydrogen- biofuel- based fuel context ( medium confidence ) . technol...",advanced biofuels could provide low-carbon jet fuel ( medium confidence ) . 
production synthetic fuel using low-carbon hydrogen co 2 captured direct air capture ( dac ) bioenergy carbon capture storage ( beccs ) could provide jet marine fuel option still require demonstration scale ( low confidence ) . ammonia produced low-carbon hydrogen could also serve marine fuel ( medium confidence ) . 20...,"[(10, 0.3903352467758598), (confidence, 0.35130172209827376), (transport, 0.35130172209827376), (high, 0.32527937231321646), (fuel, 0.1951676233879299), (emission, 0.16914527360287257), (hydrogen, 0.1561340987103439), (carbon, 0.1301117489252866), (demand, 0.11710057403275793), (low, 0.11710057403275793), (sector, 0.11710057403275793), (aviation, 0.10408939914022927), (based, 0.104089399140229...","[(10, 0.4219146387646126), (confidence, 0.38816146766344356), (medium, 0.28690195435993654), (fuel, 0.25314878325876755), (could, 0.18564244105642955), (carbon, 0.16876585550584502), (low, 0.16876585550584502), (electric, 0.15188926995526053), (hydrogen, 0.15188926995526053), (based, 0.13501268440467604), (system, 0.13501268440467604), (transport, 0.13501268440467604), (decarbonisation, 0.1181...","[(confidence, 0.26594532430676177), (low, 0.26594532430676177), (aviation, 0.2216211035889681), (fuel, 0.2216211035889681), (land, 0.2216211035889681), (shipping, 0.2216211035889681), (transport, 0.2216211035889681), (carbon, 0.1772968828711745), (effect, 0.1772968828711745), (contrail, 0.13297266215338088), (could, 0.13297266215338088), (enablers, 0.13297266215338088), (hydrogen, 0.1329726621...",1.0,0.9996,0.9987,0.9826
5,"\nExecutive Summary\nGlobal net anthropogenic greenhouse gas (GHG) emissions \nduring the last decade (2010–2019) were higher than at any \nprevious time in human history (high confidence). Since 2010, \nGHG emissions have continued to grow, reaching 59 ± 6.6 GtCO 2-eq \nin 2019,1 but the average annual growth in the last decade \n(1.3%, 2010–2019) was lower than in the previous decade (2.1%,...","executive summary global net anthropogenic greenhouse gas ( ghg ) emission last decade ( 2010–2019 ) higher previous time human history ( high confidence ) . since 2010 , ghg emission continued grow , reaching 59 ± 6.6 gtco 2-eq 2019,1 average annual growth last decade ( 1.3 % , 2010–2019 ) lower previous decade ( 2.1 % , 2000–2009 ) ( high confidence ) . average annual ghg emission 9.1 gtco 2...",chapter_mitigation.txt,"{'high confidence': 178, 'medium confidence': 54, 'low confidence': 5}","executive summary global net anthropogenic greenhouse gas ( ghg ) emission last decade ( 2010–2019 ) higher previous time human history ( high confidence ) . since 2010 , ghg emission continued grow , reaching 59 ± 6.6 gtco 2-eq 2019,1 average annual growth last decade ( 1.3 % , 2010–2019 ) lower previous decade ( 2.1 % , 2000–2009 ) ( high confidence ) . executive summary global net anthropo...","{ 2.4.1 , figure 2.16 } global covid-19 pandemic led steep drop co 2 emission fossil fuel industry ( high confidence ) . global co 2-ffi emission dropped 2020 5.8 % ( 5.1–6.3 % ) 2.2 ( 1.9–2.4 ) gtco 2 compared 2019. emission , however , rebounded globally end december 2020 ( medium confidence ) . { 2.2.2 , figure 2.6 } commonly used wide part literature climate change mitigation required repo...","ghg emission reached co2 emission ffi 38 ( ±3.0 ) gt , co 2 lulucf 6.6 ± 4.6 gt , ch 4 11 ± 3.2 gtco 2-eq , n 2o 2.7 ± 1.6 gtco 2-eq f-gases 1.4 ± 0.41 gtco 2-eq . 
high confidence average annual ghg emission last decade ( 2010–2019 ) highest record term aggregate co 2-eq emission , low confidence annual emission 2019 uncertainty large considering size composition observed increase recent year ...","[(high, 0.4049498520337298), (confidence, 0.39272495084025866), (emission, 0.3591064725582132), (mitigation, 0.22463255943003124), (pathway, 0.17573295465614688), (al, 0.16045182816430803), (et, 0.16045182816430803), (warming, 0.14975503962002082), (co, 0.14669881432165305), (change, 0.13600202577736584), (carbon, 0.12072089928552698), (climate, 0.12072089928552698), (sector, 0.119192786636343...","[(confidence, 0.4056464329257463), (medium, 0.3095722777591222), (emission, 0.29355991856468483), (mitigation, 0.21349812259249806), (warming, 0.19214831033324825), (change, 0.1868108572684358), (energy, 0.1547861388795611), (high, 0.1547861388795611), (pathway, 0.1547861388795611), (co, 0.1334363266203113), (global, 0.1334363266203113), (climate, 0.12809887355549884), (al, 0.12276142049068639...","[(emission, 0.3916103699798974), (eq, 0.28480754180356177), (confidence, 0.24920659907811654), (gtco, 0.24920659907811654), (low, 0.24920659907811654), (2019, 0.1780047136272261), (aggregate, 0.14240377090178089), (carbon, 0.14240377090178089), (change, 0.14240377090178089), (co, 0.14240377090178089), (economic, 0.14240377090178089), (high, 0.14240377090178089), (2010, 0.10680282817633566), (a...",1.0,1.0,1.0,0.9859
6,"\nCross-sectoral Perspectives Chapter 1212\nExecutive Summary\nThe total emission mitigation potential achievable by the year \n2030, calculated based on sectoral assessments, is sufficient \nto reduce global greenhouse gas emissions to half of the \ncurrent (2019) level or less (robust evidence, high agreement). \nThis potential (32–44 GtCO 2-eq) requires implementation of a wide \nrange of ...","cross-sectoral perspective chapter 1212 executive summary total emission mitigation potential achievable year 2030 , calculated based sectoral assessment , sufficient reduce global greenhouse gas emission half current ( 2019 ) level le ( robust evidence , high agreement ) . potential ( 32–44 gtco 2-eq ) requires implementation wide range mitigation option . option mitigation cost lower usd20 t...",chapter_Cross_sectoral.txt,"{'high confidence': 13, 'medium confidence': 2, 'low confidence': 2}","{ 3.2.5 , 3.4.6 , 12.5 } chapter 12 cross-sectoral perspectives12bio-based product part circular bioeconomy potential support adaptation mitigation . key maximising benefit managing trade-off sectoral integration , transparent governance , stakeholder involvement ( high confidence ) . sustainable bioeconomy relying biomass resource need supported technology innovation international cooperation...","key maximising benefit managing trade-off sectoral integration , transparent governance , stakeholder involvement ( high confidence ) . sustainable bioeconomy relying biomass resource need supported technology innovation international cooperation governance global trade disincentivise environmental social externality ( medium confidence ) . { 12.5 , cross-working group box 3 chapter } coordina...",", small versus large ) , includes line sight assessment based . assessment methodology explained annex ii , part iv , section 11. 
geophysical environmental-ecological technological economic socio-cultural institutionalphysical potential geophysical resource land use air pollution toxic waste , ecotoxicity eutrophication water quantity quality biodiversity simplicity technological scalability m...","[(12, 0.41691385343599274), (section, 0.3188164761569356), (land, 0.2534182246375642), (confidence, 0.17984519167827137), (high, 0.17984519167827137), (use, 0.17984519167827137), (production, 0.16349562879842852), (united, 0.14714606591858567), (food, 0.13079650303874282), (biomass, 0.12262172159882138), (impact, 0.12262172159882138), (soil, 0.12262172159882138), (based, 0.11444694015889996), ...","[(confidence, 0.33407655239053047), (high, 0.2672612419124244), (trade, 0.2672612419124244), (biomass, 0.2004459314343183), (governance, 0.2004459314343183), (off, 0.2004459314343183), (technology, 0.2004459314343183), (based, 0.1336306209562122), (climate, 0.1336306209562122), (coordinated, 0.1336306209562122), (cross, 0.1336306209562122), (energy, 0.1336306209562122), (innovation, 0.13363062...","[(effect, 0.2349781349963872), (enablers, 0.2349781349963872), (acceptance, 0.15665208999759148), (air, 0.15665208999759148), (al, 0.15665208999759148), (assessment, 0.15665208999759148), (barrier, 0.15665208999759148), (capacity, 0.15665208999759148), (carbon, 0.15665208999759148), (confidence, 0.15665208999759148), (economic, 0.15665208999759148), (et, 0.15665208999759148), (geophysical, 0.1...",1.0,0.9997,0.994,0.9325
7,"\nChapter 13 National and Sub-national Policies and Institutions13Executive Summary\nLong-term deep emission reductions, including the reduction \nof emissions to net zero, is best achieved through institutions \nand governance that nurture new mitigation policies, \nwhile at the same time reconsidering existing policies that \nsupport continued Greenhouse Gas (GHG) emissions (robust \nevidenc...","chapter 13 national sub-national policy institutions13executive summary long-term deep emission reduction , including reduction emission net zero , best achieved institution governance nurture new mitigation policy , time reconsidering existing policy support continued greenhouse gas ( ghg ) emission ( robust evidence , high agreement ) . effectively , scope climate governance include direct e...",chapter_national_policies.txt,"{'high confidence': 1, 'medium confidence': 1, 'low confidence': 0}","designing overlap interaction among mitigation policy enhances effectiveness ( robust evidence , high agreement ) . { 13.6 } removing fossil fuel subsidy would reduce emission , improve public revenue macroeconomic performance , yield environmental sustainable development benefit ; subsidy removal may adverse distributional impact especially economically vulnerable group , case mitigated measu...","designing overlap interaction among mitigation policy enhances effectiveness ( robust evidence , high agreement ) . 
{ 13.6 } removing fossil fuel subsidy would reduce emission , improve public revenue macroeconomic performance , yield environmental sustainable development benefit ; subsidy removal may adverse distributional impact especially economically vulnerable group , case mitigated measu...",,"[(emission, 0.25630729731502827), (high, 0.25630729731502827), (mitigation, 0.25630729731502827), (subsidy, 0.25630729731502827), (13, 0.1708715315433522), (agreement, 0.1708715315433522), (confidence, 0.1708715315433522), (evidence, 0.1708715315433522), (fossil, 0.1708715315433522), (fuel, 0.1708715315433522), (medium, 0.1708715315433522), (national, 0.1708715315433522), (policy, 0.1708715315...","[(emission, 0.25630729731502827), (high, 0.25630729731502827), (mitigation, 0.25630729731502827), (subsidy, 0.25630729731502827), (13, 0.1708715315433522), (agreement, 0.1708715315433522), (confidence, 0.1708715315433522), (evidence, 0.1708715315433522), (fossil, 0.1708715315433522), (fuel, 0.1708715315433522), (medium, 0.1708715315433522), (national, 0.1708715315433522), (policy, 0.1708715315...",[],1.0,0.9687,0.9687,0.0
8,"\nInternational cooperation is having positive and measurable \nresults (high confidence). The Kyoto Protocol led to measurable \nand substantial avoided emissions, including in 20 countries with \nKyoto first commitment period targets that have experienced \na decade of declining absolute emissions. It also built national \ncapacity for greenhouse gas (GHG) accounting, catalysed the \ncreati...","international cooperation positive measurable result ( high confidence ) . kyoto protocol led measurable substantial avoided emission , including 20 country kyoto first commitment period target experienced decade declining absolute emission . also built national capacity greenhouse gas ( ghg ) accounting , catalysed creation ghg market , increased investment low-carbon technology ( medium conf...",chapter_intl_coop.txt,"{'high confidence': 18, 'medium confidence': 12, 'low confidence': 3}","international cooperation positive measurable result ( high confidence ) . kyoto protocol led measurable substantial avoided emission , including 20 country kyoto first commitment period target experienced decade declining absolute emission . { 14.3 , 14.5 , 14.6 } new form international cooperation emerged since intergovernmental panel climate change ’ fifth assessment report ( ipcc ar5 ) li...","kyoto protocol led measurable substantial avoided emission , including 20 country kyoto first commitment period target experienced decade declining absolute emission . also built national capacity greenhouse gas ( ghg ) accounting , catalysed creation ghg market , increased investment low-carbon technology ( medium confidence ) . international agreement institution led avoided carbon dioxide (...","regional scale seasonal timescale could considerable residual climate change and/or overcompensating change ( e.g. 
, cooling , wetting drying ’ needed offset warming , drying wetting due anthropogenic greenhouse gas emission ) , low confidence understanding climate response srm regional scale ( ar6 wgi , chapter 4 ) . sai implemented partially offset warming ( e.g. study used scenario sai depl...","[(14, 0.387513082812802), (confidence, 0.37731537010720195), (high, 0.3161290938736016), (climate, 0.24474510493440127), (agreement, 0.23454739222880122), (cooperation, 0.23454739222880122), (mitigation, 0.23454739222880122), (international, 0.193756541406401), (country, 0.13257026517280068), (support, 0.12237255246720063), (development, 0.11217483976160057), (emission, 0.11217483976160057), (...","[(confidence, 0.39966024133170247), (medium, 0.2960446232086685), (agreement, 0.2812423920482351), (level, 0.2516379297273682), (international, 0.20723123624606796), (ndcs, 0.1924290050856345), (paris, 0.1924290050856345), (14, 0.16282454276476768), (ambition, 0.16282454276476768), (cooperation, 0.14802231160433424), (mitigation, 0.14802231160433424), (country, 0.13322008044390082), (emission,...","[(srm, 0.3735436838188142), (emission, 0.24902912254587614), (confidence, 0.1867718419094071), (impact, 0.1867718419094071), (low, 0.1867718419094071), (reduction, 0.1867718419094071), (scenario, 0.1867718419094071), (study, 0.1867718419094071), (surface, 0.1867718419094071), (ar6, 0.12451456127293807), (change, 0.12451456127293807), (chapter, 0.12451456127293807), (climate, 0.1245145612729380...",1.0,0.9999,0.9997,0.7906
9,"\nBuildings Chapter 99\nExecutive Summary\nGlobal greenhouse gas (GHG) emissions from buildings were \nin 2019 at 12 GtCO 2-eq, equivalent to 21% of global GHG \nemissions that year, out of which 57% were indirect emissions \nfrom offsite generation of electricity and heat, 24% direct \nemissions produced onsite and 18% were embodied emissions \nfrom the use of cement and steel (high evidenc...","building chapter 99 executive summary global greenhouse gas ( ghg ) emission building 2019 12 gtco 2-eq , equivalent 21 % global ghg emission year , 57 % indirect emission offsite generation electricity heat , 24 % direct emission produced onsite 18 % embodied emission use cement steel ( high evidence , high agreement ) . 95 % emission building co2 emission , ch 4 n 2o represented 0.08 % , emi...",chapter_Building.txt,"{'high confidence': 1, 'medium confidence': 0, 'low confidence': 2}","addressing new need residential building may , necessarily mean constructing new building , especially global north . repurposing existing non-residential building , longer use due expected spread teleworking triggered health crisis enabled digitalisation , could way overcome new need officetelschool building triggered health crisis ( low evidence , high confidence ) { 9.1 , 9.2 } . building c...",,"assessment method explained annex ii.11 . 
geophysical environmental-ecological technological economic socio-cultural institutionalphysical potential geophysical resourses land use air pollution toxic waste , ecotoxicity eutrophication water quantity quality biodiversity simplicity technological scalability maturity technology readiness cost 2030 long term effect employment economic growth publ...","[(building, 0.543305367994433), (emission, 0.3104602102825331), (new, 0.23284515771189984), (co, 0.15523010514126656), (crisis, 0.15523010514126656), (ghg, 0.15523010514126656), (global, 0.15523010514126656), (health, 0.15523010514126656), (need, 0.15523010514126656), (residential, 0.15523010514126656), (triggered, 0.15523010514126656), (12, 0.07761505257063328), (18, 0.07761505257063328), (20...",[],"[(effect, 0.24743582965269675), (enablers, 0.24743582965269675), (acceptance, 0.1649572197684645), (air, 0.1649572197684645), (barrier, 0.1649572197684645), (building, 0.1649572197684645), (capacity, 0.1649572197684645), (change, 0.1649572197684645), (confidence, 0.1649572197684645), (construction, 0.1649572197684645), (economic, 0.1649572197684645), (geophysical, 0.1649572197684645), (level, ...",1.0,-0.765,0.0,0.9657
