# LangChain ZeroShot classification models

In [1]:
import pandas as pd
import numpy as np
import re
import os

from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import classification_report

# Read the OpenAI API key from the environment (None when the variable is unset)
openai_api_key = os.environ.get("OPENAI_API_KEY")

In [2]:
# Read the validation and test splits from their JSON files (validation first)
val, test = (
    pd.read_json(path)
    for path in (
        '../Classifiers/Data/cat_hearings_03_10_val.json',
        '../Classifiers/Data/cat_hearings_03_10_test.json',
    )
)

## Model setup


In [3]:
# Create the chat and the llm models.
# The temperature is pinned to zero so that outputs are as deterministic,
# and therefore as reproducible, as possible.
chat = ChatOpenAI(
    openai_api_key=openai_api_key,
    temperature=0,
)
llm = OpenAI(
    openai_api_key=openai_api_key,
    temperature=0,
)

# Zero-shot classification prompt. It describes the three opposition classes
# (CLASS 1-3) and the fallback NO CLAIM class, then asks for the label(s) only.
# `{text}` is the single placeholder; it is filled with the paragraph to classify.
# NOTE(review): everything between the triple quotes is sent to the model
# verbatim, including the leading "# " on the "% TEXT", "{text}" and
# "YOUR RESPONSE:" lines and on the final line. These look like leftover
# comment markers — confirm whether they are intended. Typos inside the string
# (e.g. "harms the the", "classes in more than one applies") are likewise part
# of the prompt; editing any of this changes the model input, so predictions
# would need to be regenerated.
template = """ 
You are a classification model designed to classify paragraphs into different categories. Your task is to determine the most appropriate class for each paragraph. There are three classes of climate policy opposition, CLASS 1, CLASS 2, and CLASS 3 as well as a class for paragraphs that do not fit into any opposition category called NO CLAIM. Here are the descriptions for each class:

CLASS 1: Opposition to climate change legislation because this type of legislation is expensive, is bad for the economy, threatens competitiveness, harms vulnerable members of society, harms the the global poor, harms national security, harms the environment and species, has unpredictable consequences, or limits liberty and freedom.

CLASS 2: Opposition to climate change legislation based on concerns regarding the effectiveness of clean energy and jobs, the belief that climate policy has negligible impact on addressing climate change, the perception that climate policy only makes a difference in the distant future, the belief that the contribution of a single country or region to global emissions is negligible, the idea that climate action is futile in light of other countries' emissions, concerns about the vulnerability of climate policy to manipulation, or examples of existing climate policies that are deemed to be failing.

CLASS 3: Opposition to climate change legislation based on the argument that markets, the private sector, voluntary policies, or individuals will solve the climate change crisis, that it is better to adapt to climate change or geoengineer the climate, that future generations will be richer and better able to solve the problem, that technological and scientific progress will solve the climate change crisis, that we should invest in mitigation technology instead of mitigating climate change via policy, that we should focus on helping other countries reduce their emissions, that we should address other problems first, that climate policy is too difficult or premature, that climate-friendly technologies and practices are unavailable, that fossil fuels are a key part of the solution, that fossil fuel reserves are plentiful, that fossil fuels are cheap or good for the economy, society, or the environment, that fossil fuels are important for economic growth or development, that fossil fuel extraction is important for our energy security, that our fossil fuels are cleaner than others, that fossil fuels are necessary to meet the energy demand,that fossil fuels are necessary for the industry, or that we have the right to profit from fossil fuels like others have.

NO CLAIM: If a paragraph does not fall into any of the above classes, it should be classified as NO CLAIM. This includes paragraphs that do not express opposition to climate change legislation, as well as those expressing doubt about climate change, climate change science, or the climate change movement, provided they do not mention any specific topics from CLASS 1, CLASS 2, or CLASS 3.

Your task is to classify any given paragraph into one or more of the three classes CLASS 1, CLASS 2, and CLASS 3, or if none of these apply, into the class NO CLAIM. Return the paragraph class, or classes in more than one applies, but don't output any explanation or text.

# % TEXT
# {text}

# YOUR RESPONSE:
# """

# Wrap the template; `text` is its only input variable
prompt_template = PromptTemplate(template=template, input_variables=["text"])

# One 'classification' chain per model, both driven by the same prompt:
# `classification_chat` runs on the chat model, `classification_llm` on the llm
classification_chat = LLMChain(prompt=prompt_template, llm=chat)
classification_llm = LLMChain(prompt=prompt_template, llm=llm)

## Chat model

In [4]:
# Classify every validation paragraph with the chat model ('gpt-3.5-turbo')

input_list, output_list = [], []
for paragraph in val.text:
    # Query first, so a paragraph is only recorded once its prediction exists
    prediction = classification_chat.run(paragraph)
    input_list.append(paragraph)
    output_list.append(prediction)

# Show the paragraphs next to the raw model answers

pd.DataFrame(zip(input_list, output_list), columns=['input', 'output'])

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised RateLimitError: That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 0e2071f100b7f7e7d93557399e5f363f in your message.).


Unnamed: 0,input,output
0,"Yes, thank you, Senator. This is a very import...",CLASS 3
1,The next slide shows the observed temperatures...,NO CLAIM
2,I think Mr. Goo said it all. I really think we...,NO CLAIM
3,"Particularly in the U.S., we are facing a very...",NO CLAIM
4,"As you will see in my written testimony, my ow...",CLASS 1
...,...,...
248,"I said by itself. In other words, absence of f...",NO CLAIM
249,A simple example of why border adjustments wil...,NO CLAIM
250,"In my written testimony, I show with a simple ...",NO CLAIM
251,And on one hand it doesn't. But everybody's go...,NO CLAIM


In [5]:
# Chat model results on validation data

# Store the raw chat-model answers in a new column called "predicted_labels_chat"
val['predicted_labels_chat'] = output_list

# Multi-hot encode the predictions.
# The class set is pinned to '1', '2', '3' so that column k always stands for
# CLASS k+1. Letting the binarizer infer the classes from the model output
# would shift or add columns whenever a class is never predicted or an answer
# contains a stray number, misaligning predictions with the gold labels.
# Answers without digits (e.g. "NO CLAIM") become an all-zero row; digits
# outside the pinned set are ignored (scikit-learn emits a warning).
# NOTE(review): assumes val['labels'] is multi-hot in CLASS 1, 2, 3 order — confirm.
mlb = MultiLabelBinarizer(classes=['1', '2', '3'])
predicted_digits = val.predicted_labels_chat.apply(lambda x: re.findall(r'\d+', x))
val['predicted_labels_chat'] = mlb.fit_transform(predicted_digits).tolist()

# Per-class report on the multi-hot labels
print("Classification performance\n------------")
print(classification_report(val['labels'].values.tolist(), val['predicted_labels_chat'].values.tolist()))

# Overall performance: collapse each row to "any opposition class" (1) vs none (0)
print("Overall performance\n------------")
print(classification_report(
    [1 if sum(i) > 0 else 0 for i in val['labels']],
    [1 if sum(i) > 0 else 0 for i in val['predicted_labels_chat']],
))

Classification performance
------------
              precision    recall  f1-score   support

           0       0.68      0.78      0.73        60
           1       0.57      0.30      0.39        27
           2       0.56      0.25      0.35        36

   micro avg       0.65      0.52      0.58       123
   macro avg       0.61      0.44      0.49       123
weighted avg       0.62      0.52      0.54       123
 samples avg       0.25      0.22      0.23       123

Overall performance
------------
              precision    recall  f1-score   support

           0       0.84      0.84      0.84       153
           1       0.76      0.75      0.75       100

    accuracy                           0.81       253
   macro avg       0.80      0.80      0.80       253
weighted avg       0.81      0.81      0.81       253



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [425]:
# Chat model results on validation data

# Store the raw chat-model answers in a new column called "predicted_labels_chat"
val['predicted_labels_chat'] = output_list

# Multi-hot encode the predictions.
# The class set is pinned to '1', '2', '3' so that column k always stands for
# CLASS k+1. Letting the binarizer infer the classes from the model output
# would shift or add columns whenever a class is never predicted or an answer
# contains a stray number, misaligning predictions with the gold labels.
# Answers without digits (e.g. "NO CLAIM") become an all-zero row; digits
# outside the pinned set are ignored (scikit-learn emits a warning).
# NOTE(review): assumes val['labels'] is multi-hot in CLASS 1, 2, 3 order — confirm.
mlb = MultiLabelBinarizer(classes=['1', '2', '3'])
predicted_digits = val.predicted_labels_chat.apply(lambda x: re.findall(r'\d+', x))
val['predicted_labels_chat'] = mlb.fit_transform(predicted_digits).tolist()

# Per-class report on the multi-hot labels
print("Classification performance\n------------")
print(classification_report(val['labels'].values.tolist(), val['predicted_labels_chat'].values.tolist()))

# Overall performance: collapse each row to "any opposition class" (1) vs none (0)
print("Overall performance\n------------")
print(classification_report(
    [1 if sum(i) > 0 else 0 for i in val['labels']],
    [1 if sum(i) > 0 else 0 for i in val['predicted_labels_chat']],
))

Classification performance
------------
              precision    recall  f1-score   support

           0       0.68      0.80      0.73        60
           1       0.64      0.33      0.44        27
           2       0.57      0.22      0.32        36

   micro avg       0.66      0.53      0.59       123
   macro avg       0.63      0.45      0.50       123
weighted avg       0.64      0.53      0.55       123
 samples avg       0.25      0.22      0.23       123

Overall performance
------------
              precision    recall  f1-score   support

           0       0.83      0.84      0.84       153
           1       0.76      0.74      0.75       100

    accuracy                           0.80       253
   macro avg       0.79      0.79      0.79       253
weighted avg       0.80      0.80      0.80       253



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## LLM Model

In [7]:
# Classify every validation paragraph with the LLM model ('text-davinci-003')

input_list_llm, output_list_llm = [], []
for paragraph in val.text:
    # Query first, so a paragraph is only recorded once its prediction exists
    prediction = classification_llm.run(paragraph)
    input_list_llm.append(paragraph)
    output_list_llm.append(prediction)

# Show the paragraphs next to the raw model answers

pd.DataFrame(zip(input_list_llm, output_list_llm), columns=['input', 'output'])

Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: The server had an error while processing your request. Sorry about that!.
Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: The server had an error while processing your request. Sorry about that!.
Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: The server had an error while processing your request. Sorry about that!.
Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: The server had an error while processing your request. Sorry about that!.
Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: The server had an error while processing your request. Sorr

Unnamed: 0,input,output
0,"Yes, thank you, Senator. This is a very import...",CLASS 3
1,The next slide shows the observed temperatures...,NO CLAIM
2,I think Mr. Goo said it all. I really think we...,NO CLAIM
3,"Particularly in the U.S., we are facing a very...",NO CLAIM
4,"As you will see in my written testimony, my ow...",CLASS 2
...,...,...
248,"I said by itself. In other words, absence of f...",NO CLAIM
249,A simple example of why border adjustments wil...,CLASS 3
250,"In my written testimony, I show with a simple ...",NO CLAIM
251,And on one hand it doesn't. But everybody's go...,NO CLAIM


In [8]:
# LLM model results on validation data

# Store the raw LLM answers in a new column called "predicted_labels_llm"
val['predicted_labels_llm'] = output_list_llm

# Multi-hot encode the predictions.
# The class set is pinned to '1', '2', '3' so that column k always stands for
# CLASS k+1. Letting the binarizer infer the classes from the model output
# would shift or add columns whenever a class is never predicted or an answer
# contains a stray number, misaligning predictions with the gold labels.
# Answers without digits (e.g. "NO CLAIM") become an all-zero row; digits
# outside the pinned set are ignored (scikit-learn emits a warning).
# NOTE(review): assumes val['labels'] is multi-hot in CLASS 1, 2, 3 order — confirm.
mlb = MultiLabelBinarizer(classes=['1', '2', '3'])
predicted_digits = val.predicted_labels_llm.apply(lambda x: re.findall(r'\d+', x))
val['predicted_labels_llm'] = mlb.fit_transform(predicted_digits).tolist()

# Per-class report on the multi-hot labels
print("Classification performance\n------------")
print(classification_report(val['labels'].values.tolist(), val['predicted_labels_llm'].values.tolist()))

# Overall performance: collapse each row to "any opposition class" (1) vs none (0)
print("Overall performance\n------------")
print(classification_report(
    [1 if sum(i) > 0 else 0 for i in val['labels']],
    [1 if sum(i) > 0 else 0 for i in val['predicted_labels_llm']],
))

Classification performance
------------
              precision    recall  f1-score   support

           0       0.87      0.45      0.59        60
           1       0.38      0.22      0.28        27
           2       0.64      0.19      0.30        36

   micro avg       0.69      0.33      0.44       123
   macro avg       0.63      0.29      0.39       123
weighted avg       0.69      0.33      0.44       123
 samples avg       0.16      0.13      0.14       123

Overall performance
------------
              precision    recall  f1-score   support

           0       0.74      0.95      0.83       153
           1       0.86      0.50      0.63       100

    accuracy                           0.77       253
   macro avg       0.80      0.72      0.73       253
weighted avg       0.79      0.77      0.75       253



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [423]:
# LLM model results on validation data

# Store the raw LLM answers in a new column called "predicted_labels_llm"
val['predicted_labels_llm'] = output_list_llm

# Multi-hot encode the predictions.
# The class set is pinned to '1', '2', '3' so that column k always stands for
# CLASS k+1. Letting the binarizer infer the classes from the model output
# would shift or add columns whenever a class is never predicted or an answer
# contains a stray number, misaligning predictions with the gold labels.
# Answers without digits (e.g. "NO CLAIM") become an all-zero row; digits
# outside the pinned set are ignored (scikit-learn emits a warning).
# NOTE(review): assumes val['labels'] is multi-hot in CLASS 1, 2, 3 order — confirm.
mlb = MultiLabelBinarizer(classes=['1', '2', '3'])
predicted_digits = val.predicted_labels_llm.apply(lambda x: re.findall(r'\d+', x))
val['predicted_labels_llm'] = mlb.fit_transform(predicted_digits).tolist()

# Per-class report on the multi-hot labels
print("Classification performance\n------------")
print(classification_report(val['labels'].values.tolist(), val['predicted_labels_llm'].values.tolist()))

# Overall performance: collapse each row to "any opposition class" (1) vs none (0)
print("Overall performance\n------------")
print(classification_report(
    [1 if sum(i) > 0 else 0 for i in val['labels']],
    [1 if sum(i) > 0 else 0 for i in val['predicted_labels_llm']],
))

Classification performance
------------
              precision    recall  f1-score   support

           0       0.86      0.42      0.56        60
           1       0.47      0.33      0.39        27
           2       0.58      0.19      0.29        36

   micro avg       0.68      0.33      0.45       123
   macro avg       0.64      0.31      0.41       123
weighted avg       0.70      0.33      0.45       123
 samples avg       0.16      0.13      0.14       123

Overall performance
------------
              precision    recall  f1-score   support

           0       0.75      0.95      0.84       153
           1       0.87      0.52      0.65       100

    accuracy                           0.78       253
   macro avg       0.81      0.73      0.74       253
weighted avg       0.80      0.78      0.76       253



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [429]:
# Persist the validation frame, including the prediction columns added above
val.to_json(path_or_buf="../Classifiers/Data/cat_hearings_03_10_val_zeroshot_labels.json")

In [None]:
# # Previous template versions

# template = """ You are a classification model.
# The classifier has four classes:
# CLASS 1: Opposition to climate change legislation because this type of legislation is harmful, is expensive and harms the economy and competitiveness, harms vulnerable members of society, harms the global poor, harms national security, harms the environment and species, has unpredictable consequences, and/or limits liberty and freedom.
# CLASS 2: Opposition to climate change legislation because this type of legislation is ineffective, because clean energy and jobs won't work, because climate policy will only make negligible difference to climate change, because climate policy will only make a difference in the distant future, because a single country or region only contributes a small proportion of global emissions, because climate action is pointless due to other countries' emissions, because existing climate policy solutions don't work, and/or because policy can be gamed or manipulated.
# CLASS 3: Opposition to climate change legislation because other issues are more important than climate change mitigation policies, because climate policy is too difficult or premature, because climate-friendly technologies and practices won't work or aren't ready yet, and/or because energy needs are high and/or rising and people need energy.
# NO CLAIM: No opposition to climate change legislation. The NO CLAIM class includes all paragraphs that do not fit into any of the other three classes, as well as, all paragraphs that express no opposition to climate change legislation. This means that paragraphs doubting climate change, climate change science and/or the climate change movement are also classed as NO CLAIM if they don't specifically mention any of the topics from CLASS 1, CLASS 2, and CLASS 3.

# Classify any given paragraph into one or more of the three classes CLASS 1 to CLASS 3, or if none of these apply, into NO CLAIM. The answer text should always contain the label(s) of the class(es) and a brief explanation of the choice(s). If none of the classes CLASS 1 to CLASS 3 fit, it is automatically NO CLAIM.
# % TEXT
# {text}

# YOUR RESPONSE:
# """