Experiment 1: Zero-Shot Learning and Classification

In [None]:
from transformers import pipeline

# Build the Hugging Face zero-shot classification pipeline on the
# 'facebook/bart-large-mnli' checkpoint.
classifier = pipeline('zero-shot-classification', model='facebook/bart-large-mnli')

# Candidate categories every text is scored against.
labels = [
    "commodity",
    "industry",
    "CO2",
    "CH4",
    "N2O",
    "production",
    "transportation",
    "distribution",
]

# Free-text descriptions to classify without any task-specific training.
texts = [
    "This is a description of the production process of steel.",
    "Emissions from transportation of goods.",
    "Methane emissions from livestock."
]

# Classify one description at a time and print the raw pipeline result.
# A single print emits exactly the same characters as printing the two
# lines separately (the trailing "\n" leaves a blank line between entries).
for text in texts:
    result = classifier(text, labels)
    print(f"Text: {text}\nClassification: {result}\n")


Experiment 2: Supervised Learning Using Classical Models

In [None]:
from sentence_transformers import SentenceTransformer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, accuracy_score
import numpy as np
import pandas as pd

# Load the SBERT model used to turn each description into an embedding vector
sbert_model = SentenceTransformer('distilbert-base-nli-mean-tokens')

# Load your dataset
data = pd.read_csv('useeio_dataset.csv')
texts = data['description'].tolist()
# Target for the classification task (emission type).
labels = data['label'].tolist()
# Target for the regression task (emission quantity). This is the same column
# Experiment 3 regresses on; the regressor must not be trained on `labels`.
emission_values = data['emission_value'].tolist()

# Compute SBERT embeddings
embeddings = sbert_model.encode(texts)

# Split the data once, passing both targets together, so the regression and
# classification tasks are trained and evaluated on the same rows.
X_train, X_test, y_train, y_test, y_reg_train, y_reg_test = train_test_split(
    embeddings, labels, emission_values, test_size=0.2, random_state=42
)

# Regression task for emission quantity prediction
# (random_state makes the forest, and therefore the reported MSE, reproducible)
regressor = RandomForestRegressor(random_state=42)
regressor.fit(X_train, y_reg_train)
y_pred_reg = regressor.predict(X_test)
print(f"Regression MSE: {mean_squared_error(y_reg_test, y_pred_reg)}")

# Classification task for emission type identification
# (max_iter is raised above the default because the solver may otherwise stop
# before converging on high-dimensional embeddings — tune if needed)
classifier = LogisticRegression(max_iter=1000)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
print(f"Classification Accuracy: {accuracy_score(y_test, y_pred)}")


Experiment 3: Fine-Tuning BERT for Scope 3 Emission Estimation

In [None]:
import tensorflow as tf
from transformers import TFBertForSequenceClassification, BertTokenizer, create_optimizer
from sklearn.preprocessing import LabelEncoder

# Load the BERT model and tokenizer.
# num_labels=1 requests a single output, trained below with an MSE loss.
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=1)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Preprocess the dataset (`data` is the DataFrame loaded in Experiment 2)
texts = data['description'].tolist()
# Cast explicitly so the label tensor is floating-point even if the CSV column
# was parsed as integers.
labels = [float(value) for value in data['emission_value'].tolist()]

# Hold out 20% of the rows so the reported MSE is measured on descriptions the
# model was not fine-tuned on. Same test_size and random_state as the
# Experiment 2 split; train_test_split is imported in the Experiment 2 cell.
train_texts, eval_texts, train_labels, eval_labels = train_test_split(
    texts, labels, test_size=0.2, random_state=42
)
encodings = tokenizer(train_texts, truncation=True, padding=True)
eval_encodings = tokenizer(eval_texts, truncation=True, padding=True)

# Convert data to TensorFlow datasets (only the training data is shuffled)
dataset = tf.data.Dataset.from_tensor_slices((dict(encodings), train_labels))
dataset = dataset.shuffle(len(train_texts)).batch(16)
eval_dataset = tf.data.Dataset.from_tensor_slices((dict(eval_encodings), eval_labels)).batch(16)

# Define optimizer, loss, and metrics
num_epochs = 3
# The learning-rate schedule needs the total number of optimizer steps:
# batches per epoch times the number of epochs.
num_train_steps = len(dataset) * num_epochs
optimizer, lr_schedule = create_optimizer(init_lr=2e-5, num_warmup_steps=0, num_train_steps=num_train_steps)
model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mean_squared_error'])

# Fine-tune the model
history = model.fit(dataset, epochs=num_epochs)

# Evaluate the model on the held-out rows
loss, mse = model.evaluate(eval_dataset)
print(f"Fine-tuned BERT MSE: {mse}")


Putting It All Together
Here’s a consolidated script that wraps each of the three experiments in a function and runs them in sequence on a single dataset. It is a high-level outline: adjust the CSV path and the column names the code reads (`description`, `label`, `emission_value`) to match your own dataset.

In [None]:
import pandas as pd
from sentence_transformers import SentenceTransformer
from transformers import pipeline, TFBertForSequenceClassification, BertTokenizer, create_optimizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, accuracy_score
import tensorflow as tf

# Read the dataset once; the supervised-learning and BERT experiments below
# both receive this DataFrame.
# NOTE(review): the standalone Experiment 2 cell reads 'useeio_dataset.csv'
# while this cell reads 'useeio.csv' — confirm which file name is intended.
dataset_path = 'useeio.csv'
data = pd.read_csv(dataset_path)

# Experiment 1: Zero-Shot Learning and Classification
def zero_shot_classification(texts, labels):
    """Classify each text against the candidate labels and print the result.

    A 'zero-shot-classification' pipeline is created on every call using the
    'facebook/bart-large-mnli' checkpoint, then applied to the texts one at a
    time.

    Args:
        texts: Iterable of strings to classify.
        labels: Candidate labels passed unchanged to the pipeline for each text.

    Returns:
        None. For every text, the text and the raw pipeline result are printed,
        followed by a blank line.
    """
    zero_shot = pipeline('zero-shot-classification', model='facebook/bart-large-mnli')
    for sample in texts:
        outcome = zero_shot(sample, labels)
        # One print call; emits the same characters as two separate prints.
        print(f"Text: {sample}\nClassification: {outcome}\n")

# Experiment 2: Supervised Learning Using Classical Models
def supervised_learning(data):
    """Train and score classical models on SBERT embeddings of the descriptions.

    Two tasks share one train/test split of the embedded descriptions:

    * regression of ``data['emission_value']`` with a random forest, reported
      as mean squared error;
    * classification of ``data['label']`` with logistic regression, reported
      as accuracy.

    Args:
        data: Table (the notebook passes the DataFrame returned by
            ``pd.read_csv``) with 'description', 'label' and 'emission_value'
            columns.

    Returns:
        None. Both metrics are printed.
    """
    sbert_model = SentenceTransformer('distilbert-base-nli-mean-tokens')
    texts = data['description'].tolist()
    class_labels = data['label'].tolist()
    # The regressor previously received the class labels as its target. Use the
    # emission column instead, the same one fine_tuning_bert regresses on.
    emission_values = data['emission_value'].tolist()
    embeddings = sbert_model.encode(texts)

    # Split everything in a single call so both tasks see identical rows.
    (X_train, X_test,
     y_cls_train, y_cls_test,
     y_reg_train, y_reg_test) = train_test_split(
        embeddings, class_labels, emission_values, test_size=0.2, random_state=42
    )

    # Regression (seeded so the reported MSE is reproducible)
    regressor = RandomForestRegressor(random_state=42)
    regressor.fit(X_train, y_reg_train)
    y_pred = regressor.predict(X_test)
    print(f"Regression MSE: {mean_squared_error(y_reg_test, y_pred)}")

    # Classification (max_iter raised above the default because the solver may
    # otherwise stop before converging on high-dimensional embeddings)
    classifier = LogisticRegression(max_iter=1000)
    classifier.fit(X_train, y_cls_train)
    y_pred = classifier.predict(X_test)
    print(f"Classification Accuracy: {accuracy_score(y_cls_test, y_pred)}")

# Experiment 3: Fine-Tuning BERT for Scope 3 Emission Estimation
def fine_tuning_bert(data, epochs=3, batch_size=16):
    """Fine-tune 'bert-base-uncased' to regress emission values from text.

    The rows are split 80/20 (``random_state=42``). The model is fine-tuned on
    the training part and the printed MSE is measured on the held-out part,
    so it is not a training-set error.

    Args:
        data: Table (the notebook passes the DataFrame returned by
            ``pd.read_csv``) with 'description' and 'emission_value' columns.
        epochs: Number of passes over the training set. Defaults to 3.
        batch_size: Batch size for training and evaluation. Defaults to 16.

    Returns:
        None. The held-out mean squared error is printed.
    """
    texts = data['description'].tolist()
    # Cast explicitly so the label tensor is floating-point even if the column
    # was parsed as integers.
    labels = [float(value) for value in data['emission_value'].tolist()]
    train_texts, eval_texts, train_labels, eval_labels = train_test_split(
        texts, labels, test_size=0.2, random_state=42
    )

    # num_labels=1 requests a single output, trained below with an MSE loss.
    model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=1)
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    def _build_dataset(batch_texts, batch_labels, shuffle):
        # Tokenize one split and wrap it, with its labels, in a batched dataset.
        encodings = tokenizer(batch_texts, truncation=True, padding=True)
        split = tf.data.Dataset.from_tensor_slices((dict(encodings), batch_labels))
        if shuffle:
            split = split.shuffle(len(batch_texts))
        return split.batch(batch_size)

    train_dataset = _build_dataset(train_texts, train_labels, shuffle=True)
    eval_dataset = _build_dataset(eval_texts, eval_labels, shuffle=False)

    # Total optimizer steps for the learning-rate schedule:
    # batches per epoch times the number of epochs.
    num_train_steps = len(train_dataset) * epochs
    optimizer, lr_schedule = create_optimizer(init_lr=2e-5, num_warmup_steps=0, num_train_steps=num_train_steps)
    model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mean_squared_error'])

    history = model.fit(train_dataset, epochs=epochs)
    loss, mse = model.evaluate(eval_dataset)
    print(f"Fine-tuned BERT MSE: {mse}")

# Inputs for the zero-shot experiment: candidate labels and sample descriptions
labels = ["commodity", "industry", "CO2", "CH4", "N2O", "production", "transportation", "distribution"]
texts = [
    "This is a description of the production process of steel.",
    "Emissions from transportation of goods.",
    "Methane emissions from livestock."
]

# Each entry pairs the banner to print with the call that runs the experiment;
# the tuple order is the execution order.
experiments = (
    ("Experiment 1: Zero-Shot Learning and Classification",
     lambda: zero_shot_classification(texts, labels)),
    ("Experiment 2: Supervised Learning Using Classical Models",
     lambda: supervised_learning(data)),
    ("Experiment 3: Fine-Tuning BERT for Scope 3 Emission Estimation",
     lambda: fine_tuning_bert(data)),
)

# Run experiments
for title, run in experiments:
    print(title)
    run()
