# Hybrid Zero Shot Learning Approach

In [1]:
import pandas as pd

<h2>Data Retrieval</h2>

In [2]:
# Read the promise dataset from its CSV file into a DataFrame
promise_csv = "datasets/promise.csv"
df = pd.read_csv(promise_csv)

# Preview the first rows
df.head()

Unnamed: 0,INPUT,TYPE
0,The system shall refresh the display every 60 ...,PE
1,The application shall match the color of the s...,LF
2,If projected the data must be readable. On a...,US
3,The product shall be available during normal b...,A
4,If projected the data must be understandable....,US


<h2>Data Preprocessing</h2>

<h3>Lowercasing</h3>

In [10]:
# Lower-case the text of the INPUT column and write it back in place
lowercased_input = df['INPUT'].str.lower()
df['INPUT'] = lowercased_input

# Preview the result
df.head()

Unnamed: 0,INPUT,TYPE
0,the system shall refresh the display every 60 ...,PE
1,the application shall match the color of the s...,LF
2,if projected the data must be readable. on a...,US
3,the product shall be available during normal b...,A
4,if projected the data must be understandable....,US


<h3>Stopword Removal</h3>

In [11]:
import nltk
from nltk.corpus import stopwords

# Define stop words.
# stopwords.words() raises LookupError when the NLTK "stopwords" corpus has not
# been downloaded yet (e.g. on a fresh environment), so fetch it once on demand
# and retry. If the download fails, the retry raises the same LookupError.
try:
    stop_words = set(stopwords.words('english'))
except LookupError:
    nltk.download('stopwords', quiet=True)
    stop_words = set(stopwords.words('english'))

# Function to remove stopwords
def remove_stopwords(text, stopword_set=None):
    """Remove stop words from a whitespace-separated string.

    Args:
        text: The string to filter. Any non-string value (for example the
            NaN of a missing cell) is returned unchanged instead of raising,
            matching the isinstance guard used by the punctuation step.
        stopword_set: Optional collection of lower-case words to drop. When
            omitted, the notebook-level ``stop_words`` set is used.

    Returns:
        The words of ``text`` that are not stop words, joined by single
        spaces; or ``text`` itself when it is not a string.
    """
    if not isinstance(text, str):
        return text
    if stopword_set is None:
        stopword_set = stop_words
    # Each word is lower-cased for the membership test only; kept words retain
    # their original casing.
    return ' '.join(
        word for word in text.split() if word.lower() not in stopword_set
    )

# Run remove_stopwords over every value of the INPUT column and store the result
input_without_stopwords = df['INPUT'].apply(remove_stopwords)
df['INPUT'] = input_without_stopwords

# Preview the filtered text
df.head()

Unnamed: 0,INPUT,TYPE
0,system shall refresh display every 60 seconds.,PE
1,application shall match color schema set forth...,LF
2,projected data must readable. 10x10 projection...,US
3,product shall available normal business hours....,A
4,projected data must understandable. 10x10 proj...,US


<h3>Punctuation Removal</h3>

In [12]:
import string

# Spell out the percent sign as a word before punctuation is stripped.
# regex=False makes the literal match explicit, because the default value of
# `regex` in Series.str.replace differs between pandas versions.
df['INPUT'] = df['INPUT'].str.replace('%', ' percent', regex=False)

# Build the deletion table once instead of rebuilding it for every row.
punctuation_table = str.maketrans('', '', string.punctuation)

# Delete every character listed in string.punctuation from the INPUT column;
# non-string values are passed through unchanged.
# NOTE(review): characters are deleted rather than replaced by a space, so
# hyphenated terms are merged (the preview shows "selfexplanatory" and
# "uptodate") - confirm this is the intended tokenisation.
df['INPUT'] = df['INPUT'].apply(
    lambda x: x.translate(punctuation_table) if isinstance(x, str) else x
)

df.head(20)

Unnamed: 0,INPUT,TYPE
0,system shall refresh display every 60 seconds,PE
1,application shall match color schema set forth...,LF
2,projected data must readable 10x10 projection ...,US
3,product shall available normal business hours ...,A
4,projected data must understandable 10x10 proje...,US
5,product shall ensure accessed authorized users...,SE
6,product shall intuitive selfexplanatory 90 pe...,US
7,product shall respond fast keep uptodate data ...,PE
8,system shall mdi form allows viewing graph dat...,F
9,system shall display events vertical table time,F


<h2>Loading Pre-Trained Models</h2>

In [13]:
from transformers import AutoTokenizer, TFAutoModel

# Load the tokenizer and model for BERT4RE.
# NOTE(review): TFAutoModel is the TensorFlow model class; the recorded run of
# this cell stopped with an ImportError because TensorFlow was not found in the
# kernel environment. Install TensorFlow and restart the kernel before re-running.
model_name = "thearod5/bert4re"  # presumably a Hugging Face Hub model id - TODO confirm
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModel.from_pretrained(model_name)

# Confirm the model is loaded (only reached when both from_pretrained calls succeed)
print("Model and tokenizer loaded successfully with TensorFlow.")

ImportError: 
TFAutoModel requires the TensorFlow library but it was not found in your environment. Checkout the instructions on the
installation page: https://www.tensorflow.org/install and follow the ones that match your environment.
Please note that you may need to restart your runtime after installation.
