# Hybrid Zero Shot Learning Approach

In [28]:
import pandas as pd

<h2>Data Retrieval</h2>

In [29]:
# Read the PROMISE requirements CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer="datasets/promise.csv")

# Preview the first five rows
df.head()

Unnamed: 0,INPUT,TYPE
0,The system shall refresh the display every 60 seconds.,PE
1,The application shall match the color of the schema set forth by Department of Homeland Security,LF
2,If projected the data must be readable. On a 10x10 projection screen 90% of viewers must be able to read Event / Activity data from a viewing distance of 30,US
3,The product shall be available during normal business hours. As long as the user has access to the client PC the system will be available 99% of the time during the first six months of operation.,A
4,If projected the data must be understandable. On a 10x10 projection screen 90% of viewers must be able to determine that Events or Activities are occuring in current time from a viewing distance of 100,US


<h2>Data Preprocessing</h2>

<h3>Lowercasing</h3>

In [30]:
# Fold every requirement sentence in the INPUT column to lowercase
df['INPUT'] = df.INPUT.str.lower()

# Preview the lowercased rows
df.head()

Unnamed: 0,INPUT,TYPE
0,the system shall refresh the display every 60 seconds.,PE
1,the application shall match the color of the schema set forth by department of homeland security,LF
2,if projected the data must be readable. on a 10x10 projection screen 90% of viewers must be able to read event / activity data from a viewing distance of 30,US
3,the product shall be available during normal business hours. as long as the user has access to the client pc the system will be available 99% of the time during the first six months of operation.,A
4,if projected the data must be understandable. on a 10x10 projection screen 90% of viewers must be able to determine that events or activities are occuring in current time from a viewing distance of 100,US


<h3>Stopword Removal</h3>

In [31]:
import nltk
from nltk.corpus import stopwords

# Define stop words
# The stopwords corpus is not bundled with NLTK: on a machine where it has not
# been downloaded yet, the first access raises LookupError. Fetch it once in
# that case so the notebook still runs from a fresh kernel/environment.
try:
    stop_words = set(stopwords.words('english'))
except LookupError:
    nltk.download('stopwords', quiet=True)
    stop_words = set(stopwords.words('english'))

# Function to remove stopwords
def remove_stopwords(text, stopword_set=None):
    """Return ``text`` with stop words removed.

    The text is split on whitespace, each token is compared
    case-insensitively against the stop-word collection, and the surviving
    tokens are re-joined with single spaces.

    Tokens are compared whole, so a stop word with punctuation attached
    (e.g. ``"the."``) is NOT removed.

    Args:
        text: The string to filter. Non-string values (e.g. NaN for a missing
            cell) are returned unchanged instead of raising, matching the
            ``isinstance`` guard used by the punctuation-removal step.
        stopword_set: Optional collection of lowercase stop words. When
            omitted, the module-level ``stop_words`` set is used.

    Returns:
        The filtered string, or ``text`` itself when it is not a string.
    """
    # Missing values survive `.str.lower()` as NaN; pass them through.
    if not isinstance(text, str):
        return text

    active_stopwords = stop_words if stopword_set is None else stopword_set
    return ' '.join(
        word for word in text.split() if word.lower() not in active_stopwords
    )

# Run every requirement in the INPUT column through the stopword filter
df['INPUT'] = df['INPUT'].map(remove_stopwords)

# Preview the filtered rows
df.head()

Unnamed: 0,INPUT,TYPE
0,system shall refresh display every 60 seconds.,PE
1,application shall match color schema set forth department homeland security,LF
2,projected data must readable. 10x10 projection screen 90% viewers must able read event / activity data viewing distance 30,US
3,product shall available normal business hours. long user access client pc system available 99% time first six months operation.,A
4,projected data must understandable. 10x10 projection screen 90% viewers must able determine events activities occuring current time viewing distance 100,US


<h3>Punctuation Removal</h3>

In [33]:
import string

# Spell out the percent sign first so that "90%" survives as "90 percent"
# once punctuation is stripped below. regex=False makes the literal match
# explicit rather than relying on the pandas version's default.
df['INPUT'] = df['INPUT'].str.replace('%', ' percent', regex=False)

# Build the punctuation-deleting translation table once instead of per row.
punctuation_table = str.maketrans('', '', string.punctuation)

# Delete every ASCII punctuation character from the INPUT column. Characters
# are removed, not replaced by a space, so hyphenated words are merged
# (e.g. "self-explanatory" becomes "selfexplanatory"). Non-string values are
# passed through unchanged.
df['INPUT'] = df['INPUT'].apply(
    lambda x: x.translate(punctuation_table) if isinstance(x, str) else x
)

df.head(20)

Unnamed: 0,INPUT,TYPE
0,system shall refresh display every 60 seconds,PE
1,application shall match color schema set forth department homeland security,LF
2,projected data must readable 10x10 projection screen 90 percent viewers must able read event activity data viewing distance 30,US
3,product shall available normal business hours long user access client pc system available 99 percent time first six months operation,A
4,projected data must understandable 10x10 projection screen 90 percent viewers must able determine events activities occuring current time viewing distance 100,US
5,product shall ensure accessed authorized users product able distinguish authorized unauthorized users access attempts,SE
6,product shall intuitive selfexplanatory 90 percent new users shall able start display events activities within 90 minutes using product,US
7,product shall respond fast keep uptodate data display,PE
8,system shall mdi form allows viewing graph data table,F
9,system shall display events vertical table time,F
