# Hybrid Zero Shot Learning Approach

In [2]:
import pandas as pd

<h2>Data Retrieval</h2>

In [3]:
# Read the promise dataset from its CSV file.
# NOTE(review): the preview below includes an 'Unnamed: 0' column, while the
# outputs of later cells show only INPUT and TYPE — presumably that column is
# dropped in a cell not shown here; confirm this still happens on a fresh run.
PROMISE_CSV_PATH = "datasets/promise.csv"
df = pd.read_csv(PROMISE_CSV_PATH)

df.head()

Unnamed: 0,INPUT,TYPE
0,The system shall refresh the display every 60 ...,PE
1,The application shall match the color of the s...,LF
2,If projected the data must be readable. On a...,US
3,The product shall be available during normal b...,A
4,If projected the data must be understandable....,US


<h2>Data Preprocessing</h2>

<h3>Lowercasing</h3>

In [6]:
# Lowercase the text of the INPUT column and write it back in place.
# NOTE(review): the saved output of this cell already shows stopwords removed,
# so it was presumably captured after later cells had run — re-run the
# notebook from a fresh kernel to refresh it.
input_lowercased = df['INPUT'].str.lower()
df['INPUT'] = input_lowercased

print(df)

                                                 INPUT TYPE
0       system shall refresh display every 60 seconds.   PE
1    application shall match color schema set forth...   LF
2    projected data must readable. 10x10 projection...   US
3    product shall available normal business hours....    A
4    projected data must understandable. 10x10 proj...   US
..                                                 ...  ...
620  user access limited permissions granted role(s...   SE
621  product must comply intranet page standards re...   LF
622  intranet pages display appropriately resolutio...   LF
623  users able easily use system successfully comp...   US
624  product interface fast. response time product ...   PE

[625 rows x 2 columns]


<h3>Stopword Removal</h3>

In [7]:
import nltk
from nltk.corpus import stopwords

# Define stop words.
# On an environment where the NLTK 'stopwords' corpus has not been installed,
# stopwords.words() raises LookupError; fetch the corpus once and retry so the
# notebook survives Restart & Run All on a fresh machine.
try:
    stop_words = set(stopwords.words('english'))
except LookupError:
    nltk.download('stopwords')
    stop_words = set(stopwords.words('english'))

# Function to remove stopwords
def remove_stopwords(text, stop_word_set=None):
    """Return ``text`` with every stopword token removed.

    The text is split on whitespace, each token is lowercased for the
    membership check, and the surviving tokens are re-joined with single
    spaces (so runs of whitespace collapse to one space).

    Parameters
    ----------
    text : str or any
        The sentence to filter. Non-string values (e.g. NaN for a missing
        cell) are returned unchanged, matching the ``isinstance`` guard used
        by the punctuation-removal step.
    stop_word_set : collection of str, optional
        Words to drop. Defaults to the module-level ``stop_words`` set.

    Returns
    -------
    str or any
        The filtered sentence, or ``text`` itself when it is not a string.

    Notes
    -----
    Tokens are compared as-is after lowercasing, so a stopword with
    punctuation attached (e.g. ``"it."``) is NOT removed.
    """
    # Missing / non-text cells would otherwise fail on .split()
    if not isinstance(text, str):
        return text

    # Resolve the default at call time so the module-level set is used
    if stop_word_set is None:
        stop_word_set = stop_words

    kept_words = [word for word in text.split() if word.lower() not in stop_word_set]
    return ' '.join(kept_words)

# Run every INPUT entry through remove_stopwords and store the result back
input_without_stopwords = df['INPUT'].map(remove_stopwords)
df['INPUT'] = input_without_stopwords

# Show the updated DataFrame
print(df)

                                                 INPUT TYPE
0       system shall refresh display every 60 seconds.   PE
1    application shall match color schema set forth...   LF
2    projected data must readable. 10x10 projection...   US
3    product shall available normal business hours....    A
4    projected data must understandable. 10x10 proj...   US
..                                                 ...  ...
620  user access limited permissions granted role(s...   SE
621  product must comply intranet page standards re...   LF
622  intranet pages display appropriately resolutio...   LF
623  users able easily use system successfully comp...   US
624  product interface fast. response time product ...   PE

[625 rows x 2 columns]


<h3>Punctuation Removal</h3>

In [10]:
import string

# Remove punctuation from the 'INPUT' column.
# The deletion table is built once here rather than once per row.
PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def remove_punctuation(text):
    """Return ``text`` with every ``string.punctuation`` character deleted.

    Characters are deleted, not replaced by spaces, so tokens joined only by
    punctuation fuse together (e.g. ``"role(s)"`` becomes ``"roles"``).
    Non-string values (e.g. NaN for a missing cell) are returned unchanged.
    """
    if not isinstance(text, str):
        return text
    return text.translate(PUNCTUATION_TABLE)


df['INPUT'] = df['INPUT'].apply(remove_punctuation)

print(df)

                                                 INPUT TYPE
0        system shall refresh display every 60 seconds   PE
1    application shall match color schema set forth...   LF
2    projected data must readable 10x10 projection ...   US
3    product shall available normal business hours ...    A
4    projected data must understandable 10x10 proje...   US
..                                                 ...  ...
620  user access limited permissions granted roles ...   SE
621  product must comply intranet page standards re...   LF
622  intranet pages display appropriately resolutio...   LF
623  users able easily use system successfully comp...   US
624  product interface fast response time product i...   PE

[625 rows x 2 columns]
