In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score

In [7]:
# Load the labelled requirements dataset into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; anyone re-running the
# notebook on another machine has to point it at their own copy of the CSV.
df = pd.read_csv(
    filepath_or_buffer=r'C:/Users/Shakil/Downloads/Compressed/text_classification.csv'
)

# Preview the first five rows (the cell's last expression is rendered as its output).
df.head(n=5)

Unnamed: 0,Scenario,Requirement,Requirement Type,Author
0,The Internet of Objects (IoT) Based Applicatio...,Collection and analysis of environmental data ...,Functional,Human
1,The Internet of Objects (IoT) Based Applicatio...,"Monitoring, analyzing traffic flow and providi...",Functional,Human
2,The Internet of Objects (IoT) Based Applicatio...,"In case of emergency, sending instant notifica...",Functional,Human
3,The Internet of Objects (IoT) Based Applicatio...,"For parking space management, identifying empt...",Functional,Human
4,The Internet of Objects (IoT) Based Applicatio...,Automatically adjusting lighting and electrica...,Functional,Human


In [8]:
# Count the missing values in each column (isna is the canonical name for isnull).
df.isna().sum()

Scenario            0
Requirement         0
Requirement Type    0
Author              0
dtype: int64

#### Select the feature and target, then split the data

In [9]:
# The requirement text is the model input; the author label is what we predict.
x, y = df['Requirement'], df['Author']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    random_state=42,
    test_size=0.2,
)

### Classification

In [10]:
# Candidate classifiers, keyed by the display name printed with each report.
# Random Forest, Decision Tree and MLP are stochastic learners: without a fixed
# random_state their scores change on every run, so the printed reports could
# not be reproduced after a kernel restart. They are seeded with 42, the same
# value the train/test split uses.
models = {
    'Random Forest': RandomForestClassifier(random_state=42),
    'Naive Bayes': MultinomialNB(),
    'SVM': SVC(),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'MLP': MLPClassifier(random_state=42),
}

# Turn the raw text into numerical TF-IDF features over unigrams and bigrams.
# The vectorizer is fitted on the training split only and then reused to
# transform the test split, so the test rows do not influence the vocabulary
# or the IDF weights.
tfidf_vec = TfidfVectorizer(ngram_range=(1, 2))
xtrain_tfidf = tfidf_vec.fit_transform(xtrain)
xtest_tfidf = tfidf_vec.transform(xtest)

In [11]:
# Train and evaluate every candidate model.
# For each entry in `models`: fit it on the TF-IDF training features, predict
# the held-out test split, then print its classification report.
for model_name in models:
    model = models[model_name]
    model.fit(xtrain_tfidf, ytrain)       # train
    ypred = model.predict(xtest_tfidf)    # predict on the test data
    print(f"Classification Report for {model_name}:")
    print(classification_report(y_true=ytest, y_pred=ypred))

Classification Report for Random Forest:
              precision    recall  f1-score   support

     ChatGpt       1.00      0.83      0.91        36
       Human       0.88      1.00      0.94        44

    accuracy                           0.93        80
   macro avg       0.94      0.92      0.92        80
weighted avg       0.93      0.93      0.92        80

Classification Report for Naive Bayes:
              precision    recall  f1-score   support

     ChatGpt       0.79      0.94      0.86        36
       Human       0.95      0.80      0.86        44

    accuracy                           0.86        80
   macro avg       0.87      0.87      0.86        80
weighted avg       0.88      0.86      0.86        80

Classification Report for SVM:
              precision    recall  f1-score   support

     ChatGpt       0.94      0.86      0.90        36
       Human       0.89      0.95      0.92        44

    accuracy                           0.91        80
   macro avg     

### Test the models

In [12]:
# Two free-form sentences used as a quick spot check of the fitted models.
# NOTE(review): these look like literary quotes rather than requirement
# statements like the 'Requirement' column the models were trained on, so the
# predictions are presumably out-of-domain; interpret them with care.
sample_text = [
    "We loved with a love that was more than love. - Edgar Allan Poe",
    "The heart was made to be broken. - Oscar Wilde",
]

# Reuse the already-fitted vectorizer so the samples are encoded with the
# same vocabulary as the training data.
test_examples_tfidf = tfidf_vec.transform(sample_text)

# For every model, show each sample next to the author label predicted for it.
for model_name, model in models.items():
    print(f"Predictions for {model_name}:")
    predictions = model.predict(test_examples_tfidf)
    for text, prediction in zip(sample_text, predictions):
        print(f"Text: {text}", f"Predicted Author: {prediction}", sep="\n")
    print("="*60)

Predictions for Random Forest:
Text: We loved with a love that was more than love. - Edgar Allan Poe
Predicted Author: Human
Text: The heart was made to be broken. - Oscar Wilde
Predicted Author: Human
Predictions for Naive Bayes:
Text: We loved with a love that was more than love. - Edgar Allan Poe
Predicted Author: Human
Text: The heart was made to be broken. - Oscar Wilde
Predicted Author: ChatGpt
Predictions for SVM:
Text: We loved with a love that was more than love. - Edgar Allan Poe
Predicted Author: Human
Text: The heart was made to be broken. - Oscar Wilde
Predicted Author: ChatGpt
Predictions for Decision Tree:
Text: We loved with a love that was more than love. - Edgar Allan Poe
Predicted Author: Human
Text: The heart was made to be broken. - Oscar Wilde
Predicted Author: Human
Predictions for MLP:
Text: We loved with a love that was more than love. - Edgar Allan Poe
Predicted Author: Human
Text: The heart was made to be broken. - Oscar Wilde
Predicted Author: ChatGpt
