In [49]:
# importing all the necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

In [50]:
# Load the labelled sentences from the Excel workbook into a DataFrame.
dataset_path = 'text_dataset.xlsx'
data = pd.read_excel(dataset_path)

In [51]:
# Preview only the first rows: rendering the whole frame bloats the saved
# notebook, and the leading rows are enough to check the columns
# (id, sentence, voice) and their values.
data.head(10)

Unnamed: 0,id,sentence,voice
0,1.0,The chef prepares the meal.,Active
1,2.0,The teacher explains the lesson clearly.,Active
2,3.0,The gardener waters the plants every morning.,Active
3,4.0,The kids play soccer in the park.,Active
4,5.0,The author wrote a thrilling novel.,Active
5,6.0,The scientist conducts experiments in the lab.,Active
6,7.0,The company launched a new product.,Active
7,8.0,The artist paints a beautiful portrait.,Active
8,9.0,The musician composes a melody.,Active
9,10.0,The photographer takes stunning pictures.,Active


In [52]:
# Features are the raw sentences; the target is the 'voice' label of each one.
X, y = data['sentence'], data['voice']

In [53]:
# Two-stage split: hold out 40% of the rows, then cut that hold-out in half
# to obtain the validation and test sets (60/20/20 overall).
# The fixed random_state keeps both shuffles reproducible.
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=42,
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
)

In [54]:
# Turn each sentence into a vector of token counts.
# The vocabulary is learned from the training split only; the validation and
# test splits are merely projected onto it, so nothing from them leaks into
# the fitted vectorizer.
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_val_vec, X_test_vec = (
    vectorizer.transform(split) for split in (X_val, X_test)
)

In [55]:
# Fit a logistic-regression classifier (default hyper-parameters) on the
# training vectors. fit() returns the estimator itself, so it can be bound
# directly.
classifier = LogisticRegression().fit(X_train_vec, y_train)
# Leave the fitted estimator as the cell's last expression so its repr is shown.
classifier

LogisticRegression()

In [56]:
# Predict labels for the validation split and show them above the sentences
# they belong to, for a quick visual check.
y_val_pred = classifier.predict(X_val_vec)
print(
    y_val_pred,
    "---------------------------------------------->",
    X_val,
    sep="\n",
)

['Active' 'Active' 'Active' 'Passive' 'Passive' 'Active' 'Passive'
 'Passive']
---------------------------------------------->
15         The architect draws the plans for the house.
8                       The musician composes a melody.
4                   The author wrote a thrilling novel.
27       A beautiful portrait is painted by the artist.
25    Experiments are conducted in the lab by the sc...
13                   The engineer designs a new bridge.
26           A new product was launched by the company.
37    The customers are served efficiently by the wa...
Name: sentence, dtype: object


In [57]:
# Score the validation predictions: overall accuracy first, then the
# per-class precision / recall / F1 report.
val_accuracy = accuracy_score(y_val, y_val_pred)
print("Validation Accuracy:", val_accuracy)
val_report = classification_report(y_val, y_val_pred)
print("---------------------------------------------->", val_report, sep="\n")

Validation Accuracy: 1.0
---------------------------------------------->
              precision    recall  f1-score   support

      Active       1.00      1.00      1.00         4
     Passive       1.00      1.00      1.00         4

    accuracy                           1.00         8
   macro avg       1.00      1.00      1.00         8
weighted avg       1.00      1.00      1.00         8



In [58]:
# Predict labels for the held-out test split and show them above the
# sentences they belong to.
y_test_pred = classifier.predict(X_test_vec)
print(
    y_test_pred,
    "---------------------------------------------->",
    X_test,
    sep="\n",
)

['Active' 'Active' 'Active' 'Passive' 'Passive' 'Active' 'Passive'
 'Active']
---------------------------------------------->
19          The student submits the assignment on time.
16             The manager organizes the work schedule.
12                 The designer creates a modern dress.
34          The application is coded by the programmer.
31    The news is accurately reported by the journal...
9             The photographer takes stunning pictures.
39    The assignment is submitted on time by the stu...
6                   The company launched a new product.
Name: sentence, dtype: object


In [59]:
# Score the test predictions: overall accuracy first, then the per-class
# precision / recall / F1 report.
test_accuracy = accuracy_score(y_test, y_test_pred)
print("Test Accuracy:", test_accuracy)
test_report = classification_report(y_test, y_test_pred)
print("---------------------------------------------->", test_report, sep="\n")

Test Accuracy: 1.0
---------------------------------------------->
              precision    recall  f1-score   support

      Active       1.00      1.00      1.00         5
     Passive       1.00      1.00      1.00         3

    accuracy                           1.00         8
   macro avg       1.00      1.00      1.00         8
weighted avg       1.00      1.00      1.00         8



In [60]:
# Display the held-out test sentences (the same Series that was printed
# alongside the test predictions).
X_test

19          The student submits the assignment on time.
16             The manager organizes the work schedule.
12                 The designer creates a modern dress.
34          The application is coded by the programmer.
31    The news is accurately reported by the journal...
9             The photographer takes stunning pictures.
39    The assignment is submitted on time by the stu...
6                   The company launched a new product.
Name: sentence, dtype: object

In [72]:
# Build a one-sentence example, keyed by the same 'sentence' column name
# that the dataset uses, to try the trained pipeline on.
example = {"sentence": ["A Honda car was bought by me"]}

In [73]:
# Wrap the example in a DataFrame; each dict key becomes a column.
sample_df = pd.DataFrame.from_dict(example)

In [74]:
# Plain-text view of the one-row example frame.
print(sample_df)

                       sentence
0  A Honda car was bought by me


In [75]:
# Encode the example with the vocabulary already fitted on the training split.
# The 'sentence' column is passed rather than the whole frame: iterating a
# DataFrame yields its column labels, not its rows, so the frame itself would
# not feed the sentence text to the vectorizer.
# NOTE(review): this rebinds X_test_vec, which until now held the test-split
# matrix used by the test-evaluation cells. Re-running those cells after this
# one would score this single sample instead; consider a dedicated name once
# every later use has been checked.
sample_sentences = sample_df['sentence']
X_test_vec = vectorizer.transform(sample_sentences)
# Printing the sparse matrix lists the (row, vocabulary index) count entries.
print(X_test_vec)

  (0, 8)	1
  (0, 79)	1
