#### Import Relevant Libraries

In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from split_data_utils import train_test_spliting
from data_preprocessing import lemmatize_text_with_pos, tokens
from ngram_utils import generate_ngrams, build_ngram_vocab, bag_of_ngrams
from tf_idf_embedding_utils import tf_idf, tokenise, build_vocabulary, log_count_terms, count_terms

from nltk.tokenize import word_tokenize
import nltk
nltk.download('punkt')  # Download the punkt tokenizer data
nltk.download('wordnet')  # Download the WordNet data
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\maryk\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\maryk\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\maryk\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

#### Read Dataset

In [10]:
df = pd.read_excel("Humour_style.xlsx")   # Read Excel dataset 
df = df[['JOKES', 'LABELS']]              # Extract Only the Jokes and Labels Column
df = df[:1263]        

#### Split Dataset Into Train and Test

In [25]:
train_ratio = 0.8
seed = 100
x_train, x_test, y_train, y_test = train_test_spliting(df,train_ratio,seed)

print("x_train",x_train.dtype)   # Get the shape of the training features (Number of instance, number of features/column)
print("y_train",y_train.shape)   # Get the shape of the training label (Number of instance, number of column)
print("x_test",x_test.dtype)
print("y_test",y_test.shape)

print(x_train)

x_train object
y_train (1010,)
x_test object
y_test (253,)
['4 ways to become a better risk taker'
 '“Never argue with stupid people, they will drag you down to their level and then beat you with experience.”'
 '“Worrying is like paying a debt you don’t owe.”' ...
 "Worker dies at minnesota vikings' stadium construction site"
 "sharps' injuries could pose hiv, hepatitis risk to surgeons"
 "My set is full of them, but I have a go to bit about how awful it is being a fat chick with small tits that almost always saves me when I'm faltering."]


#### Data Preprocessing

In [12]:
# Lemmatize each example in the train dataset
lemmatized_x_train  = [lemmatize_text_with_pos(example) for example in x_train]

# Lemmatize each example in the test dataset
lemmatized_x_test  = [lemmatize_text_with_pos(example) for example in x_test]

x_train = np.array(lemmatized_x_train )   # Convert Train data to Numpy Array 
x_test = np.array(lemmatized_x_test)      # Convert Test data to Numpy Array

### Feature Engineering for Text Classification
In this notebook, we explore different feature engineering techniques for text classification using a Logistic Regression model. The implemented features include Bag-of-Ngrams, TF-IDF, and Positive/Negative word counts.


#### Bag-of-Ngrams Features

The Bag-of-Ngrams representation captures the frequency of word combinations (N-grams) in a document. The following steps outline the process:

1. **Generate N-grams**: Convert the input text into a list of N-grams.
2. **Build Vocabulary**: Create a set of unique N-grams to form the vocabulary.
3. **Create Bag-of-Ngrams**: Count the occurrences of each N-gram in the text and represent the document as a dense array.

In [13]:
### Get vocabulary size from train dataset
train_vocab  = set()

# Build the vocabulary from train N-grams
for example in x_train:
    vocabs = generate_ngrams(example,2)
    train_vocab.update(vocabs)

print(f"Vocabulary Size: {len(train_vocab)}")

# Convert train N-grams to vectors using the vocabulary
def convert_to_vectors(data, vocab):
    features = []
    for example in data:
        vectors = bag_of_ngrams(example, 2, vocab)
        features.append(vectors)
    return np.squeeze(np.array(features))

# Convert train N-grams to vectors
x_train_features = convert_to_vectors(x_train, train_vocab)

# Convert test N-grams to vectors using the same vocabulary
x_test_features = convert_to_vectors(x_test, train_vocab)

print(f"Train Features Shape: {x_train_features.shape}")
print(f"Test Features Shape: {x_test_features.shape}")


Vocabulary Size: 11794
Train Features Shape: (1010, 11794)
Test Features Shape: (253, 11794)


#### TF-IDF (Term Frequency-Inverse Document Frequency) Features
TF-IDF is a numerical statistic that reflects the importance of a word in a document relative to a collection of documents. The steps are as follows:

1. **Tokenization:** Break the text into individual words (tokens).
2. **Compute TF-IDF:** Use the tf-idf method calculate TF-IDF scores.

In [33]:
# Converting to Train data to lowercases
train_lower = [sentence.lower() for sentence in x_train]

vocabulary  = build_vocabulary(train_lower)
print("Vocabulary: ", len(vocabulary))

# TF-IDF vectors for Train dataset
tf_idf_values = tf_idf(train_lower,vocabulary)
tf_idf_matrix = []                      #Final Train dataset of tf-idf embeddings
for sentence in train_lower:
    tf_idf_sentence = []                #Store tf-idf for each sentence
    for word in tokenise(sentence):
        tf_idf_sentence .append(tf_idf_values[word])

    # Pad exach sentences to fixed length of vocabulary size
    padded_tf_idf_sentence=  np.pad(np.array(tf_idf_sentence), (0, len(vocabulary) - len(tf_idf_sentence))) 
    tf_idf_matrix.append(padded_tf_idf_sentence)

tf_idf_matrix = np.array(tf_idf_matrix)
print("-" *30,"\nTrain matrix ", np.array(tf_idf_matrix).shape)

# Converting to Test Dataset to lowercases
test_lower = [sentence.lower() for sentence in x_test]

# TF-IDF vectors for Text dataset
X_text_tf_idf_matrix = []
for sentence in test_lower:
    tf_idf_sentence = []
    for word in tokenise(sentence):
        # Use get method to handle the case where the word is not in the dictionary
        tf_idf_sentence.append(tf_idf_values.get(word, 0.0))
    padded_tf_idf_sentence=  np.pad(np.array(tf_idf_sentence), (0, len(vocabulary) - len(tf_idf_sentence))) 
    X_text_tf_idf_matrix.append(padded_tf_idf_sentence)

X_text_tf_idf_matrix =np.array(X_text_tf_idf_matrix)
print("-" *30,"\nTest matrix ", np.array(X_text_tf_idf_matrix).shape)

Vocabulary:  4009
------------------------------ 
Train matrix  (1010, 4009)
------------------------------ 
Test matrix  (253, 4009)


##### Positive/Negative and Pronoun Word Counts
This feature engineering approach counts the number of positive and negative words in a text. It involves the following steps:

1. **Tokenization:** Tokenize the text into words.
2. **Count Positive/Negative Words:** Use predefined lists of positive, negative and pronoun words to count occurrences.

Note: Can add more word counts based on different criteria


In [37]:
# Read positive and negative lexicons
positive_lexicon  = np.array(pd.read_fwf("positive-words.txt"))
negative_lexicon  = np.array(pd.read_fwf("negative-words.txt"))

def features(x):
    # Tokenize the text
    first_pro = ["i","we","us","me","myself",'my','mine',"our","ours","ourselves"]
    
    sec_pro = ["you","your","yours","him","her",'he','their',"them","they",
               "it","its","theirs","his","she","hers","himself", 
               "herself", "itself", "themselves"]
    
    words = word_tokenize(x.lower())

    # Count number of positive and negative words in a sentence 
    pos_count = sum(word in positive_lexicon for word in words) 
    neg_count = sum(word in negative_lexicon for word in words) 

    # Count occurrences of first person and second person pronouns
    f_p_count = sum(word in first_pro for word in words) 
    s_p_count = sum(word in sec_pro for word in words)

    return pos_count, neg_count, f_p_count, s_p_count

# Extract features for each example in the train dataset
x_train_feature_pnp = np.array([features(example) for example in x_train])

# Extract features for each example in the test dataset
x_test_feature_pnp = np.array([features(example) for example in x_test])

# Print the shapes of the feature matrices
print(x_train_feature_pnp.shape)
print(x_test_feature_pnp.shape)


(1010, 4)
(253, 4)


#### Multinomial Logistic Regression Model

The Multinomial Logistic Regression model is used for the Humour style multi-class classification problems. It extends the binary logistic regression to handle multiple classes.


In [32]:
def normalize(X):
    # Z-score standardization
    return (X - np.mean(X, axis=0)) / np.std(X, axis=0)

def sigmoid(z):
    # Sigmoid activation function
    return 1 / (1 + np.exp(-z))

def softmax(z):
    # Softmax activation function for multi-class classification
    exp_z = np.exp(z - np.max(z))  # Subtracting the max for numerical stability to prevent numerical overflow.
    return exp_z / exp_z.sum(axis=0, keepdims=True)

def oneHot(y):
    # Convert class labels to one-hot encoded vectors
    onehot_y = np.zeros((len(y),len(np.unique(y))), dtype=int)
    onehot_y[np.arange(len(y)),y] = 1 #np.arrange is used to create indices
    onehot_y = onehot_y.T
    return onehot_y

class MultiClass_LogisticReg:
    def __init__(self, n_input, n_output, learning_rate=0.1, n_epoch=1000):
        # Initialize logistic regression model parameters
        self.n_input  = n_input
        self.n_output = n_output
        self.learning_rate = learning_rate
        self.n_epoch = n_epoch
        self.w = np.random.rand(n_output,n_input) - 0.5 # range form -0.5 to 0.5
        self.b = np.random.rand(n_output,1) - 0.5

    
    def forward(self, x):
        # Forward pass through the model
        return softmax(np.dot(self.w, x.T) + self.b)
    
    def loss(self, x,y):
        # Compute categorical cross-entropy loss
        predictions = self.forward(x)
        target = oneHot(y)
        epsilon = 1e-15  # Small constant to avoid log(0)
        predictions = np.clip(predictions, epsilon, 1 - epsilon)  # Clip predictions to avoid log(0)
        loss = -np.sum(target * np.log(predictions))
        return loss
    
    def gradient(self, x, y):
        # Compute gradients for weight and bias
        y = oneHot(y)
        d_w = np.dot(self.forward(x) - y,x)/y.size
        d_b = np.sum(self.forward(x) - y, axis=1,  keepdims=True)/y.size
        return d_w, d_b
    
    def update_params(self, x, y):
         d_w, d_b = self.gradient(x,y)
         self.w = self.w - self.learning_rate * d_w
         self.b = self.b - self.learning_rate * d_b
         return self.w, self.b
    
    def get_prediction(self, pred):
        # Get class predictions based on the highest probability
         return np.argmax(pred, axis=0)
    
    def get_accuracy(self,predictions, y):
        # Calculate classification accuracy
        assert len(predictions) == len(y)
        try:
            print(f'Pred: {predictions} \t truth: {y}')
            return np.sum(predictions == y) /len(y)
        except ZeroDivisionError:
            return 0

    def learning_rate_schedule(self, epoch):
        # Implement a learning rate schedule if needed
        return self.learning_rate / (1 + epoch / self.n_epoch)  

    def fit(self,x,y, x_val=None, y_val=None, patience=100):
        best_loss = float('inf')
        best_epoch = 0

        for epoch in range(self.n_epoch):
            #forward pass
            prediction = self.forward(x)

            # Calculate and print loss
            loss = self.loss(x,y)
            if epoch % 100 == 0:
                print(f'Epoch: {epoch} \t Loss: {loss}')

                # Early stopping check
                if x_val is not None and y_val is not None:
                    val_loss = self.loss(x_val, y_val)
                    print(f'Validation Loss: {val_loss}')
                    if val_loss < best_loss:
                        best_loss = val_loss
                        best_epoch = epoch
                    elif epoch - best_epoch > patience:
                        print(f'Early stopping at epoch {epoch}. Best validation loss: {best_loss}')
                        break

            # Update parameters (weights and biases)
            self.update_params(x, y)

            # Optionally, print accuracy
            if epoch % 100 == 0:
                pred = self.get_prediction(prediction)
                accuracy = self.get_accuracy(pred, y)
                print("Accuracy: ", accuracy)

        # Save the best model parameters
        if x_val is not None and y_val is not None:
            print("Saving best model parameters.")
            self.save_model()

    def save_model(self):
        # Save your model parameters to a file or any other storage
        np.savez('tf_idf_LR.npz', w=self.w, b=self.b)
        print("Model saved.")

    def load_model(self, filename='tf_idf_LR.npz'):
        # Load model parameters from a file
        loaded_params = np.load(filename)
        self.w = loaded_params['w']
        self.b = loaded_params['b']
        print("Model loaded.")

    def predict(self, x):
        # Use the loaded model parameters for prediction
        prediction = self.forward(x)
        pred_labels = self.get_prediction(prediction)
        return pred_labels

In [33]:
# Instantiate the class
model = MultiClass_LogisticReg(tf_idf_matrix.shape[1],len(np.unique(y_train)))

#Train the Model
model.fit(tf_idf_matrix,y_train)

#Save the Trained Model Parameter
model.save_model()

Epoch: 0 	 Loss: 1678.8931923533062
Pred: [4 0 0 ... 0 4 4] 	 truth: [4 3 2 ... 2 2 2]
Accuracy:  0.12673267326732673
Epoch: 100 	 Loss: 1655.509457725071
Pred: [4 0 0 ... 0 4 4] 	 truth: [4 3 2 ... 2 2 2]
Accuracy:  0.12871287128712872
Epoch: 200 	 Loss: 1638.5748282862378
Pred: [4 0 0 ... 0 4 4] 	 truth: [4 3 2 ... 2 2 2]
Accuracy:  0.1594059405940594
Epoch: 300 	 Loss: 1624.5050283489406
Pred: [4 0 0 ... 0 4 4] 	 truth: [4 3 2 ... 2 2 2]
Accuracy:  0.17524752475247524
Epoch: 400 	 Loss: 1611.8956875790022
Pred: [4 0 0 ... 0 4 4] 	 truth: [4 3 2 ... 2 2 2]
Accuracy:  0.20693069306930692
Epoch: 500 	 Loss: 1600.1820962061825
Pred: [4 0 0 ... 0 4 4] 	 truth: [4 3 2 ... 2 2 2]
Accuracy:  0.24554455445544554
Epoch: 600 	 Loss: 1589.1297594921873
Pred: [4 0 0 ... 0 4 4] 	 truth: [4 3 2 ... 2 2 2]
Accuracy:  0.27920792079207923
Epoch: 700 	 Loss: 1578.6333324243933
Pred: [4 0 0 ... 0 4 4] 	 truth: [4 3 2 ... 2 2 2]
Accuracy:  0.3079207920792079
Epoch: 800 	 Loss: 1568.6376156381957
Pred: [

#### Loading and using saved model for prediction

In [35]:
# Load Saved Model for prediction
# Instantiate the class
model = MultiClass_LogisticReg(tf_idf_matrix.shape[1],len(np.unique(y_train)))

# Load the model parameters
model.load_model('tf_idf_LR.npz')

# Make predictions
predictions = model.predict(X_text_tf_idf_matrix)

accuracy = model.get_accuracy(predictions, y_test) #Get Accuracy

print("Test Accuracy: ", accuracy)

Model loaded.
Pred: [3 0 0 0 1 0 1 4 0 0 0 0 1 0 2 1 0 0 3 0 0 0 1 0 1 1 1 0 0 2 0 4 3 0 0 3 3
 4 0 1 0 1 0 1 0 0 0 4 0 0 3 4 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 1 4 0 2 2
 3 0 3 0 1 0 0 3 0 2 0 0 0 0 1 0 0 1 0 1 2 0 1 0 0 1 0 4 0 1 0 0 0 0 0 0 1
 0 0 0 2 0 0 4 3 1 0 3 0 0 0 4 0 3 4 1 1 0 0 2 0 0 4 1 0 0 1 0 0 0 0 3 0 0
 1 3 0 3 0 0 4 0 1 0 0 0 1 0 3 0 1 0 4 3 0 0 0 1 0 0 2 0 3 0 3 1 0 0 0 2 0
 0 0 0 0 3 3 3 0 0 0 1 0 4 0 1 3 0 3 3 1 0 0 0 0 3 0 0 0 1 0 0 0 0 0 0 4 0
 3 1 3 0 0 0 2 0 4 0 0 3 3 0 0 3 0 2 0 1 4 0 0 0 0 0 4 3 3 1 4] 	 truth: [4 3 4 1 3 4 1 4 3 4 0 0 1 4 1 4 0 0 3 1 4 4 2 2 4 4 4 3 0 3 3 4 3 3 0 4 2
 4 0 2 4 0 0 1 0 4 0 4 1 0 3 4 3 4 2 1 4 0 0 1 2 4 4 2 3 1 4 0 1 3 4 2 1 0
 2 0 3 3 0 3 1 1 4 1 0 0 3 0 4 4 1 3 0 2 0 0 2 4 3 4 1 2 4 1 2 2 3 2 2 4 4
 3 2 4 2 2 2 4 2 4 2 3 4 1 0 4 2 3 3 3 4 0 1 3 3 1 3 4 4 2 2 3 3 2 0 3 1 1
 4 4 3 3 0 0 4 0 4 0 1 1 1 1 2 4 4 2 4 2 0 0 4 1 1 0 3 0 3 4 3 1 2 1 4 1 3
 0 4 0 0 3 1 2 4 2 4 4 4 0 0 3 2 4 0 3 2 3 0 0 1 2 1 0 3 0 2 4 3 2 4 2 3 0
 3 2 2 

#### Test Model on Test dataset

In [13]:
# Assuming y_test is your true labels for the test set
predicted_labels = model.predict(xtest_features) #Call the Predict class
accuracy = model.get_accuracy(predicted_labels, y_test) #Get Accuracy

print("Test Accuracy: ", accuracy)

Pred: [4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 1 4 4 4 4
 4 4 2 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 1 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4
 4 1 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 0 1 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 2 4 4 4 4 4 4
 4 4 4 1 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 0 4 4 4 4 4 4 3 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4] 	 truth: [4 3 4 1 3 4 1 4 3 4 0 0 1 4 1 4 0 0 3 1 4 4 2 2 4 4 4 3 0 3 3 4 3 3 0 4 2
 4 0 2 4 0 0 1 0 4 0 4 1 0 3 4 3 4 2 1 4 0 0 1 2 4 4 2 3 1 4 0 1 3 4 2 1 0
 2 0 3 3 0 3 1 1 4 1 0 0 3 0 4 4 1 3 0 2 0 0 2 4 3 4 1 2 4 1 2 2 3 2 2 4 4
 3 2 4 2 2 2 4 2 4 2 3 4 1 0 4 2 3 3 3 4 0 1 3 3 1 3 4 4 2 2 3 3 2 0 3 1 1
 4 4 3 3 0 0 4 0 4 0 1 1 1 1 2 4 4 2 4 2 0 0 4 1 1 0 3 0 3 4 3 1 2 1 4 1 3
 0 4 0 0 3 1 2 4 2 4 4 4 0 0 3 2 4 0 3 2 3 0 0 1 2 1 0 3 0 2 4 3 2 4 2 3 0
 3 2 2 4 1 3 3 0 0 1 

#### Using Sckit-Learn

In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize

# Download NLTK resources
nltk.download('punkt')
nltk.download('stopwords')

# Load your dataset
# Assuming your dataset has columns 'text' and 'humour_style'
# Adjust the path and format accordingly
dataset_path = 'Humour_style.xlsx'
df = pd.read_excel(dataset_path)

# Preprocess the text data
stop_words = set(stopwords.words('english'))
ps = PorterStemmer()

def preprocess_text(text):
    words = word_tokenize(text)
    words = [ps.stem(word.lower()) for word in words if word.isalpha() and word.lower() not in stop_words]
    return ' '.join(words)

df['processed_text'] = df['JOKES'].apply(preprocess_text)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    df['processed_text'], df['LABELS'], test_size=0.2, random_state=42
)

# Vectorize the text data using TF-IDF
#vectorizer = CountVectorizer()
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Encode the target labels
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Train a multinomial logistic regression model
model = LogisticRegression(multi_class='multinomial', max_iter=500)
model.fit(X_train_tfidf, y_train_encoded)

# Predictions on the test set
y_pred = model.predict(X_test_tfidf)

# Evaluate the model
accuracy = accuracy_score(y_test_encoded, y_pred)
classification_rep = classification_report(y_test_encoded, y_pred)

print(f'Accuracy: {accuracy}')
print('Classification Report:\n', classification_rep)


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\maryk\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\maryk\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Accuracy: 0.5296442687747036
Classification Report:
               precision    recall  f1-score   support

           0       0.46      0.62      0.53        56
           1       0.58      0.36      0.44        50
           2       0.56      0.41      0.47        49
           3       0.47      0.60      0.53        42
           4       0.63      0.64      0.64        56

    accuracy                           0.53       253
   macro avg       0.54      0.53      0.52       253
weighted avg       0.54      0.53      0.52       253



#### Pytorch

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torcheval.metrics import BinaryConfusionMatrix, BinaryAccuracy, BinaryPrecision, BinaryRecall, BinaryF1Score

# Creates a Tensor from a numpy.ndarray.
# Any changes you make to the tensor will reflect in the original NumPy array
x_train_tensor = torch.from_numpy(xtrain_features).float()
y_train_tensor = torch.from_numpy(y_train).int()
x_test_tensor = torch.from_numpy(xtest_features).float()
y_test_tensor = torch.from_numpy(y_test).float()

## Changing labels to one-hot encoding to match output size
y_train_tensor = y_train_tensor.long() # If y_train_tensor is of type float, convert it to integer first
num_classes = len(torch.unique(y_train_tensor)) # Determine the number of classes
y_train_tensor = F.one_hot(y_train_tensor, num_classes=num_classes) # Convert to one-hot encoding
y_train_tensor = y_train_tensor.float() #Convert long type back to float as BCELOSS uses float

# Ask PyTorch to store any computed gradients so that we can examine them
x_train_tensor.requires_grad_(True)

# should be "None" at the moment. It will only be filled later after you call backward()
print(x_train_tensor.grad)

# Use a GPU if it exists
if torch.cuda.is_available():
    x_train_tensor = x_train_tensor.to('cuda')
    x_test_tensor = x_test_tensor.to('cuda')
    y_train_tensor = y_train_tensor.to('cuda')

print(y_train_tensor.shape)
print(num_classes)

None
torch.Size([1010, 5])
5


In [12]:
m = nn.Softmax() ##Sigmoid activation function

class LogisticRegressionPytorch(nn.Module):
    def __init__(self, n_input_vars, n_output_vars=5):
        super().__init__() # call constructor of superclass
        # CLASS torch.nn.Linear(in_features, out_features, bias=True, device=None, dtype=None)
        self.linear = nn.Linear(n_input_vars, n_output_vars)

    def forward(self, x):
        return m(self.linear(x))
    

input_n = xtrain_features.shape[1]
py_model = LogisticRegressionPytorch(input_n)

if torch.cuda.is_available():
    py_model.to('cuda')

print(py_model.linear.weight) # Print Weights
print(py_model.linear.bias)   # Print Bias
print(list(py_model.parameters())) # Print LR parameters (Both weight and Bias)

Parameter containing:
tensor([[ 6.5029e-03,  7.2377e-04,  5.0142e-04,  ..., -9.0817e-03,
         -8.5354e-03, -7.1694e-03],
        [-3.2552e-03, -5.7002e-03, -3.0747e-04,  ...,  5.3260e-03,
          7.8789e-03, -2.0703e-03],
        [-1.7740e-03, -7.2498e-03, -3.8780e-03,  ..., -6.0786e-03,
         -3.0342e-03, -4.4540e-03],
        [ 8.7529e-03,  8.3549e-03, -1.6993e-03,  ...,  1.6336e-03,
          6.5671e-03,  5.7072e-03],
        [ 6.0905e-03,  1.5982e-03,  5.7385e-03,  ...,  5.9996e-05,
          3.6836e-04, -6.3659e-03]], device='cuda:0', requires_grad=True)
Parameter containing:
tensor([-0.0071,  0.0043, -0.0091, -0.0082, -0.0012], device='cuda:0',
       requires_grad=True)
[Parameter containing:
tensor([[ 6.5029e-03,  7.2377e-04,  5.0142e-04,  ..., -9.0817e-03,
         -8.5354e-03, -7.1694e-03],
        [-3.2552e-03, -5.7002e-03, -3.0747e-04,  ...,  5.3260e-03,
          7.8789e-03, -2.0703e-03],
        [-1.7740e-03, -7.2498e-03, -3.8780e-03,  ..., -6.0786e-03,
         

In [19]:
criterion = nn.CrossEntropyLoss() # Binary cross entropy 

optimiser = torch.optim.Adam(py_model.parameters(), lr=0.01)

n_epoch = 1000

for epoch in range(n_epoch):
    # Reset the gradients
    optimiser.zero_grad()

    # forward pass
    y_hat = py_model(x_train_tensor)
    print(y_hat.shape)

    # compute loss
    loss = criterion(y_hat, y_train_tensor)

    # Backward pass (compute the gradients)
    loss.backward()

    # update parameters (weight and bias)
    optimiser.step()

    # print(f"Epoch: {epoch}\t w: {model.linear.weight.data[0]}\t b: {model.linear.bias.data[0]:.4f} \t L: {loss:.4f}")
    print(f"Epoch: {epoch}\t L: {loss:.4f}")


  return m(self.linear(x))


torch.Size([1010, 5])
Epoch: 0	 L: 0.9050
torch.Size([1010, 5])
Epoch: 1	 L: 0.9049
torch.Size([1010, 5])
Epoch: 2	 L: 0.9049
torch.Size([1010, 5])
Epoch: 3	 L: 0.9049
torch.Size([1010, 5])
Epoch: 4	 L: 0.9049
torch.Size([1010, 5])
Epoch: 5	 L: 0.9049
torch.Size([1010, 5])
Epoch: 6	 L: 0.9049
torch.Size([1010, 5])
Epoch: 7	 L: 0.9049
torch.Size([1010, 5])
Epoch: 8	 L: 0.9049
torch.Size([1010, 5])
Epoch: 9	 L: 0.9049
torch.Size([1010, 5])
Epoch: 10	 L: 0.9049
torch.Size([1010, 5])
Epoch: 11	 L: 0.9049
torch.Size([1010, 5])
Epoch: 12	 L: 0.9049
torch.Size([1010, 5])
Epoch: 13	 L: 0.9049
torch.Size([1010, 5])
Epoch: 14	 L: 0.9048
torch.Size([1010, 5])
Epoch: 15	 L: 0.9048
torch.Size([1010, 5])
Epoch: 16	 L: 0.9048
torch.Size([1010, 5])
Epoch: 17	 L: 0.9048
torch.Size([1010, 5])
Epoch: 18	 L: 0.9048
torch.Size([1010, 5])
Epoch: 19	 L: 0.9048
torch.Size([1010, 5])
Epoch: 20	 L: 0.9048
torch.Size([1010, 5])
Epoch: 21	 L: 0.9048
torch.Size([1010, 5])
Epoch: 22	 L: 0.9048
torch.Size([1010, 5])

In [20]:
y_predictions = py_model.forward(x_test_tensor) # prediction is in one-hot encoded form
binary_y_predictions= torch.argmax(y_predictions, dim=1) #convert it back to a one-dimensional tensor with binary values (0 or 1).
binary_y_predictions = binary_y_predictions.to(torch.int64)


y_predictions
binary_y_predictions
y_test_tensor

  return m(self.linear(x))


tensor([4., 3., 4., 1., 3., 4., 1., 4., 3., 4., 0., 0., 1., 4., 1., 4., 0., 0.,
        3., 1., 4., 4., 2., 2., 4., 4., 4., 3., 0., 3., 3., 4., 3., 3., 0., 4.,
        2., 4., 0., 2., 4., 0., 0., 1., 0., 4., 0., 4., 1., 0., 3., 4., 3., 4.,
        2., 1., 4., 0., 0., 1., 2., 4., 4., 2., 3., 1., 4., 0., 1., 3., 4., 2.,
        1., 0., 2., 0., 3., 3., 0., 3., 1., 1., 4., 1., 0., 0., 3., 0., 4., 4.,
        1., 3., 0., 2., 0., 0., 2., 4., 3., 4., 1., 2., 4., 1., 2., 2., 3., 2.,
        2., 4., 4., 3., 2., 4., 2., 2., 2., 4., 2., 4., 2., 3., 4., 1., 0., 4.,
        2., 3., 3., 3., 4., 0., 1., 3., 3., 1., 3., 4., 4., 2., 2., 3., 3., 2.,
        0., 3., 1., 1., 4., 4., 3., 3., 0., 0., 4., 0., 4., 0., 1., 1., 1., 1.,
        2., 4., 4., 2., 4., 2., 0., 0., 4., 1., 1., 0., 3., 0., 3., 4., 3., 1.,
        2., 1., 4., 1., 3., 0., 4., 0., 0., 3., 1., 2., 4., 2., 4., 4., 4., 0.,
        0., 3., 2., 4., 0., 3., 2., 3., 0., 0., 1., 2., 1., 0., 3., 0., 2., 4.,
        3., 2., 4., 2., 3., 0., 3., 2., 

In [22]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Assuming y_predictions contains the predicted class indices
# and y_test_tensor contains the true class indices

# Sample data (replace these with your actual predictions and true labels)

# Convert PyTorch tensors to numpy arrays
multi_y_predictions = y_predictions.detach().cpu().numpy()
multi_y_true = y_test_tensor.detach().cpu().numpy()

# Convert predicted probabilities to class indices
predicted_labels = np.argmax(multi_y_predictions, axis=1)

# Compute metrics
accuracy = accuracy_score(multi_y_true, predicted_labels)
precision = precision_score(multi_y_true, predicted_labels, average='weighted')
recall = recall_score(multi_y_true, predicted_labels, average='weighted')
f1 = f1_score(multi_y_true, predicted_labels, average='weighted')

# Print metrics
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

# Detailed classification report
print("\nClassification Report:")
print(classification_report(multi_y_true, predicted_labels))


Accuracy: 0.6522
Precision: 0.6363
Recall: 0.6522
F1 Score: 0.6368

Classification Report:
              precision    recall  f1-score   support

         0.0       0.66      0.82      0.73        51
         1.0       0.59      0.40      0.48        42
         2.0       0.46      0.33      0.38        40
         3.0       0.59      0.63      0.61        52
         4.0       0.79      0.88      0.83        68

    accuracy                           0.65       253
   macro avg       0.62      0.61      0.61       253
weighted avg       0.64      0.65      0.64       253

