In [46]:
import pandas as pd
import numpy as np
%store -r X_tfidf_sparse
%store -r X_tfidf_sparse_test
%store -r y_test
%store -r y_train_encoded


In [47]:
class LogisticRegressionMulticlass:
    def __init__(self, learning_rate=0.3, num_iterations=10000):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.weights = None
        self.bias = None
    
    
    def softmax(self, z):
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)
    
    def xavier_init(self, shape):
        fan_in = shape[0]
        fan_out = shape[1]
        limit = np.sqrt(6 / (fan_in + fan_out))
        return np.random.uniform(-limit, limit, size=shape)
    
    def fit(self, X, y):
        num_samples, num_features = X.shape
        num_classes = len(np.unique(y))
        self.weights = self.xavier_init((num_features, num_classes))
        self.bias = np.zeros((1, num_classes))
        y_one_hot = np.eye(num_classes)[y]
        
        for _ in range(self.num_iterations):
            linear_model = X.dot(self.weights) + self.bias
            y_pred = self.softmax(linear_model)
            
            dw = (1 / num_samples) * X.T.dot(y_pred - y_one_hot)
            db = (1 / num_samples) * np.sum(y_pred - y_one_hot, axis=0, keepdims=True)
            
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db
    
            
    def predict(self, X):

        num_samples, num_features = X.shape
        _, num_classes = self.weights.shape
    
        if num_features != self.weights.shape[0]:
            self.weights = self.xavier_init((num_features, num_classes))
            
        linear_predictions = X.dot(self.weights) + self.bias
        y_pred = self.softmax(linear_predictions)
        print (self.weights.shape)
        class_pred = np.argmax(y_pred, axis=1)  # Choose the class with the highest probability
        return class_pred

In [48]:
model_emotions = LogisticRegressionMulticlass()
model_emotions.fit(X_tfidf_sparse, y_train_encoded)

In [49]:
pred = model_emotions.predict (X_tfidf_sparse)

from sklearn.metrics import f1_score

micro_average_f1 = f1_score(pred, y_train_encoded, average='micro')
print("Micro-average F1-score:", micro_average_f1)

f1_external = f1_score(pred, y_train_encoded, average='weighted')
print("F1-score on training data:", f1_external)

#print (pred [0:300],y_train_encoded [0:300])


(7420, 7)
Micro-average F1-score: 0.7586656727543793
F1-score on training data: 0.7590430169509217


In [50]:
# Predict labels using the trained logistic regression model

y_external_pred = model_emotions.predict(X_tfidf_sparse_test)

print (y_test_encoded)

f1_external = f1_score(y_test_encoded, y_external_pred, average='weighted')
print("F1-score on external test data:", f1_external)
micro_average_f1 = f1_score(y_test_encoded, y_external_pred, average='micro')
print("Micro-average F1-score:", micro_average_f1)



(7420, 7)
[4 4 4 ... 6 4 4]
F1-score on external test data: 0.5838355255505735
Micro-average F1-score: 0.5860869565217391
