In [1]:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

In [2]:
#import nltk
#nltk.download('stopwords')
StopWords = stopwords.words('english')
print(StopWords[:])

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', '

#### Filtering Data

In [3]:
TextDataSet = []
with open('FinalDataSet.txt') as my_file:
    for line in my_file:
        TextDataSet.append(line.replace("\n","").replace("\r",""))
my_file.close()

Vectorizer = TfidfVectorizer(smooth_idf=False,stop_words=StopWords,max_df=0.63, min_df=0.001)
DataSet = Vectorizer.fit_transform(TextDataSet)
DataSetArray = DataSet.toarray()
#print(Vectorizer.get_feature_names_out())

In [4]:
print(DataSetArray.shape)

(5582, 1393)


#### Necessary Parameter Inputs

In [5]:
# Hyper-Parameters
# C = 2.5
# sigma = 0.1

angry = 1002
fear = 1002
joy = 1002
love = 1002
sad = 1002
suprise = 572

#### L2 SVM Training Function

CPU Computation

In [6]:
def KernelFunction(X1,X2,sigma):
    # np.exp((-1)*(np.linalg.norm(X1-X2))/(2*(sigma**2)))  ## RBF Kernel Function
    # np.dot(X1,X2)                                        ## Liner Kernel function
    return np.exp((-1)*(np.linalg.norm(X1-X2))/(2*(sigma**2)))

def KernelMatrix(Data,Y,NoOfDataSets,si):
    K = np.ones((NoOfDataSets,NoOfDataSets))
    for i in range(0,NoOfDataSets):
        for j in range(0,NoOfDataSets):
            K[i,j] = Y[i,0]*Y[j,0]*KernelFunction(Data[i,0:],Data[j,0:],si)
    return K

def SVMPara(Data,Y,C,NoOfDataSets,sigma):
    UpperMatrix = np.concatenate((np.array([[0]]),(-1)*Y.transpose()),axis = 1)
    LowerMatrix = np.concatenate((Y,KernelMatrix(Data,Y,NoOfDataSets,sigma) + ((1/C)*np.eye(NoOfDataSets))),axis = 1)
    A = np.concatenate((UpperMatrix,LowerMatrix),axis = 0)
    B = (np.concatenate((np.array([[0]]),np.array([np.ones(NoOfDataSets)])),axis = 1)).transpose()
    return np.linalg.inv(A)@B

def Weight(w,X_train,Y_train):
    return np.concatenate((np.array([[w[0,0]]]),np.transpose(np.array([np.sum(np.multiply(np.multiply(Y_train,w[1:]),X_train),axis=0)]))),axis=0)


GPU Computation

#### Outcomes
###### Note: Using DatasetArray variable as data

In [7]:
S_Angry = np.array([np.concatenate(((1)*np.ones(angry),np.concatenate(((-1)*np.ones(fear ),np.concatenate(((-1)*np.ones(joy),np.concatenate(((-1)*np.ones(love),np.concatenate(((-1)*np.ones(sad),(-1)*np.ones(suprise)))))))))))]).transpose()
S_Fear = np.array([np.concatenate(((-1)*np.ones(angry),np.concatenate(((1)*np.ones(fear ),np.concatenate(((-1)*np.ones(joy),np.concatenate(((-1)*np.ones(love),np.concatenate(((-1)*np.ones(sad),(-1)*np.ones(suprise)))))))))))]).transpose()
S_Joy = np.array([np.concatenate(((-1)*np.ones(angry),np.concatenate(((-1)*np.ones(fear ),np.concatenate(((1)*np.ones(joy),np.concatenate(((-1)*np.ones(love),np.concatenate(((-1)*np.ones(sad),(-1)*np.ones(suprise)))))))))))]).transpose()
S_Love = np.array([np.concatenate(((-1)*np.ones(angry),np.concatenate(((-1)*np.ones(fear ),np.concatenate(((-1)*np.ones(joy),np.concatenate(((1)*np.ones(love),np.concatenate(((-1)*np.ones(sad),(-1)*np.ones(suprise)))))))))))]).transpose()
S_Sad = np.array([np.concatenate(((-1)*np.ones(angry),np.concatenate(((-1)*np.ones(fear ),np.concatenate(((-1)*np.ones(joy),np.concatenate(((-1)*np.ones(love),np.concatenate(((1)*np.ones(sad),(-1)*np.ones(suprise)))))))))))]).transpose()
S_Surprise = np.array([np.concatenate(((-1)*np.ones(angry),np.concatenate(((-1)*np.ones(fear ),np.concatenate(((-1)*np.ones(joy),np.concatenate(((-1)*np.ones(love),np.concatenate(((-1)*np.ones(sad),(1)*np.ones(suprise)))))))))))]).transpose()

### Testing for Hyper parameter

In [8]:
X_train, X_test, Y_train, Y_test = train_test_split(DataSetArray,S_Sad,test_size=0.3, random_state=935)
C = np.array([0.01, 0.1, 1, 10, 100])
Sigma = np.array([0.001, 0.01, 0.1, 1, 10])
X_test = np.concatenate((np.ones((X_test.shape[0],1)),X_test), axis=1)
Result = np.ones((len(C),len(Sigma)))
for i in range(0,len(C)):
    for j in range(0,len(Sigma)):
        w = SVMPara(X_train,Y_train,C[i],X_train.shape[0],Sigma[j])
        y = np.sign(X_test@Weight(w,X_train,Y_train))
        Result[i,j] = np.trace(confusion_matrix(Y_test,y))/len(Y_test)
        print(i,j)


0 0
0 1
0 2
0 3
0 4
1 0
1 1
1 2
1 3
1 4
2 0
2 1
2 2
2 3
2 4
3 0
3 1
3 2
3 3
3 4
4 0
4 1
4 2
4 3
4 4


In [9]:
print(Result)
#print(C) 
np.savetxt("Data_2.csv",np.around(Result,decimals=10),delimiter=',')

[[0.81432836 0.81432836 0.81432836 0.81432836 0.81432836]
 [0.82865672 0.82865672 0.82865672 0.82985075 0.83343284]
 [0.87164179 0.87164179 0.87164179 0.9080597  0.84238806]
 [0.84358209 0.84358209 0.84358209 0.89910448 0.8238806 ]
 [0.84179104 0.84179104 0.84179104 0.89731343 0.86985075]]


In [59]:
a = np.array([0.01 , 0.1 , 1, 10 ,100 , 1000])


0 0
