In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from keras.models import Model
from sklearn.metrics import classification_report
from keras import layers, models, optimizers
import sklearn.metrics as metrics

### For 2 labels:

In [None]:
# Load the 2-label dataset. Column 'a' is vectorized as text and column 'b'
# is used as the target (presumably raw text and class label -- confirm
# against the CSV schema).
speech_data = pd.read_csv(".csv")

# Split the dataset into training and testing datasets (70% / 30%, fixed seed).
x_train, x_test, y_train, y_test = train_test_split(
    speech_data['a'], speech_data['b'], test_size=0.3, random_state=1)

# Create a count vectorizer object.
# The vocabulary is learned from the training split only, so that no
# information about the held-out documents leaks into the features. This also
# matches how the TfidfTransformer below is fitted (training data only).
cv = CountVectorizer(stop_words='english')
cv.fit(x_train)

# Transform the training and testing data using the count vectorizer object.
x_train_count = cv.transform(x_train)
x_test_count = cv.transform(x_test)

# Re-weight the raw counts with TF-IDF; IDF statistics come from the training split.
tfidf_tr = TfidfTransformer()
x_train_count_tfidf = tfidf_tr.fit_transform(x_train_count)
x_test_count_tfidf = tfidf_tr.transform(x_test_count)

# N-gram level TF-IDF (unigrams and bigrams), again fitted on the training
# split only so vocabulary and IDF weights never see the test documents.
tv_ngram = TfidfVectorizer(ngram_range=(1,2))
tv_ngram.fit(x_train)
x_train_tfidf_ngram = tv_ngram.transform(x_train)
x_test_tfidf_ngram = tv_ngram.transform(x_test)

In [None]:
def classifierModel(classifier, Xtrain, Ytrain, Xtest, Ytest=None, target_names=None):
    """Fit a classifier, predict on the test features and print its scores.

    Parameters
    ----------
    classifier : estimator exposing ``fit(X, y)`` and ``predict(X)``.
    Xtrain, Ytrain : training features and training labels.
    Xtest : test features to predict on.
    Ytest : true labels for ``Xtest``. When omitted, the notebook-level
        ``y_test`` variable is used, which is what earlier callers relied on.
    target_names : display names for the classes in the report. Defaults to
        ``['class 0', 'class 1']``.

    Returns
    -------
    None. The accuracy and the classification report are printed.
    """
    if Ytest is None:
        # Backward-compatible fallback: existing calls pass only four
        # arguments and expect scoring against the global test labels.
        Ytest = y_test
    if target_names is None:
        target_names = ['class 0', 'class 1']

    classifier.fit(Xtrain, Ytrain)
    predictions = classifier.predict(Xtest)

    # scikit-learn metrics take (y_true, y_pred) in that order.
    print("\nAccuracy : ", accuracy_score(Ytest, predictions))
    print(classification_report(Ytest, predictions, target_names=target_names))

In [None]:
# Multinomial Naive Bayes, evaluated once per feature representation.
# Each entry is (heading to print, training features, test features).
nb_experiments = [
    ("\nNaive Bayes with Count Vectors: ", x_train_count, x_test_count),
    ("\nNaive Bayes with Count Vectors + TF-IDF: ", x_train_count_tfidf, x_test_count_tfidf),
    ("\nNaive Bayes with N-Gram Vectors: ", x_train_tfidf_ngram, x_test_tfidf_ngram),
]

for nb_heading, nb_train, nb_test in nb_experiments:
    print(nb_heading)
    # A fresh estimator per run so no fitted state carries over.
    classifierModel(MultinomialNB(), nb_train, y_train, nb_test)


In [None]:
# Support vector classifier with a linear kernel, evaluated once per
# feature representation: (heading to print, training features, test features).
svm_experiments = [
    ("\nLinearSVM with Count Vectors: ", x_train_count, x_test_count),
    ("\nLinearSVM with Count Vectors + TF-IDF: ", x_train_count_tfidf, x_test_count_tfidf),
    ("\nLinearSVM with N-Gram Vectors: ", x_train_tfidf_ngram, x_test_tfidf_ngram),
]

for svm_heading, svm_train, svm_test in svm_experiments:
    print(svm_heading)
    classifierModel(SVC(kernel='linear'), svm_train, y_train, svm_test)


In [None]:
# Random forest (1024 trees, fixed seed), evaluated once per feature
# representation: (heading to print, training features, test features).
rf_experiments = [
    ("\nRandom Forrest Classifier with Count Vectors: ", x_train_count, x_test_count),
    ("\nRandom Forrest Classifier with Count Vectors + TF-IDF: ", x_train_count_tfidf, x_test_count_tfidf),
    ("\nRandom Forrest Classifier with N-Gram Vectors: ", x_train_tfidf_ngram, x_test_tfidf_ngram),
]

for rf_heading, rf_train, rf_test in rf_experiments:
    print(rf_heading)
    # Build a new forest for every run; the seed keeps each run repeatable.
    forest = RandomForestClassifier(random_state=40,n_estimators=1024)
    classifierModel(forest, rf_train, y_train, rf_test)


In [None]:
def rnnModel(Xtrain, Ytrain, Xtest, Ytest=None):
    """Train a small bidirectional-LSTM binary classifier and print its scores.

    The network is Embedding(1000, 64) -> Bidirectional(LSTM(10)) ->
    Dense(64, relu) -> Dense(1, sigmoid), compiled with binary cross-entropy
    and the Adam optimizer, and trained for 10 epochs with batch size 30.

    Parameters
    ----------
    Xtrain, Ytrain : training features and training labels.
    Xtest : test features.
    Ytest : true labels for ``Xtest``. When omitted, the notebook-level
        ``y_test`` variable is used, which is what earlier callers relied on.

    Returns
    -------
    None. Test accuracy and a classification report are printed.

    Notes
    -----
    Labels are assumed to be encoded as 0/1 (required by the sigmoid output
    and the two report names) -- TODO confirm against the dataset.
    NOTE(review): an Embedding layer looks up integer indices smaller than its
    first argument (1000), yet the notebook calls this function with count and
    TF-IDF matrices. Confirm that these inputs are what the model is meant to
    consume; tokenized, padded integer sequences are the usual input here.
    """
    if Ytest is None:
        # Backward-compatible fallback to the global test labels.
        Ytest = y_test

    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(1000, 64),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(10)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    model.fit(Xtrain, Ytrain, batch_size=30, epochs=10)

    test_loss, test_acc = model.evaluate(Xtest, Ytest)
    print("\nAccuracy : ", test_acc)

    # Flatten the (n, 1) sigmoid outputs and turn them into hard 0/1 integer
    # labels so they have the same type as the true labels in the report.
    probabilities = model.predict(Xtest).ravel()
    predictions = probabilities.round().astype(int)

    target_names = ['class 0', 'class 1']
    print(classification_report(Ytest, predictions, target_names=target_names))

In [None]:
# Train and evaluate the RNN once per feature representation:
# (heading to print, training features, test features).
rnn_experiments = [
    ("\nRNN with Count Vectors: ", x_train_count, x_test_count),
    ("\nRNN with  Count Vectors + TF-IDF: ", x_train_count_tfidf, x_test_count_tfidf),
    ("\nRNN with N-Gram Vectors: ", x_train_tfidf_ngram, x_test_tfidf_ngram),
]

for rnn_heading, rnn_train, rnn_test in rnn_experiments:
    print(rnn_heading)
    rnnModel(rnn_train, y_train, rnn_test)

### For 3 labels:

In [None]:
# Load the 3-label dataset. Column 'a' is vectorized as text and column 'b'
# is used as the target (presumably raw text and class label -- confirm
# against the CSV schema).
speech = pd.read_csv(".csv")

# Split the dataset into training and testing datasets (70% / 30%, fixed seed).
x_train, x_test, y_train, y_test = train_test_split(
    speech['a'], speech['b'], test_size=0.3, random_state=1)

# Create a count vectorizer object.
# The vocabulary is learned from the training split only, so that no
# information about the held-out documents leaks into the features. This also
# matches how the TfidfTransformer below is fitted (training data only).
cv = CountVectorizer(stop_words='english')
cv.fit(x_train)

# Transform the training and testing data using the count vectorizer object.
x_train_count = cv.transform(x_train)
x_test_count = cv.transform(x_test)

# Re-weight the raw counts with TF-IDF; IDF statistics come from the training split.
tfidf_tr = TfidfTransformer()
x_train_count_tfidf = tfidf_tr.fit_transform(x_train_count)
x_test_count_tfidf = tfidf_tr.transform(x_test_count)

# N-gram level TF-IDF (unigrams and bigrams), again fitted on the training
# split only so vocabulary and IDF weights never see the test documents.
tv_ngram = TfidfVectorizer(ngram_range=(1,2))
tv_ngram.fit(x_train)
x_train_tfidf_ngram = tv_ngram.transform(x_train)
x_test_tfidf_ngram = tv_ngram.transform(x_test)


In [None]:
def classifierModel(classifier, Xtrain, Ytrain, Xtest, Ytest=None, target_names=None):
    """Fit a classifier, predict on the test features and print its scores.

    Three-class variant: the default report names are
    ``['class 0', 'class 1', 'class 2']``.

    Parameters
    ----------
    classifier : estimator exposing ``fit(X, y)`` and ``predict(X)``.
    Xtrain, Ytrain : training features and training labels.
    Xtest : test features to predict on.
    Ytest : true labels for ``Xtest``. When omitted, the notebook-level
        ``y_test`` variable is used, which is what earlier callers relied on.
    target_names : display names for the classes in the report.

    Returns
    -------
    None. The accuracy and the classification report are printed.
    """
    if Ytest is None:
        # Backward-compatible fallback: existing calls pass only four
        # arguments and expect scoring against the global test labels.
        Ytest = y_test
    if target_names is None:
        target_names = ['class 0', 'class 1', 'class 2']

    classifier.fit(Xtrain, Ytrain)
    predictions = classifier.predict(Xtest)

    # scikit-learn metrics take (y_true, y_pred) in that order.
    print("\nAccuracy : ", accuracy_score(Ytest, predictions))
    print(classification_report(Ytest, predictions, target_names=target_names))

In [None]:
# Multinomial Naive Bayes on the 3-label data, one run per feature
# representation: (heading to print, training features, test features).
nb_runs_3 = (
    ("\nNaive Bayes with Count Vectors: ", x_train_count, x_test_count),
    ("\nNaive Bayes with Count Vectors + TF-IDF: ", x_train_count_tfidf, x_test_count_tfidf),
    ("\nNaive Bayes with N-Gram Vectors: ", x_train_tfidf_ngram, x_test_tfidf_ngram),
)

for run_title, run_train, run_test in nb_runs_3:
    print(run_title)
    # A fresh estimator per run so no fitted state carries over.
    classifierModel(MultinomialNB(), run_train, y_train, run_test)

In [None]:
# LinearSVC on the 3-label data, one run per feature representation:
# (heading to print, training features, test features).
linear_svc_runs = (
    ("\nLinearSVM with Count Vectors: ", x_train_count, x_test_count),
    ("\nLinearSVM with Count Vectors + TF-IDF: ", x_train_count_tfidf, x_test_count_tfidf),
    ("\nLinearSVM with N-Gram Vectors: ", x_train_tfidf_ngram, x_test_tfidf_ngram),
)

for run_title, run_train, run_test in linear_svc_runs:
    print(run_title)
    classifierModel(LinearSVC(), run_train, y_train, run_test)


In [None]:
# SVC with a linear kernel on the 3-label data, one run per feature
# representation: (heading to print, training features, test features).
# The printed headings are the same "LinearSVM" text used for the LinearSVC runs.
svc_linear_runs = (
    ("\nLinearSVM with Count Vectors: ", x_train_count, x_test_count),
    ("\nLinearSVM with Count Vectors + TF-IDF: ", x_train_count_tfidf, x_test_count_tfidf),
    ("\nLinearSVM with N-Gram Vectors: ", x_train_tfidf_ngram, x_test_tfidf_ngram),
)

for run_title, run_train, run_test in svc_linear_runs:
    print(run_title)
    classifierModel(SVC(kernel='linear'), run_train, y_train, run_test)

In [None]:
# Random forest (1024 trees, fixed seed) on the 3-label data, one run per
# feature representation: (heading to print, training features, test features).
forest_runs = (
    ("\nRandom Forrest Classifier with Count Vectors: ", x_train_count, x_test_count),
    ("\nRandom Forrest Classifier with Count Vectors + TF-IDF: ", x_train_count_tfidf, x_test_count_tfidf),
    ("\nRandom Forrest Classifier with N-Gram Vectors: ", x_train_tfidf_ngram, x_test_tfidf_ngram),
)

for run_title, run_train, run_test in forest_runs:
    print(run_title)
    # Build a new forest for every run; the seed keeps each run repeatable.
    forest_clf = RandomForestClassifier(random_state=40,n_estimators=1024)
    classifierModel(forest_clf, run_train, y_train, run_test)


In [None]:
def rnnModel(Xtrain, Ytrain, Xtest, Ytest=None):
    """Train a small bidirectional-LSTM multi-class classifier and print its scores.

    The network is Embedding(1000, 64) -> Bidirectional(LSTM(10)) ->
    Dense(64, relu) -> Dense(k, softmax), where k is the number of distinct
    labels found in ``Ytrain`` and ``Ytest``. It is compiled with sparse
    categorical cross-entropy and the Adam optimizer, and trained for 10 epochs
    with batch size 30.

    The earlier version used one sigmoid unit with binary cross-entropy, which
    can only separate two classes, and passed a 2-D array of rounded sigmoid
    outputs to the report. Predictions are now the arg-max class per row.

    Parameters
    ----------
    Xtrain, Ytrain : training features and training labels.
    Xtest : test features.
    Ytest : true labels for ``Xtest``. When omitted, the notebook-level
        ``y_test`` variable is used, which is what earlier callers relied on.

    Returns
    -------
    None. Test accuracy and a classification report are printed.

    Notes
    -----
    This definition replaces any function named ``rnnModel`` defined earlier
    in the notebook.
    NOTE(review): an Embedding layer looks up integer indices smaller than its
    first argument (1000), yet the notebook calls this function with count and
    TF-IDF matrices. Confirm that these inputs are what the model is meant to
    consume; tokenized, padded integer sequences are the usual input here.
    """
    if Ytest is None:
        # Backward-compatible fallback to the global test labels.
        Ytest = y_test

    # Map whatever label values the data uses onto 0..k-1, the encoding that
    # sparse categorical cross-entropy expects. The class list is built from
    # both splits so every label has an index.
    train_labels = np.asarray(Ytrain)
    test_labels = np.asarray(Ytest)
    classes = np.unique(np.concatenate([train_labels, test_labels]))
    ytrain_idx = np.searchsorted(classes, train_labels)
    ytest_idx = np.searchsorted(classes, test_labels)
    n_classes = len(classes)

    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(1000, 64),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(10)),
        tf.keras.layers.Dense(64, activation='relu'),
        # One output unit per class; softmax yields a distribution over classes.
        tf.keras.layers.Dense(n_classes, activation='softmax')
    ])

    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    model.fit(Xtrain, ytrain_idx, batch_size=30, epochs=10)

    test_loss, test_acc = model.evaluate(Xtest, ytest_idx)
    print("\nAccuracy : ", test_acc)

    # (n, k) class probabilities -> (n,) predicted class indices.
    predictions = model.predict(Xtest).argmax(axis=1)

    # 'class 0', 'class 1', 'class 2' for three labels; passing `labels`
    # keeps names aligned even if a class is never predicted.
    target_names = ['class %d' % i for i in range(n_classes)]
    print(classification_report(ytest_idx, predictions,
                                labels=list(range(n_classes)),
                                target_names=target_names))
    

In [None]:
# Train and evaluate the RNN on the 3-label data, one run per feature
# representation: (heading to print, training features, test features).
rnn_runs_3 = (
    ("\nRNN with Count Vectors: ", x_train_count, x_test_count),
    ("\nRNN with  Count Vectors + TF-IDF: ", x_train_count_tfidf, x_test_count_tfidf),
    ("\nRNN with N-Gram Vectors: ", x_train_tfidf_ngram, x_test_tfidf_ngram),
)

for run_title, run_train, run_test in rnn_runs_3:
    print(run_title)
    rnnModel(run_train, y_train, run_test)