<a href="https://colab.research.google.com/drive/1ktHlfYzH46LyRVWdt0dQP5tQQrnRaNGF?usp=sharing" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open This File In Colab "/></a>

# **97% Accuracy** for the **FINAL MODEL**

# Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras

# Data Preparation

#### Scraping data from various sources and putting it together as 'df'

# Importing Data

In [None]:
# Read the train and test CSV files and stack them into a single frame `df`.
train=pd.read_csv('/kaggle/input/text-emotion-recognition/train.csv')
test=pd.read_csv('/kaggle/input/text-emotion-recognition/test.csv')
# pd.concat is called without ignore_index, so each frame keeps its own row
# labels and labels can repeat in `df` until the index is reset.
df=pd.concat([train, test])

In [None]:
df

# Data Preprocessing

Lower Casing

In [None]:
df['text']=df['text'].str.lower()

Punctuation

In [None]:
import re
def fix_puntuation(text):
  """Return `text` with every backtick replaced by a straight apostrophe."""
  apostrophised = re.sub("`", "'", text)
  return apostrophised
# Normalise backticks in every text (values are cast to str first).
df['text'] = df['text'].astype(str).apply(fix_puntuation)

Contractions

In [None]:
!pip install contractions --quiet
import contractions
def fix_contraction(text):
  """Return `text` after passing it through `contractions.fix`."""
  expanded = contractions.fix(text)
  return expanded
# Run fix_contraction over every text, then preview the first five rows.
df['text'] = df['text'].astype(str).apply(fix_contraction)
df.head(5)

Cleaning - Removing unwanted characters

In [None]:
def cleaning(text):
  """Strip URLs, HTML tags and all non-letter characters from `text`.

  The removals run in a fixed order: whole URLs first, then HTML tags, and only
  then every remaining character outside a-z / A-Z. Doing the non-letter pass
  first (as a single alternation would) only deletes the punctuation of a URL
  and leaves fragments such as "https example com" behind.

  Args:
    text: the string to clean.

  Returns:
    The cleaned string with every run of whitespace collapsed to one space.
    Leading/trailing single spaces are not stripped.
  """
  # Whole URLs, with or without a scheme.
  text = re.sub(r'https?://\S+|www\.\S+', ' ', text)
  # HTML tags; a letter is required after '<' so that things like "<3" are
  # not treated as the start of a tag.
  text = re.sub(r'</?[a-zA-Z][^<>]*>', ' ', text)
  # Everything that is not an ASCII letter (digits and punctuation included).
  text = re.sub(r'[^a-zA-Z]', ' ', text)
  # Collapse the whitespace runs left by the substitutions above.
  text = re.sub(r'\s+', ' ', text)
  return text
# Clean every text with cleaning(), then look at ten random rows.
df['text'] = df['text'].astype(str).apply(cleaning)
df.sample(10)

Remove stopwords

In [None]:
import nltk #Natural Language Toolkit

nltk.download('stopwords')

In [None]:
from nltk.corpus import stopwords
# Build the stop-word set from NLTK's English list, minus the negations
# 'no', 'nor' and 'not', so those three words are not filtered out of the texts.
s = list(stopwords.words('english'))
for negation in ('no', 'nor', 'not'):
  s.remove(negation)
s = set(s)
def rem_s(text):
  """Return `text` without the whitespace-separated words contained in `s`."""
  kept = []
  for token in text.split():
    if token in s:
      continue
    kept.append(token)
  return " ".join(kept)

In [None]:
# Remove stop words from every text, then look at ten random rows.
df['text'] = df['text'].astype(str).apply(rem_s)
df.sample(10)

Abbreviations

In [None]:
import csv
import re
# Read the slang file as two '='-separated columns (no header row) and keep
# each column as a plain list for translator().
data = pd.read_csv(
    '/content/drive/MyDrive/Sentiment Analysis/slang.txt',
    names=['abbr', 'fullform'],
    header=None,
    delimiter='=',
)
abbr, fullform = (data[column].tolist() for column in ('abbr', 'fullform'))
def translator(user_string, abbreviations=None, expansions=None):
    """Replace slang abbreviations in `user_string` with their full forms.

    The string is split on single spaces; each word is upper-cased and looked
    up among the abbreviations. Words without a match are kept unchanged.

    Args:
        user_string: the text to translate.
        abbreviations: short forms to look for (compared against the
            upper-cased word). Defaults to the module-level `abbr` list.
        expansions: replacement phrases, aligned with `abbreviations`.
            Defaults to the module-level `fullform` list.

    Returns:
        The words joined back together with single spaces.
    """
    if abbreviations is None:
        abbreviations = abbr
    if expansions is None:
        expansions = fullform
    # One dict lookup per word instead of scanning the whole list for each word.
    # As with the original scan, the last entry wins when an abbreviation is
    # listed twice. Non-string entries (e.g. NaN from a line without '=') are
    # skipped so that join() below never receives a float.
    lookup = {
        short: full
        for short, full in zip(abbreviations, expansions)
        if isinstance(short, str) and isinstance(full, str)
    }
    return ' '.join(lookup.get(word.upper(), word) for word in user_string.split(" "))
# Expand slang abbreviations in every text, then preview the first ten rows.
df['text'] = df['text'].astype(str).apply(translator)
df.head(10)

Missing values

In [None]:
df.isna().sum()

In [None]:
# Drop, in place, every row that has a missing value in any column.
# NOTE(review): 'text' was passed through astype(str) in the preprocessing
# cells, which presumably turned missing texts into the string 'nan'; such rows
# would not be removed here — confirm.
df.dropna(inplace=True)

Duplicates

In [None]:
df.duplicated().sum()

In [None]:
df.drop_duplicates(inplace=True)

In [None]:
df.reset_index(drop=True, inplace=True)

Defining X and y

In [None]:
X=df['text']
y=df['emotion']

# Classical ML

Text Conversion

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split

In [None]:
cv=CountVectorizer(lowercase=True,ngram_range=(1,1))

In [None]:
X_ml=cv.fit_transform(df['text'])

In [None]:
X_ml.shape

In [None]:
y=df.emotion

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X_ml,y,test_size=0.1,random_state=2401)

In [None]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

Creating Model

In [None]:
# from sklearn.linear_model import LogisticRegression
# model=LogisticRegression()#Logistic Regression Model %76

In [None]:
# from sklearn.neighbors import KNeighborsClassifier
# model=KNeighborsClassifier()#K-Nearest Neighbour Model %66

In [None]:
# from sklearn.tree import DecisionTreeClassifier
# model=DecisionTreeClassifier()#Decision Tree Model %69

In [None]:
# from sklearn.svm import SVC
# model=SVC() #Support Vector Machine

In [None]:
# from sklearn.ensemble import RandomForestClassifier
# model=RandomForestClassifier(n_estimators=200)

In [None]:
from sklearn.neural_network import MLPClassifier
# Single hidden layer of 128 logistic units with early stopping.
# random_state is fixed (same seed as the train/test split) so weight
# initialisation, shuffling and the early-stopping hold-out are reproducible.
# The original note on this line recorded a score of 78.
model=MLPClassifier(activation='logistic', alpha=0.01, batch_size=64,
              early_stopping=True, hidden_layer_sizes=(128,),
              learning_rate='adaptive', max_iter=1000, shuffle=True,
              random_state=2401)

Training Model

In [None]:
model.fit(X_train,y_train)

Prediction Model

In [None]:
y_pred=model.predict(X_test)

Accuracy

In [None]:
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score

In [None]:
confusion_matrix(y_test,y_pred)

In [None]:
print(classification_report(y_test,y_pred))

In [None]:
accuracy_score(y_test,y_pred)

# DEEP LEARNING

In [None]:
# Number of whitespace-separated tokens in every preprocessed text.
# The 'text' column is addressed by name: the previous df.iloc[i][0] depended on
# 'text' being the first column and on positional Series[int] lookup, and it
# built a full row object for every index.
text = [len(sentence.split()) for sentence in df['text']]

In [None]:
max(text), min(text), np.mean(text)

In [None]:
# Ten-bin histogram of the per-text token counts collected in `text`.
fig, ax = plt.subplots(figsize=(12, 8))
n, bins, patches = ax.hist(text, bins=10, edgecolor='black', color="red", alpha=0.75)
plt.show()

## *Tokenization*

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Tokenizer / padding parameters
vocab_size=50000     # passed to Tokenizer as num_words
max_length=30        # every sequence is padded/truncated to this many tokens
trunc_type='post'    # cut over-long sequences at the end
oov_tok='<OOV>'      # placeholder token for out-of-vocabulary words

# Initialize the Tokenizer class
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)

# Generate the word index dictionary
tokenizer.fit_on_texts(df['text'])

# Print the length of the word index
word_index = tokenizer.word_index
print(f'number of words in word_index: {len(word_index)}')

# Print only the first entries of the word index: dumping the complete
# dictionary floods the cell output without adding information.
preview_size = 20
word_index_preview = dict(list(word_index.items())[:preview_size])
print(f'word_index (first {preview_size} entries): {word_index_preview}')
print()

# Generate and pad the sequences
sequences = tokenizer.texts_to_sequences(df['text'])
padded = pad_sequences(sequences, padding='post', truncating=trunc_type, maxlen=max_length)

# Print a sample text next to its padded sequence. `padded` is indexed by
# position, so the text is fetched by position as well (.iloc) instead of by
# index label, which only coincides with the position on a freshly reset index.
i=5
print(f"sample headline: {df['text'].iloc[i]}")
print(f'padded sequence: {padded[i]}')
print()

# Print dimensions of padded sequences
print(f'shape of padded sequences: {padded.shape}')

In [None]:
X=pd.DataFrame(padded)

In [None]:
from sklearn.model_selection import train_test_split
# 90/10 split of the padded sequences and the emotion labels (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(X, df.emotion, random_state=2401, test_size=0.1)
# The deep-learning cells address this split as X_train_dl / X_test_dl while
# sharing y_train / y_test with it, so both names must refer to the same rows.
# Without these aliases those cells fail with a NameError on a fresh kernel.
X_train_dl, X_test_dl = X_train, X_test

In [None]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

## ANN

In [None]:
# Small fully connected network: 8 tanh -> 16 relu -> 7-way softmax,
# trained with SGD and early stopping on validation accuracy.
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(8, "tanh", input_dim=X_train_dl.shape[1]),
    tf.keras.layers.Dense(16, "relu"),
    tf.keras.layers.Dense(7, "softmax"),
])
model.compile(optimizer="sgd", loss="sparse_categorical_crossentropy", metrics=["acc"])

epochs = 101
# Stop after 10 epochs without val_acc improvement and roll back to the best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_acc',
    patience=10,
    restore_best_weights=True,
)
history = model.fit(
    X_train_dl,
    y_train,
    epochs=epochs,
    batch_size=16,
    validation_split=0.15,
    shuffle=True,
    callbacks=[early_stopping],
)

In [None]:
model.evaluate(X_test_dl,y_test)

In [None]:
# Training vs. validation accuracy per epoch.
# The x-axis is derived from the recorded history: early stopping decides how
# many epochs actually ran, so a hard-coded range() raises a shape-mismatch
# error whenever the run length differs.
train_acc = history.history['acc']
val_acc = history.history['val_acc']

plt.figure(figsize=(12,6))
plt.plot(range(len(train_acc)),train_acc,label='Training Accuracy',color='r')
plt.plot(range(len(val_acc)),val_acc,label='Validation Accuracy',color='black')

plt.xlabel("Epoch")
plt.ylabel("Accuracy")

plt.legend()
plt.title('Accuracy v/s Time')

plt.show()

## RNN

### LSTM

In [None]:
# Two stacked LSTM layers on top of a learned embedding, ending in a single
# sigmoid unit.
# NOTE(review): the embedding is vocab_size x 10000, a very large table —
# confirm the 10000 output dimension is intended.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, 10000, input_length=X_train_dl.shape[1]))
model.add(tf.keras.layers.SpatialDropout1D(0.2))
model.add(tf.keras.layers.LSTM(32, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))
model.add(tf.keras.layers.LSTM(16, dropout=0.2, recurrent_dropout=0.2, return_sequences=False))
model.add(tf.keras.layers.Dense(8, activation='relu'))
model.add(tf.keras.layers.Dense(1,activation='sigmoid'))
# A one-unit sigmoid head yields a single probability, which is what binary
# cross-entropy expects. Sparse categorical cross-entropy needs one output per
# class, so with one unit any label other than 0 is out of range.
# Assumes the labels are 0/1, like the other sigmoid models in this notebook — TODO confirm.
model.compile(loss=tf.keras.losses.BinaryCrossentropy(),optimizer=keras.optimizers.Adam(learning_rate=0.002529), metrics=["acc"])
model.summary()

In [None]:
epochs = 1000
# Early-stopping settings: give up after 10 epochs without val_acc improvement;
# the weights of the last epoch are kept (restore_best_weights=False).
stop_config = dict(
    monitor='val_acc',
    min_delta=0,
    patience=10,
    verbose=0,
    mode='auto',
    baseline=None,
    restore_best_weights=False,
)
early_stopping = tf.keras.callbacks.EarlyStopping(**stop_config)
history = model.fit(
    X_train_dl,
    y_train,
    epochs=epochs,
    batch_size=16,
    validation_split=0.15,
    callbacks=[early_stopping],
)

In [None]:
model.evaluate(X_test_dl,y_test)

In [None]:
# Training vs. validation accuracy per epoch.
# The LSTM model is compiled with metrics=["acc"], so the History object stores
# the curves under 'acc' / 'val_acc'; looking up 'binary_accuracy' raises KeyError.
plt.figure(figsize=(12,6))
epochs = len(history.history['acc'])
plt.plot(range(epochs),history.history['acc'],label='Training Accuracy',color='r')
plt.plot(range(epochs),history.history['val_acc'],label='Validation Accuracy',color='black')

plt.xlabel("Epoch")
plt.ylabel("Accuracy")

plt.legend()
plt.title('Accuracy v/s Time')

plt.show()

### Bi-LSTM

In [None]:
# Three stacked bidirectional LSTM layers on top of a learned embedding,
# ending in a single sigmoid unit.
# NOTE(review): the embedding is vocab_size x 10000, a very large table —
# confirm the 10000 output dimension is intended.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, 10000, input_length=X_train_dl.shape[1]))
model.add(tf.keras.layers.SpatialDropout1D(0.2))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(16, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(8, dropout=0.2, recurrent_dropout=0.2, return_sequences=False)))
model.add(tf.keras.layers.Dense(4, activation='relu'))
# Softmax over a single unit is always exactly 1.0, so the network could never
# express a prediction; a sigmoid unit gives a usable probability.
model.add(tf.keras.layers.Dense(1,activation='sigmoid'))
# Binary cross-entropy matches the one-unit sigmoid head.
# Assumes the labels are 0/1, like the other sigmoid models in this notebook — TODO confirm.
model.compile(loss=tf.keras.losses.BinaryCrossentropy(),optimizer=keras.optimizers.Adam(learning_rate=0.002529), metrics=["acc"])
model.summary()

In [None]:
epochs = 1000
# Early-stopping settings: give up after 10 epochs without val_acc improvement;
# the weights of the last epoch are kept (restore_best_weights=False).
stop_config = dict(
    monitor='val_acc',
    min_delta=0,
    patience=10,
    verbose=0,
    mode='auto',
    baseline=None,
    restore_best_weights=False,
)
early_stopping = tf.keras.callbacks.EarlyStopping(**stop_config)
history = model.fit(
    X_train_dl,
    y_train,
    epochs=epochs,
    batch_size=64,
    validation_split=0.15,
    callbacks=[early_stopping],
)

In [None]:
model.evaluate(X_test_dl,y_test)

In [None]:
# Training vs. validation accuracy per epoch.
# The Bi-LSTM model is compiled with metrics=["acc"], so the History object
# stores the curves under 'acc' / 'val_acc'; 'binary_accuracy' raises KeyError.
plt.figure(figsize=(12,6))
epochs = len(history.history['acc'])
plt.plot(range(epochs),history.history['acc'],label='Training Accuracy',color='r')
plt.plot(range(epochs),history.history['val_acc'],label='Validation Accuracy',color='black')

plt.xlabel("Epoch")
plt.ylabel("Accuracy")

plt.legend()
plt.title('Accuracy v/s Time')

plt.show()

## CNN+LSTM-1

In [None]:
# Embedding -> (Conv1D -> LSTM) x 2 -> one sigmoid unit, trained with binary
# focal cross-entropy and tracked with binary accuracy.
model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(90000, 32, input_length=max_length),
    tf.keras.layers.SpatialDropout1D(0.2),
    tf.keras.layers.Conv1D(56, 5, activation='relu', padding='same'),
    tf.keras.layers.SpatialDropout1D(0.2),
    tf.keras.layers.LSTM(8, dropout=0.2, recurrent_dropout=0.2, return_sequences=True),
    tf.keras.layers.Conv1D(56, 3, activation='relu', padding='same'),
    tf.keras.layers.SpatialDropout1D(0.2),
    tf.keras.layers.LSTM(8, dropout=0.2, recurrent_dropout=0.2, return_sequences=False),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.compile(
    loss=tf.keras.losses.BinaryFocalCrossentropy(gamma=2, from_logits=False),
    optimizer=keras.optimizers.Adam(learning_rate=0.002529),
    metrics=[tf.keras.metrics.BinaryAccuracy()],
)
model.summary()

In [None]:
epochs = 1000
# Early-stopping settings: give up after 10 epochs without improvement of
# val_binary_accuracy; the weights of the last epoch are kept.
stop_config = dict(
    monitor='val_binary_accuracy',
    min_delta=0,
    patience=10,
    verbose=0,
    mode='auto',
    baseline=None,
    restore_best_weights=False,
)
early_stopping = tf.keras.callbacks.EarlyStopping(**stop_config)
history = model.fit(
    X_train_dl,
    y_train,
    epochs=epochs,
    batch_size=64,
    validation_split=0.1,
    callbacks=[early_stopping],
)

In [None]:
model.evaluate(X_test_dl,y_test)

In [None]:
# Training vs. validation binary accuracy for every epoch that actually ran.
fig, ax = plt.subplots(figsize=(12, 6))
epochs = len(history.history['binary_accuracy'])
epoch_axis = range(epochs)
ax.plot(epoch_axis, history.history['binary_accuracy'], label='Training Accuracy', color='r')
ax.plot(epoch_axis, history.history['val_binary_accuracy'], label='Validation Accuracy', color='black')

ax.set_xlabel("Epoch")
ax.set_ylabel("Accuracy")
ax.legend()
ax.set_title('Accuracy v/s Time')

plt.show()

## CNN+LSTM+BiLSTM

In [None]:
# Embedding -> Conv1D -> LSTM -> Conv1D -> bidirectional LSTM -> 3-way softmax.
model = tf.keras.models.Sequential()
# The tokenizer is created with num_words=vocab_size, so token ids can be as
# large as vocab_size - 1. The embedding table needs at least that many rows;
# the previous fixed size of 20000 leaves larger ids out of range.
model.add(tf.keras.layers.Embedding(vocab_size, 32, input_length=X_train_dl.shape[1]))
model.add(tf.keras.layers.SpatialDropout1D(0.2))
model.add(tf.keras.layers.Conv1D(256,7,activation='tanh',padding='same'))
model.add(tf.keras.layers.SpatialDropout1D(0.2))
model.add(tf.keras.layers.LSTM(64, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))
model.add(tf.keras.layers.Conv1D(16,7,activation='relu',padding='same'))
model.add(tf.keras.layers.SpatialDropout1D(0.2))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(8, dropout=0.2, recurrent_dropout=0.2, return_sequences=False)))
model.add(tf.keras.layers.Flatten())
# NOTE(review): three output classes here, while other cells use 7 or a single
# sigmoid unit — confirm the number of classes in df.emotion.
model.add(tf.keras.layers.Dense(3,activation='softmax'))
model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),optimizer=keras.optimizers.Adam(learning_rate=0.002529), metrics=["acc"])
model.summary()

In [None]:
epochs = 1000
# Early-stopping settings: give up after 5 epochs without val_acc improvement;
# the weights of the last epoch are kept (restore_best_weights=False).
stop_config = dict(
    monitor='val_acc',
    min_delta=0,
    patience=5,
    verbose=0,
    mode='auto',
    baseline=None,
    restore_best_weights=False,
)
early_stopping = tf.keras.callbacks.EarlyStopping(**stop_config)
history = model.fit(
    X_train_dl,
    y_train,
    epochs=epochs,
    batch_size=32,
    validation_split=0.15,
    callbacks=[early_stopping],
)

In [None]:
model.evaluate(X_test_dl,y_test)

In [None]:
# Training vs. validation accuracy per epoch.
# The x-axis is derived from the recorded history: early stopping decides how
# many epochs actually ran, so a hard-coded range() raises a shape-mismatch
# error whenever the run length differs.
train_acc = history.history['acc']
val_acc = history.history['val_acc']

plt.figure(figsize=(12,6))
plt.plot(range(len(train_acc)),train_acc,label='Training Accuracy',color='r')
plt.plot(range(len(val_acc)),val_acc,label='Validation Accuracy',color='black')

plt.xlabel("Epoch")
plt.ylabel("Accuracy")

plt.legend()
plt.title('Accuracy v/s Time')

plt.show()

## CNN+LSTM+GRU

In [None]:
# Embedding -> Conv1D -> BiLSTM -> LSTM -> Conv1D -> LSTM -> BiLSTM -> 3-way
# softmax. The stack contains LSTM layers only; no GRU layer is used.
model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(vocab_size, 32, input_length=X_train_dl.shape[1]),
    tf.keras.layers.SpatialDropout1D(0.2),
    tf.keras.layers.Conv1D(256, 7, activation='tanh', padding='same'),
    tf.keras.layers.SpatialDropout1D(0.2),
    tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(64, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)
    ),
    tf.keras.layers.LSTM(32, dropout=0.2, recurrent_dropout=0.2, return_sequences=True),
    tf.keras.layers.Conv1D(16, 7, activation='relu', padding='same'),
    tf.keras.layers.SpatialDropout1D(0.2),
    tf.keras.layers.LSTM(32, dropout=0.2, recurrent_dropout=0.2, return_sequences=True),
    tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(8, dropout=0.2, recurrent_dropout=0.2, return_sequences=False)
    ),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(3, activation='softmax'),
])
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    optimizer=keras.optimizers.Adam(learning_rate=0.002529),
    metrics=["acc"],
)
model.summary()

In [None]:
epochs = 1000
# Early-stopping settings: give up after 5 epochs without val_acc improvement;
# the weights of the last epoch are kept (restore_best_weights=False).
stop_config = dict(
    monitor='val_acc',
    min_delta=0,
    patience=5,
    verbose=0,
    mode='auto',
    baseline=None,
    restore_best_weights=False,
)
early_stopping = tf.keras.callbacks.EarlyStopping(**stop_config)
history = model.fit(
    X_train_dl,
    y_train,
    epochs=epochs,
    batch_size=32,
    validation_split=0.15,
    callbacks=[early_stopping],
)

In [None]:
model.evaluate(X_test_dl,y_test)

In [None]:
# Training vs. validation accuracy per epoch.
# The x-axis is derived from the recorded history: early stopping decides how
# many epochs actually ran, so a hard-coded range() raises a shape-mismatch
# error whenever the run length differs.
train_acc = history.history['acc']
val_acc = history.history['val_acc']

plt.figure(figsize=(12,6))
plt.plot(range(len(train_acc)),train_acc,label='Training Accuracy',color='r')
plt.plot(range(len(val_acc)),val_acc,label='Validation Accuracy',color='black')

plt.xlabel("Epoch")
plt.ylabel("Accuracy")

plt.legend()
plt.title('Accuracy v/s Time')

plt.show()

## Hyperparameter tuning using Keras Tuner

In [None]:
!pip install keras-tuner --quiet

In [None]:
import time

import keras_tuner
# Import from the current package name `keras_tuner` (the one installed by
# `pip install keras-tuner`) rather than the legacy `kerastuner` alias, which
# is a deprecated compatibility shim.
from keras_tuner import HyperParameters, RandomSearch

# Unix timestamp used as the tuner's output directory, so every run of this
# cell writes its trials to a fresh folder.
LOG = f'{int(time.time())}'

In [None]:
def call_existing_code(vocab_size, embedding_dim, conv_units1, conv_units2, kernel_size, lstm_units1, lstm_units2, activation1, activation2, lr):
  """Build and compile the Conv1D/LSTM stack for one hyperparameter set.

  Args:
    vocab_size: number of rows in the embedding table.
    embedding_dim: width of each embedding vector.
    conv_units1, conv_units2: filter counts used by the Conv1D layers.
    kernel_size: kernel width shared by all Conv1D layers.
    lstm_units1, lstm_units2: unit counts used by the LSTM layers.
    activation1, activation2: activations used by the Conv1D layers.
    lr: learning rate for the Adam optimizer.

  Returns:
    The compiled Sequential model (its summary is printed as a side effect).
  """
  def lstm(units):
    # Every recurrent layer shares the same dropout and returns full sequences.
    return tf.keras.layers.LSTM(units=units, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)

  def conv(filters, activation):
    return tf.keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, activation=activation, padding='same')

  model = tf.keras.models.Sequential([
      tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=X_train_dl.shape[1]),
      conv(conv_units1, activation1),
      lstm(lstm_units1),
      conv(conv_units2, activation2),
      tf.keras.layers.Bidirectional(lstm(lstm_units2)),
      conv(conv_units2, activation1),
      tf.keras.layers.Bidirectional(lstm(lstm_units1)),
      lstm(lstm_units2),
      conv(conv_units1, activation2),
      tf.keras.layers.Flatten(),
      tf.keras.layers.Dense(1, activation='sigmoid'),
  ])
  model.compile(
      loss=tf.keras.losses.BinaryFocalCrossentropy(gamma=1, from_logits=False),
      optimizer=keras.optimizers.Adam(learning_rate=lr),
      metrics=[tf.keras.metrics.BinaryAccuracy()],
  )
  model.summary()
  return model


def build_model(hp):
  """Sample one hyperparameter set from `hp` and build the matching model.

  Args:
    hp: the hyperparameter container supplied by the tuner; every search
      dimension is registered on it here.

  Returns:
    The compiled model produced by `call_existing_code` for the sampled values.
  """
  # Dict entries are evaluated top to bottom, so the search dimensions are
  # registered in this exact order.
  sampled = {
      "vocab_size": hp.Int("vocab_size", min_value=50000, max_value=100000, step=5000),
      "embedding_dim": hp.Int("em_dim", min_value=4, max_value=128, step=16),
      "conv_units1": hp.Int("conv_units1", min_value=8, max_value=512, step=16),
      "conv_units2": hp.Int("conv_units2", min_value=16, max_value=256, step=16),
      "kernel_size": hp.Int("kernel_size", min_value=3, max_value=10, step=1),
      "lstm_units1": hp.Int("lstm_units1", min_value=8, max_value=256, step=4),
      "lstm_units2": hp.Int("lstm_units2", min_value=2, max_value=512, step=8),
      "activation1": hp.Choice("activation1", ["relu", "tanh"]),
      "activation2": hp.Choice("activation2", ["relu", "tanh"]),
      "lr": hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log"),
  }
  # Hand the sampled values to the existing model-building code.
  return call_existing_code(**sampled)

# Random search over build_model's hyperparameters: 5 trials, each averaged
# over 5 executions, ranked by validation binary accuracy.
tuner=RandomSearch(
    build_model,
    objective="val_binary_accuracy",
    max_trials=5,
    executions_per_trial=5,
    directory=LOG
)

# Hyperparameters are selected on a slice held out from the training data
# (validation_split is forwarded to model.fit). Ranking trials on the test
# split would leak it into model selection and inflate the test accuracy
# reported later.
tuner.search(
    x=X_train_dl,
    y=y_train,
    epochs=5,
    batch_size=128,
    validation_split=0.15
)

print(tuner.get_best_hyperparameters()[0].values)
best_model = tuner.get_best_models()[0]
best_model.summary()

# **Final Model**

In [None]:
# Final architecture: embedding followed by alternating Conv1D and
# (bidirectional) LSTM layers, an average-pooling step, and one sigmoid unit.
# Trained with binary focal cross-entropy and tracked with binary accuracy.
model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(80000, 36, input_length=X_train.shape[1]),
    tf.keras.layers.Conv1D(136, 7, activation='relu', padding='same'),
    tf.keras.layers.LSTM(76, dropout=0.2, recurrent_dropout=0.2, return_sequences=True),
    tf.keras.layers.Conv1D(192, 5, activation='tanh', padding='same'),
    tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(458, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)
    ),
    tf.keras.layers.Conv1D(192, 5, activation='relu', padding='same'),
    tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(76, dropout=0.2, recurrent_dropout=0.2, return_sequences=True)
    ),
    tf.keras.layers.AveragePooling1D(data_format='channels_first'),
    tf.keras.layers.LSTM(458, dropout=0.2, recurrent_dropout=0.2, return_sequences=True),
    tf.keras.layers.Conv1D(136, 7, activation='tanh', padding='same'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.compile(
    loss=tf.keras.losses.BinaryFocalCrossentropy(gamma=1, from_logits=False),
    optimizer=keras.optimizers.Adam(learning_rate=0.00091241),
    metrics=[tf.keras.metrics.BinaryAccuracy()],
)
model.summary()

In [None]:
epochs=1000
# Early stopping watches a slice held out from the training data
# (validation_split), consistent with the other training cells. Monitoring the
# test split instead would let it decide when training stops, so the test
# accuracy reported afterwards would no longer be an unbiased estimate.
history=model.fit(X_train, y_train, epochs=epochs, batch_size = 1024,validation_split=0.15,callbacks = [tf.keras.callbacks.EarlyStopping(
    monitor='val_binary_accuracy',
    min_delta=0,
    patience=5,
    verbose=0,
    mode='auto',
    baseline=None,
    restore_best_weights=False
)])

In [None]:
model.evaluate(X_test,y_test)

In [None]:
# Training vs. validation binary accuracy per epoch.
# The x-axis is derived from the recorded history: early stopping decides how
# many epochs actually ran, so a hard-coded range() raises a shape-mismatch
# error whenever the run length differs.
train_acc = history.history['binary_accuracy']
val_acc = history.history['val_binary_accuracy']

plt.figure(figsize=(12,6))
plt.plot(range(len(train_acc)),train_acc,label='Training Accuracy',color='r')
plt.plot(range(len(val_acc)),val_acc,label='Validation Accuracy',color='black')

plt.xlabel("Epoch")
plt.ylabel("Accuracy")

plt.legend()
plt.title('Accuracy v/s Time')

plt.show()

# Testing

In [None]:
y_pred=model.predict(X_test)

In [None]:
y_pred

In [None]:
# Turn the predicted probabilities into hard 0/1 labels: values >= 0.5 become 1,
# everything else 0.
y_pred = (np.asarray(y_pred) >= 0.5).astype(int)
y_pred

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
print(classification_report(y_test,y_pred))

In [None]:
# Confusion matrix drawn as a heat map with the count written in each cell.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
fig, ax = plt.subplots(figsize=(7.5, 7.5))
ax.matshow(conf_matrix, cmap='inferno', alpha=0.7)
# Walk the matrix row by row; i is the row (actual), j the column (prediction).
for (i, j), count in np.ndenumerate(conf_matrix):
    ax.text(x=j, y=i, s=count, va='center', ha='center', size='xx-large')

ax.set_xlabel('Predictions', fontsize=18)
ax.set_ylabel('Actuals', fontsize=18)
ax.set_title('Confusion Matrix', fontsize=18)
plt.show()