In [10]:
# Training corpus: one multi-line string of short "Q:" / "A:" pairs about a
# CNN/LSTM brain-tumor MRI project. Later cells fit the tokenizer on the whole
# string and split it on newlines, so each question line and each answer line
# becomes its own source of training sequences.
fraqs = """CNN and LSTM:

Q: What is the purpose of using CNN in this project?
A: CNN is used for feature extraction from MRI brain images.

Q: Why might you integrate LSTM with CNN?
A: To capture temporal or sequential patterns if dealing with sequential MRI slices or time-series data.

Q: What type of data is used in this project?
A: MRI scans of the brain for tumor classification.

Q: How do you preprocess images for CNN?
A: Resize, normalize pixel values, and convert them into tensors.

Q: What are the common output classes for brain tumor classification?
A: Tumor types like glioma, meningioma, pituitary tumor, or no tumor.

Q: Which loss function is suitable for this classification task?
A: Categorical Crossentropy for multi-class classification.

Q: What optimizer can be used for training the model?
A: Adam optimizer is commonly used for its efficiency.

Q: How do you avoid overfitting in the CNN-LSTM model?
A: Use techniques like dropout, data augmentation, and regularization.

Q: What is the input format for LSTM?
A: A sequence of features (e.g., extracted from CNN layers).

Q: Which metric is commonly used to evaluate the model's performance?
A: Accuracy, precision, recall, and F1-score.

Q: How do you handle class imbalance in the dataset?
A: By using techniques like oversampling, undersampling, or class weights.

Q: What is transfer learning, and why is it useful here?
A: Reusing pre-trained CNN models like VGG16 to leverage learned features and reduce training time.

Q: Can this model work for real-time detection?
A: Yes, with optimizations, it can be adapted for real-time applications.

Q: What activation function is used in the final layer for multi-class classification?
A: Softmax activation function.

Q: What is the role of batch normalization in CNN?
A: It stabilizes and speeds up training by normalizing layer inputs.

Q: What library can you use for implementing CNN and LSTM?
A: Libraries like TensorFlow or PyTorch.

Q: How do you split the dataset?
A: Into training, validation, and testing sets, typically in a 70-15-15 ratio.

Q: How do you visualize the model’s performance?
A: Using confusion matrices, accuracy/loss plots, and ROC curves.

Q: What data augmentation techniques are useful for this project?
A: Rotation, flipping, zooming, and shifting.

Q: How do you save the trained model?
A: Save the model using .h5 or .pth format for future use."""

In [11]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer

In [12]:
# Word-level Keras tokenizer created with default settings; its vocabulary is
# empty until fit_on_texts is called in the next cell.
tokenizer = Tokenizer()

In [14]:
# Learn the vocabulary from the whole corpus, passed as a one-element list of texts.
tokenizer.fit_on_texts([fraqs])

In [35]:
# Number of distinct words the tokenizer learned. Word indices start at 1, so
# any layer indexed by token id needs one extra slot for the padding value 0.
len(tokenizer.word_index)

173

In [22]:
# Tokenize every line of the corpus in one call, then expand each line into all
# of its prefixes that contain at least two tokens. Each prefix later becomes
# one training sample: the leading tokens are the context and the final token
# is the word to predict. Blank and one-word lines yield no prefixes.
line_sequences = tokenizer.texts_to_sequences(fraqs.split('\n'))
input_sequences = [
    tokens[:end]
    for tokens in line_sequences
    for end in range(2, len(tokens) + 1)
]

In [23]:
# Inspect the generated prefixes: each entry is one line's token ids cut off
# after 2, 3, ... words.
input_sequences

[[7, 5],
 [7, 5, 15],
 [2, 8],
 [2, 8, 6],
 [2, 8, 6, 4],
 [2, 8, 6, 4, 63],
 [2, 8, 6, 4, 63, 16],
 [2, 8, 6, 4, 63, 16, 22],
 [2, 8, 6, 4, 63, 16, 22, 7],
 [2, 8, 6, 4, 63, 16, 22, 7, 10],
 [2, 8, 6, 4, 63, 16, 22, 7, 10, 17],
 [2, 8, 6, 4, 63, 16, 22, 7, 10, 17, 28],
 [1, 7],
 [1, 7, 6],
 [1, 7, 6, 11],
 [1, 7, 6, 11, 3],
 [1, 7, 6, 11, 3, 64],
 [1, 7, 6, 11, 3, 64, 65],
 [1, 7, 6, 11, 3, 64, 65, 37],
 [1, 7, 6, 11, 3, 64, 65, 37, 29],
 [1, 7, 6, 11, 3, 64, 65, 37, 29, 30],
 [1, 7, 6, 11, 3, 64, 65, 37, 29, 30, 38],
 [2, 39],
 [2, 39, 66],
 [2, 39, 66, 9],
 [2, 39, 66, 9, 67],
 [2, 39, 66, 9, 67, 15],
 [2, 39, 66, 9, 67, 15, 31],
 [2, 39, 66, 9, 67, 15, 31, 7],
 [1, 32],
 [1, 32, 68],
 [1, 32, 68, 69],
 [1, 32, 68, 69, 12],
 [1, 32, 68, 69, 12, 40],
 [1, 32, 68, 69, 12, 40, 70],
 [1, 32, 68, 69, 12, 40, 70, 71],
 [1, 32, 68, 69, 12, 40, 70, 71, 72],
 [1, 32, 68, 69, 12, 40, 70, 71, 72, 31],
 [1, 32, 68, 69, 12, 40, 70, 71, 72, 31, 40],
 [1, 32, 68, 69, 12, 40, 70, 71, 72, 31, 40, 29

In [26]:
# Length of the longest prefix; every sequence is padded to this width.
max_len = max(map(len, input_sequences))

In [27]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Left-pad every prefix with zeros up to max_len so the target word always
# sits in the last column of the resulting 2-D array.
padded_input_sequences = pad_sequences(
    input_sequences,
    padding='pre',
    maxlen=max_len,
)

In [28]:
# Inspect the padded matrix: zeros on the left, real token ids on the right.
padded_input_sequences

array([[  0,   0,   0, ...,   0,   7,   5],
       [  0,   0,   0, ...,   7,   5,  15],
       [  0,   0,   0, ...,   0,   2,   8],
       ...,
       [  0,   0,   0, ..., 172,  50,   3],
       [  0,   0,   0, ...,  50,   3, 173],
       [  0,   0,   0, ...,   3, 173,  34]])

In [29]:
# Model inputs: every column except the last, i.e. the (padded) context words.
X = padded_input_sequences[:,:-1]

In [31]:
# Targets: the last column, i.e. the token id of the word that follows each context.
Y = padded_input_sequences[:,-1]

In [32]:
# Inspect the integer next-word targets before one-hot encoding.
Y

array([  5,  15,   8,   6,   4,  63,  16,  22,   7,  10,  17,  28,   7,
         6,  11,   3,  64,  65,  37,  29,  30,  38,  39,  66,   9,  67,
        15,  31,   7,  32,  68,  69,  12,  40,  70,  71,  72,  31,  40,
        29,  73,  12,  23,  74,  24,   8,  75,  16,  24,   6,  11,  10,
        17,  28,  29,  76,  16,   4,  30,   3,  18,  19,  13,  14,   9,
        77,  38,   3,   7,  78,  79,  80,  81,   5,  82,  83,  41,  84,
         8,  42,   4,  85,  86,  87,   3,  30,  18,  19,  18,  88,  20,
        89,  90,  91,  18,  12,  92,  18,  43,  44,  33,   6,  93,   3,
        17,  19,  94,  95,  96,   3,  45,  25,  19,   8,  46,  26,  47,
        11,   3,  27,   4,  21,  97,  46,   6,  48,  11,   3,  98,  99,
        13,  14,   9, 100, 101,  10,   4,   7,  15,  21,  34,  35,  20,
       102,  24,  49,   5, 103,   8,   6,   4, 104,  50,   3,  15,   1,
       105,  16,  51, 106, 107, 108,  37,   7, 109,  43, 110,   6,  48,
        11,  32, 111,   4, 112,  52,  53, 113, 114,   5, 115, 11

In [53]:
# Sanity check: one row per prefix and max_len - 1 context columns. The second
# dimension is the sequence length the model must be fed at inference time.
X.shape

(360, 16)

In [54]:
# Sanity check: one target per row of X.
Y.shape

(360,)

In [55]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the next-word targets for categorical cross-entropy.
# The class count is derived from the tokenizer instead of being hard-coded:
# word ids run from 1 to len(word_index), and id 0 is the padding value, so
# the one-hot width is the vocabulary size plus one.
y = to_categorical(Y, num_classes=len(tokenizer.word_index) + 1)

In [56]:
# Inspect the one-hot target matrix (one row per sample, one column per class).
y

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [57]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding,LSTM,Dense

In [89]:
# Next-word prediction model: embedding -> LSTM -> softmax over the vocabulary.
# All sizes are derived from the data so the model cannot drift out of sync
# with X and y when the corpus changes.
vocab_size = len(tokenizer.word_index) + 1  # +1 because token id 0 is the padding value
seq_len = X.shape[1]                        # width of the padded context windows

model = Sequential()
# Declare the real input width explicitly. An explicit Input layer replaces the
# Embedding `input_length` argument (deprecated in Keras 3) and lets
# model.summary() report concrete output shapes before training.
model.add(tf.keras.Input(shape=(seq_len,)))
model.add(Embedding(vocab_size, 100))   # 100-dimensional word vectors
model.add(LSTM(150))                    # final hidden state summarises the context
model.add(Dense(vocab_size, activation='softmax'))  # probability for every token id

In [90]:
# Categorical cross-entropy matches the one-hot targets in `y`; accuracy here
# is the fraction of samples whose top-scoring class is the true next word.
model.compile(loss='categorical_crossentropy',optimizer = 'adam',metrics=['accuracy'])

In [91]:
# Print the layer stack and parameter counts.
model.summary()

In [94]:
# Train on the full dataset; no validation split is held out, so the reported
# accuracy is training accuracy only.
# NOTE(review): the recorded log already shows ~0.88 accuracy in epoch 1, which
# suggests this cell was re-run on an already-trained model -- TODO confirm and
# re-run from a fresh kernel for a reproducible training curve.
model.fit(X,y,epochs=99)

Epoch 1/99
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.8756 - loss: 0.4183
Epoch 2/99
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.8811 - loss: 0.3928
Epoch 3/99
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.8775 - loss: 0.4126
Epoch 4/99
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.8733 - loss: 0.3823
Epoch 5/99
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.8779 - loss: 0.3936
Epoch 6/99
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.8706 - loss: 0.4000
Epoch 7/99
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.8858 - loss: 0.3648
Epoch 8/99
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.8637 - loss: 0.4002
Epoch 9/99
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1e1b4153e50>

In [97]:
# Local import: this cell uses numpy, but the notebook's numpy import cell
# appears after it, so a top-to-bottom run would otherwise fail with NameError.
import numpy as np

# Greedy next-word generation: repeatedly predict the most likely next word
# and append it to the running text.
text=" CNN is used for feature extraction from"

# Pad inference inputs to the same width the model was trained on. A wider
# window feeds the (unmasked) LSTM extra leading zeros it never saw during
# training, which degrades the prediction.
seq_len = X.shape[1]

for _ in range(2):
    token_text = tokenizer.texts_to_sequences([text])[0]
    # pad_sequences also truncates from the front, keeping the most recent words
    padded_token_text = pad_sequences([token_text], maxlen=seq_len, padding='pre')
    pos = int(np.argmax(model.predict(padded_token_text)))
    # index_word is the tokenizer's id -> word mapping; id 0 (padding) has no
    # entry, in which case there is nothing sensible to append.
    word = tokenizer.index_word.get(pos)
    if word is None:
        break
    text = text + " " + word
    print(text)
        

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
 CNN is used for feature extraction from is
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
 CNN is used for feature extraction from is used


In [86]:
import numpy as np 

In [82]:
# Inspect the word -> id mapping; ids start at 1.
tokenizer.word_index

{'a': 1,
 'q': 2,
 'for': 3,
 'the': 4,
 'and': 5,
 'is': 6,
 'cnn': 7,
 'what': 8,
 'you': 9,
 'in': 10,
 'used': 11,
 'or': 12,
 'how': 13,
 'do': 14,
 'lstm': 15,
 'of': 16,
 'this': 17,
 'tumor': 18,
 'classification': 19,
 'like': 20,
 'model': 21,
 'using': 22,
 'time': 23,
 'data': 24,
 'class': 25,
 'can': 26,
 'training': 27,
 'project': 28,
 'mri': 29,
 'brain': 30,
 'with': 31,
 'to': 32,
 'function': 33,
 'use': 34,
 'techniques': 35,
 'it': 36,
 'from': 37,
 'images': 38,
 'why': 39,
 'sequential': 40,
 'into': 41,
 'are': 42,
 'which': 43,
 'loss': 44,
 'multi': 45,
 'optimizer': 46,
 'be': 47,
 'commonly': 48,
 'augmentation': 49,
 'format': 50,
 'features': 51,
 'performance': 52,
 'accuracy': 53,
 'dataset': 54,
 'by': 55,
 'useful': 56,
 'trained': 57,
 'real': 58,
 'activation': 59,
 'layer': 60,
 '15': 61,
 'save': 62,
 'purpose': 63,
 'feature': 64,
 'extraction': 65,
 'might': 66,
 'integrate': 67,
 'capture': 68,
 'temporal': 69,
 'patterns': 70,
 'if': 71,
 'dea

overfitting
