In [None]:
# Kaggle data-source bootstrap: run this cell once so the dataset is fetched;
# after that the cell may be deleted.
# Heads-up: this notebook environment is not Kaggle's own Python environment,
# so libraries the notebook relies on may be missing here.
import kagglehub

# The returned value is the local path of the downloaded dataset.
masoudnickparvar_brain_tumor_mri_dataset_path = kagglehub.dataset_download(
    "masoudnickparvar/brain-tumor-mri-dataset"
)

print("Data source import complete.")


In [None]:
# Stock Kaggle starter cell. The kaggle/python Docker image
# (https://github.com/kaggle/docker-python) ships with many analytics
# libraries preinstalled; two commonly used ones are loaded here.
import os

import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data lives in the read-only "../input/" directory. Walk it and print
# the full path of every file found, one path per line.
for folder, _subdirs, files_here in os.walk('/kaggle/input'):
    for file_name in files_here:
        print(os.path.join(folder, file_name))

# Up to 20GB can be written to the current directory (/kaggle/working/); it is
# preserved as output when a version is created with "Save & Run All".
# Temporary files can go to /kaggle/temp/, but they are not kept after the session.

In [None]:
import os
import random

import cv2
import matplotlib.pyplot as plt
import numpy as np
import numpy as npa
import pandas as pd
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix , accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import losses
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.initializers import HeUniform, GlorotUniform
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.layers import GlobalAveragePooling2D, LeakyReLU
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical



### 📌 1. Path Identification

In [None]:
import pathlib

# Default location of the dataset inside a Kaggle notebook session.
_KAGGLE_DATA_DIR = pathlib.Path(r"/kaggle/input/brain-tumor-mri-dataset/")

# Outside Kaggle the folder above does not exist. In that case use the path
# returned by kagglehub in the first cell, if that cell was run. `globals().get`
# keeps this cell working even when the download cell has been deleted.
_downloaded_path = globals().get("masoudnickparvar_brain_tumor_mri_dataset_path")

if _KAGGLE_DATA_DIR.is_dir() or _downloaded_path is None:
    data_dir = _KAGGLE_DATA_DIR
else:
    data_dir = pathlib.Path(_downloaded_path)

# Derive both split folders from `data_dir` so the three paths cannot drift apart.
train_dir = str(data_dir / "Training")
test_dir = str(data_dir / "Testing")


# Method 2: You can try forward slash

### 📌 2. Data Preprocessing with ImageDataGenerator

In [None]:
# Baseline generators: the only transformation is scaling pixel values by 1/255.
test_datagen = ImageDataGenerator(rescale=1 / 255)
train_datagen = ImageDataGenerator(rescale=1 / 255)

### 📌 3. Defining Train & Test Folders

In [None]:
# Split folders expressed as plain strings, both derived from `data_dir`.
train_dir, test_dir = (str(data_dir.joinpath(split)) for split in ("Training", "Testing"))

### 📌 4. Define Image Size & Batch Size for Model

In [None]:
# Number of images per batch, and the square size every image is resized to.
img_height = img_width = 240
batch_size = 32

### 📌 5. Loading Training and Test Datasets

In [None]:
# Both splits are loaded with the same settings: images resized to
# (img_height, img_width), grouped into batches of `batch_size`, and labels
# produced in "categorical" mode.
_flow_settings = dict(
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode="categorical",
)

train_generator = train_datagen.flow_from_directory(train_dir, **_flow_settings)
test_generator = test_datagen.flow_from_directory(test_dir, **_flow_settings)

# Show the class-name -> index mapping found in the training folder.
print("Class Label :", train_generator.class_indices)


### DATA PREPROCESSING SUMMARY

🚀 What Does the Code Do in Brief?

* Determines the dataset folder (data_dir).

* Creates an ImageDataGenerator to rescale the images.

* Determines the training (train_dir) and test (test_dir) data paths.

* Resizes the images to 240x240 and loads them with flow_from_directory().

* Prints the class labels.

## Is it necessary to use ❗to_categorical?❗

No. `to_categorical` is not needed, because `flow_from_directory(class_mode="categorical")` already converts the class labels to one-hot encoded vectors.

### 📌 6. VISUALIZATION

In [None]:
import matplotlib.pyplot as plt

# Class names in index order (index i in `class_indices` maps to class_labels[i]).
class_labels = list(train_generator.class_indices.keys())

# Count the real number of training samples per class.
# `train_generator.classes` holds one integer class index per image file, so
# bincount gives the true distribution. The previous `total // num_classes`
# drew identical bars regardless of the data.
class_counts = np.bincount(
    train_generator.classes, minlength=len(class_labels)
).tolist()

plt.figure(figsize=(8, 5))
plt.bar(class_labels, class_counts, color='skyblue')
plt.xlabel("Classes")
plt.ylabel("Number of Samples")
plt.title("Class Distribution in Dataset")
plt.xticks(rotation=20)
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Draw one batch from the training generator and preview its first six images
# in a 2x3 grid, titled with the index of the one-hot label.
x_batch, y_batch = next(train_generator)

fig, axes = plt.subplots(2, 3, figsize=(10, 5))
for i, ax in enumerate(axes.flat):
    ax.imshow(x_batch[i])
    ax.set_title(f"Class: {np.argmax(y_batch[i])}")
    ax.axis("off")
plt.show()


### 📌 7.LET'S DEVELOP THE MODEL

In [None]:

# Training-time augmentation: random rotations, shifts, shear, zoom and
# horizontal flips on top of the 1/255 rescaling.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Test images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1./255)

# `train_generator` was created earlier from the non-augmented datagen.
# Redefining `train_datagen` alone does not change that iterator, so it must be
# rebuilt here; otherwise the augmentation never reaches `model.fit`.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode="categorical",
)
# The existing `test_generator` is left as is: it was built with the same
# rescale-only configuration as the `test_datagen` defined above.


In [None]:
# CNN classifier assembled layer by layer.
model = Sequential()

# Feature extractor: three stages of 5x5 "valid" convolution -> 2x2 max pooling
# -> batch normalization, with 32, 64 and 128 filters.
for n_filters in (32, 64, 128):
    model.add(
        Conv2D(
            n_filters,
            (5, 5),
            activation="relu",
            padding="valid",
            kernel_initializer=HeUniform(),
        )
    )
    model.add(MaxPooling2D(2, 2))
    model.add(BatchNormalization())

model.add(Flatten())

# Classifier head: three dense stages (units, dropout rate), each followed by
# dropout and batch normalization.
for n_units, drop_rate in ((64, 0.2), (128, 0.2), (256, 0.3)):
    model.add(Dense(n_units, activation="relu", kernel_initializer=HeUniform()))
    model.add(Dropout(drop_rate))
    model.add(BatchNormalization())

# Output layer: softmax over 4 classes.
model.add(Dense(4, activation="softmax", kernel_initializer=GlorotUniform()))

In [None]:
# Adam with a learning rate of 1e-4; categorical cross-entropy matches the
# "categorical" class mode used by the generators.
model.compile(
    loss="categorical_crossentropy",
    metrics=["accuracy"],
    optimizer=Adam(learning_rate=1e-4),
)

In [None]:
# Stop when val_loss has not improved for 3 epochs and roll back to the best weights.
early_stop = EarlyStopping(
    monitor="val_loss",
    patience=3,
    restore_best_weights=True,
)

# The epoch count is kept low (10) because of how long training takes.
fit = model.fit(
    train_generator,
    validation_data=test_generator,
    epochs=10,
    callbacks=[early_stop],
)

# LET’S TALK ABOUT SOME THEORETICAL TOPICS WHILE THE MODEL IS BEING TRAINED
### 1. Data Augmentation
* Scenario: We are training a model that distinguishes between cats and dogs for a classification problem. However, the number of training data is insufficient. In this case, it is very important to use data augmentation to prevent the model from overfitting.

* What We Will Do: Data augmentation creates new images by manipulating the training data. This allows the model to learn more generally. We can augment data using the following operations:

In [None]:
# Illustrative snippet only: the augmentation configuration is wrapped in a
# string literal, so this cell defines nothing and only evaluates a string.
# English translation of the inline (Turkish) comments inside the string:
#   rescale=1./255          -> normalize the images
#   rotation_range=40       -> rotate the images by up to 40 degrees
#   width_shift_range=0.2   -> shift the images horizontally by 20%
#   height_shift_range=0.2  -> shift the images vertically by 20%
#   shear_range=0.2         -> apply a shear (slanted) transformation
#   zoom_range=0.2          -> zoom the images by 20%
#   horizontal_flip=True    -> flip the images horizontally
#   fill_mode='nearest'     -> fill missing pixels with the nearest value
'''
train_datagen = ImageDataGenerator(
    rescale=1./255,           # Görselleri normalize et
    rotation_range=40,        # Görselleri 40 dereceye kadar döndür
    width_shift_range=0.2,    # Görselleri yatayda %20 kaydır
    height_shift_range=0.2,   # Görselleri dikeyde %20 kaydır
    shear_range=0.2,          # Görselleri eğik şekilde döndür
    zoom_range=0.2,           # Görselleri %20 yakınlaştır
    horizontal_flip=True,     # Görselleri yatayda çevir
    fill_mode='nearest'       # Kayıp pikselleri en yakın değerle doldur
)
'''

-----------

### 2. Data Normalization
* Scenario: Our training images may have different lighting conditions and color tones, which affects the model's learning process. It is therefore important to bring the value of each pixel to a common scale (normalize).

* What We Will Do: Pixel values generally lie between 0 and 255, so a good approach is to rescale them to the range 0 to 1. This allows the model to learn faster.

* Example: `train_datagen = ImageDataGenerator(rescale=1./255)`

----------

### 3. Forward and Backward Propagation
* Scenario: We may need to use backpropagation to learn if the model is making the right prediction and to improve the parameters. In each training step, the model compares the predicted value with the real labels and updates the weights by backpropagating the error.

* What We Will Do: This process is done with optimization algorithms (for example Adam or SGD).

#### Why Do We Use It?

* It tries to minimize the errors so that the model can make the right prediction.

* Optimization algorithms provide better updating of the weights.

---------------

### 4.ReLU vs Leaky ReLU: Which One to Use in Which Situation?

* ReLU: Provides fast learning and is generally used in datasets containing positive values. However, it can cause dead neuron problem.

* Leaky ReLU: Prevents dead neuron problem and uses a small gradient in negative values. It is preferred in deep networks and places where vanishing gradient problem occurs.
#### In short:
* ReLU: For simple and fast learning.
* Leaky ReLU: To prevent dead neuron and vanishing gradient problems.

----------

### 5. What are Dead Neurons and Vanishing Gradients?

* Dead Neuron: In activation functions like ReLU, negative inputs become zero. If a neuron consistently gives zero output, this neuron does not contribute to learning and becomes "dead". This can lead to the problem of learning nothing in some layers of the network.

* Vanishing Gradient: In deep networks, the gradients can become very small during backpropagation. In this case, the weight updates are not large enough and the model cannot learn. This is especially seen with saturating activation functions like sigmoid and tanh. ReLU mitigates this problem because its gradient is 1 for positive inputs.
-----------

### 6. How to Understand Dead Neuron and Vanishing Gradient?

* 1-) Dead Neuron Detection:

During training, some neurons consistently give zero output. You can observe this on the activations. If most of the neurons in a layer have zero activation, there may be a dead neuron problem.

Indirect Sign: If the accuracy of your model is increasing very slowly or stays very low, there may be dead neurons.

* 2-) Vanishing Gradient Detection:

Tracing Gradients: During backpropagation, you can see that the gradients become very small. In this case, the weight updates are almost zero.

Slow Loss Progress: If your model learns very slowly during training and the loss barely decreases, there may be a vanishing gradient problem.
* How to Understand?

When monitoring the performance of the model during training, a slow decrease in loss or a slow increase in accuracy could be a sign of dead neurons or a vanishing gradient.

--------

### 7. Average Pooling vs Max Pooling: In Which Case Should Which One Be Used?

##### Max Pooling:

* It usually gives better results because it takes the highest value in each region, emphasizing important features.
* It is especially preferred in tasks such as classification and object recognition. This helps the model to capture important details and emphasize local features.
* In Which Case Should It Be Used?: For more detailed information extraction and strong overall performance, especially in cases where prominent features are prominent in the images.

#### Average Pooling:

* It provides smoother feature extraction because it takes the average of all values in the region, which can lead to less overfitting.

This method is generally more resistant to noise, so it can be used when smoother features need to be extracted.

* In Which Case Should It Be Used?: In cases where the model needs to take a less detail-oriented and more generalizing approach, for example, in more homogeneous data or in regularization requirements.
#### In short:

* Max Pooling: Cases where feature extraction is important and salient details are emphasized.
* Average Pooling: Cases where more general feature extraction is needed and overfitting should be avoided.

---------

### 8. Valid Padding vs Same Padding: Which One Should Be Used In Which Case?

* Valid Padding:

There is a loss of dimension in the edge areas. In other words, the output (feature map) is smaller than the input image.

Advantage: It is faster in terms of calculation because the area to be processed is smaller.

Use Case: If the output of the model (feature map) should be smaller and computational efficiency is at the forefront, valid padding is preferred.

* Same Padding:

The input and output dimensions are the same. In other words, zero padding is done on the edges (padding) so that the output image is the same as the dimensions of the input.

Advantage: More information is preserved during feature extraction of the model and there is no loss of information in the deeper layers of the model.

Use Case: If you want the output dimensions of the model to be close to the input dimensions and do not want to experience feature loss, same padding is preferred.
#### How Do We Understand?

* Valid Padding: If you don't care about the output size being smaller and want to process faster.

* Same Padding: If you want the output size to be similar to the input and want the model to learn more details.

----

### 9. What Happens to the Model if the Kernel in Conv2D (5,5) is Enlarged?

1-) Kernel Enlargement (For example, going from 3x3 to 5x5):

* More calculations: Increasing the kernel size will cause you to have more parameters for each filter. This requires more calculations and can lead to longer training times.

* More information collection: A larger kernel can collect more information by covering a larger area. This is especially useful in cases where more complex structures need to be learned.

* Overfitting risk may increase because the model may try to learn too much detail and as a result may perform poorly on the test data.

2-) Model Training Status:

* Poorer Generalization: If you increase the kernel size and your model starts to overfit, its ability to generalize to the test data may decrease because it has learned unnecessary detail.

* More Capacity, More Cost: Increasing the kernel size gives the model more learning capacity, but if the right regularization strategies are not used, the model may take longer to train and require more data.

  ### In summary:

* Small kernel (3x3): Generally provides faster training and more efficient computations, learning details in smaller areas.

* Large kernel (5x5 or 7x7): Learns information in larger areas, but model training may take longer and the risk of overfitting may increase.

* Which kernel size to choose depends on the complexity of your dataset, the depth of the model, and your overall goals.
---------------------

# LET'S EVALUATE THE MODEL

In [None]:

# Evaluate on the WHOLE test set. A single `next(test_generator)` call only
# yields one batch (`batch_size` images), which makes the metrics noisy and
# unrepresentative.
class_names = list(test_generator.class_indices)  # class names in index order
label_ids = list(range(len(class_names)))

batch_probs, batch_labels = [], []
# Indexing the generator batch by batch visits every test image exactly once
# and keeps each prediction aligned with its own label, even though the
# generator shuffles.
for batch_index in range(len(test_generator)):
    x_test, y_test = test_generator[batch_index]
    batch_probs.append(model.predict(x_test, verbose=0))
    batch_labels.append(y_test)

predictions = np.concatenate(batch_probs, axis=0)  # class probabilities
y_test = np.concatenate(batch_labels, axis=0)      # one-hot ground truth

predicted_classes = np.argmax(predictions, axis=1)
true_classes = np.argmax(y_test, axis=1)

print("Accuracy:", accuracy_score(true_classes, predicted_classes))
print("Classification Report:")
# `labels` + `target_names` make the report show class names and stay valid
# even if some class never appears among the labels or predictions.
print(
    classification_report(
        true_classes,
        predicted_classes,
        labels=label_ids,
        target_names=class_names,
        zero_division=0,
    )
)
print("Confusion Matrix:")
print(confusion_matrix(true_classes, predicted_classes, labels=label_ids))


* If you get an error, you can check the code below.

In [None]:

# Disabled fallback: the evaluation loop below is wrapped in a string literal,
# so this cell runs none of it. As written, the snippet predicts batch by
# batch, collects predicted and true class indices into two lists, and prints
# accuracy, classification report and confusion matrix over them.
# NOTE(review): Keras directory iterators appear to yield batches endlessly, so
# the bare `for ... in test_generator` loop would presumably never terminate.
# Bound it (e.g. with `len(test_generator)`) before enabling - confirm against
# the Keras documentation.
'''

all_predictions = []
all_true_classes = []


for x_batch, y_batch in test_generator:
    predictions = model.predict(x_batch)
    predicted_classes = np.argmax(predictions, axis=1)
    true_classes = np.argmax(y_batch, axis=1)


    all_predictions.extend(predicted_classes)
    all_true_classes.extend(true_classes)


print("Accuracy:", accuracy_score(all_true_classes, all_predictions))
print("Classification Report:")

print(classification_report(all_true_classes, all_predictions))

print("Confusion Matrix:")
print(confusion_matrix(all_true_classes, all_predictions))


'''



### I hope it was useful for you.