<a href="https://colab.research.google.com/github/Shafiu1/DSA/blob/main/feature_fusions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# bangla_news_classifier.py

import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, Embedding, LSTM, Bidirectional, Dense, Concatenate, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import compute_class_weight
from sklearn.model_selection import train_test_split
import pickle as pkl
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize
import warnings
warnings.filterwarnings('ignore')

# Google Drive holds the CSV, the images and every cached artefact, so it
# has to be mounted before any path below is touched.
from google.colab import drive
drive.mount('/content/drive')

# Project root on Drive and the image sub-folder inside it.
folder_path = "/content/drive/MyDrive/BanglaNewsClassifier/"
dataset_path = "/content/drive/MyDrive/BanglaNewsClassifier/Images/"

# Read the headline table and attach an absolute image path to every row
# by prefixing the relative 'Image_Path' value with the image folder.
df = pd.read_csv(os.path.join(folder_path, 'headlines.csv'))
df['full_image_path'] = df['Image_Path'].map(
    lambda relative_path: f"{dataset_path}{relative_path}"
)

# 70% of the rows go to training; the held-out 30% is then divided again,
# with 33% of it becoming the test set and the rest the validation set.
# Both splits use a fixed seed so they are reproducible.
train_df, temp_df = train_test_split(df, random_state=42, test_size=0.3)
val_df, test_df = train_test_split(temp_df, random_state=42, test_size=0.33)

# Integer-encode the category column: the encoder learns its classes from
# the training split and is then applied unchanged to the other two.
le = LabelEncoder()
train_labels = le.fit_transform(train_df['Category(Bangla)'])
val_labels, test_labels = (
    le.transform(split['Category(Bangla)']) for split in (val_df, test_df)
)
num_classes = len(le.classes_)

# Image preprocessing
def process_image(image_path):
    """Load one image and return it as a model-ready float32 array.

    The image at ``image_path`` is loaded at a target size of 224x224,
    converted to an array, run through the ResNet50 ``preprocess_input``
    imported at the top of the file (applied on a one-image batch), and
    returned without the batch axis as ``np.float32``.
    """
    pixels = tf.keras.utils.img_to_array(
        tf.keras.utils.load_img(image_path, target_size=(224, 224))
    )
    # preprocess_input is called on a batch, so add a leading axis and
    # strip it again afterwards.
    single_image_batch = pixels[np.newaxis, ...]
    preprocessed = preprocess_input(single_image_batch)
    return preprocessed[0].astype(np.float32)

def load_or_process_images(df, save_path):
    """Return the preprocessed images for ``df``, backed by a pickle cache.

    Args:
        df: DataFrame with a ``'full_image_path'`` column; one image is
            produced per row, in row order.
        save_path: Path of the pickle file used as the cache.

    Returns:
        A NumPy array with one preprocessed image per row of ``df``.

    The cache at ``save_path`` is reused only when it can be read and holds
    exactly ``len(df)`` entries. An unreadable (e.g. truncated by an
    interrupted session) or wrong-sized cache is rebuilt and overwritten
    instead of being returned, because a stale cache would pair images with
    the wrong rows.

    NOTE: the length check cannot detect a cache built from a different
    split of the same size; delete the cache files whenever the split or
    the CSV changes.
    """
    if os.path.exists(save_path):
        # NOTE(security): unpickling executes arbitrary code, so save_path
        # must only ever point at cache files this notebook wrote itself.
        try:
            with open(save_path, 'rb') as f:
                images = pkl.load(f)
        except (EOFError, pkl.UnpicklingError):
            images = None  # corrupt/truncated cache: rebuild below
        if images is not None and len(images) == len(df):
            return images

    images = np.array([process_image(path) for path in df['full_image_path']])
    with open(save_path, 'wb') as f:
        pkl.dump(images, f)
    return images

# Load the preprocessed image arrays for each split, building and caching
# them on Drive the first time.
train_images = load_or_process_images(train_df, folder_path + 'train_images.pkl')
val_images = load_or_process_images(val_df, folder_path + 'val_images.pkl')
test_images = load_or_process_images(test_df, folder_path + 'test_images.pkl')

# Text preprocessing
# The vocabulary is learned from the training headlines only. Fitting on the
# full DataFrame would leak validation/test words into the model's
# vocabulary; words unseen in training are mapped to the '<oov>' token.
tokenizer = Tokenizer(num_words=50000, oov_token='<oov>')
tokenizer.fit_on_texts(train_df['Heading(Bangla)'])
max_len = 128  # every headline is padded/truncated to this many tokens

# Turn each headline into a sequence of integer word indices.
train_sequences = tokenizer.texts_to_sequences(train_df['Heading(Bangla)'])
val_sequences = tokenizer.texts_to_sequences(val_df['Heading(Bangla)'])
test_sequences = tokenizer.texts_to_sequences(test_df['Heading(Bangla)'])

# Pad with zeros at the end so all sequences have length max_len.
train_pad_sequences = pad_sequences(train_sequences, maxlen=max_len, padding='post', value=0.0)
val_pad_sequences = pad_sequences(val_sequences, maxlen=max_len, padding='post', value=0.0)
test_pad_sequences = pad_sequences(test_sequences, maxlen=max_len, padding='post', value=0.0)





In [None]:
# Model definition
# Two-branch network: a ResNet50 image branch and a BiLSTM headline branch
# whose features are concatenated and classified by a small dense head.
from tensorflow.keras.mixed_precision import set_global_policy
set_global_policy('mixed_float16')

# Image branch: ImageNet-pretrained ResNet50 without its classifier head.
# All layers except the last 10 are frozen; the feature map is averaged
# over its spatial dimensions into one vector per image.
visual_input = Input(shape=(224, 224, 3))
resnet = ResNet50(weights='imagenet', include_top=False, input_tensor=visual_input)
for layer in resnet.layers[:-10]:
    layer.trainable = False
x = resnet.output
x = GlobalAveragePooling2D()(x)

# Text branch: 100-dimensional embeddings over the tokenizer vocabulary
# (+1 because index 0 is the padding value) fed to a bidirectional LSTM.
text_input = Input(shape=(max_len,))
embedding_layer = Embedding(len(tokenizer.word_index) + 1, 100)(text_input)
lstm_layer = Bidirectional(LSTM(128, dropout=0.2))(embedding_layer)

# Fusion head.
concat_layer = Concatenate()([x, lstm_layer])
x = Dropout(0.2)(concat_layer)
x = Dense(64, activation='relu')(x)
# Under the mixed_float16 policy the softmax output is kept in float32:
# float16 probabilities can overflow/underflow and destabilise the loss.
output = Dense(num_classes, activation='softmax', dtype='float32')(x)

model = Model(inputs=[visual_input, text_input], outputs=output)
# categorical_crossentropy is used because the labels are fed one-hot encoded.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Balanced per-class weights computed from the training labels, keyed by
# the integer class index so they can be passed to fit(class_weight=...).
class_weights = {
    class_index: weight
    for class_index, weight in enumerate(
        compute_class_weight('balanced', classes=np.unique(train_labels), y=train_labels)
    )
}

# The model is compiled with categorical_crossentropy, so the integer
# labels are expanded to one-hot vectors.
train_labels_one_hot, val_labels_one_hot = (
    tf.keras.utils.to_categorical(labels, num_classes)
    for labels in (train_labels, val_labels)
)

# Stop once val_loss has not improved for 3 epochs and roll the model back
# to the weights of its best epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Train for at most 10 epochs; the checkpoint callback writes the model
# with the best val_accuracy to Drive.
history = model.fit(
    x=[train_images, train_pad_sequences],
    y=train_labels_one_hot,
    validation_data=([val_images, val_pad_sequences], val_labels_one_hot),
    epochs=10,
    batch_size=8,
    class_weight=class_weights,
    callbacks=[
        tf.keras.callbacks.ModelCheckpoint(
            folder_path + 'best_model.h5',
            monitor='val_accuracy',
            save_best_only=True,
        ),
        early_stopping,
    ],
)

# Evaluate model
# Loss and accuracy on the held-out test split (labels one-hot encoded to
# match the training loss).
test_labels_one_hot = tf.keras.utils.to_categorical(test_labels, num_classes)
test_loss, test_accuracy = model.evaluate([test_images, test_pad_sequences], test_labels_one_hot)
print(f"Test accuracy: {test_accuracy:.4f}")

# Per-class precision/recall/F1 from the arg-max of the predicted
# probabilities.
y_pred = model.predict([test_images, test_pad_sequences])
y_pred_classes = np.argmax(y_pred, axis=1)
from sklearn.metrics import classification_report
# `labels` is passed explicitly so the report always has one row per known
# class. Without it, a class absent from both the test labels and the
# predictions makes the label count differ from len(target_names) and
# classification_report raises instead of printing.
print(classification_report(test_labels, y_pred_classes,
                            labels=np.arange(num_classes), target_names=le.classes_))

Epoch 1/10
351/351 ━━━━━━━━━━━━━━━━━━━━ 0s 47ms/step - accuracy: 0.5061 - loss: 1.4098



351/351 ━━━━━━━━━━━━━━━━━━━━ 51s 108ms/step - accuracy: 0.5065 - loss: 1.4088 - val_accuracy: 0.6861 - val_loss: 1.1280
Epoch 2/10
351/351 ━━━━━━━━━━━━━━━━━━━━ 0s 52ms/step - accuracy: 0.8493 - loss: 0.4345



351/351 ━━━━━━━━━━━━━━━━━━━━ 36s 95ms/step - accuracy: 0.8494 - loss: 0.4345 - val_accuracy: 0.7630 - val_loss: 0.9149
Epoch 3/10
350/351 ━━━━━━━━━━━━━━━━━━━━ 0s 52ms/step - accuracy: 0.9530 - loss: 0.1459



351/351 ━━━━━━━━━━━━━━━━━━━━ 44s 104ms/step - accuracy: 0.9530 - loss: 0.1458 - val_accuracy: 0.8548 - val_loss: 0.4934
Epoch 4/10
351/351 ━━━━━━━━━━━━━━━━━━━━ 25s 60ms/step - accuracy: 0.9788 - loss: 0.0801 - val_accuracy: 0.8536 - val_loss: 0.5519
Epoch 5/10
351/351 ━━━━━━━━━━━━━━━━━━━━ 40s 56ms/step - accuracy: 0.9938 - loss: 0.0237 - val_accuracy: 0.8387 - val_loss: 0.7474
Epoch 6/10
351/351 ━━━━━━━━━━━━━━━━━━━━ 21s 58ms/step - accuracy: 0.9935 - loss: 0.0162 - val_accuracy: 0.8400 - val_loss: 0.8523
13/13 ━━━━━━━━━━━━━━━━━━━━ 1s 90ms/step - accuracy: 0.8384 - loss: 0.5854
Test accuracy: 0.8413
13/13 ━━━━━━━━━━━━━━━━━━━━ 7s 363ms/step
                      precision    recall  f1-score   support

  অর্থনীতি