In [None]:
import pandas as pd
import numpy as np

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, plot_roc_curve
import matplotlib.pyplot as plt

# Step 1: Turn the raw text into TF-IDF features.
# The vocabulary (capped at 5000 terms) is fitted on the training split only and
# then re-applied unchanged to the test split.
# NOTE(review): X_train / X_test / y_train are not defined in any visible cell —
# confirm they are created earlier in the notebook.
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Step 2: Fit a logistic-regression classifier with library defaults.
# `fit` returns the estimator itself, so construction and training are chained.
model = LogisticRegression().fit(X_train_tfidf, y_train)

# Step 3: Score the held-out split.
y_pred = model.predict(X_test_tfidf)  # hard class labels
class_probabilities = model.predict_proba(X_test_tfidf)
y_pred_proba = class_probabilities[:, 1]  # second probability column, fed to ROC / AUC-ROC below

# Step 4: Evaluate the model
# All metrics below compare the hard predictions `y_pred` against `y_test`.
# average='binary' means precision/recall/F1 are reported for the positive class only.

# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Precision, Recall, F1-Score
precision = precision_score(y_test, y_pred, average='binary')
recall = recall_score(y_test, y_pred, average='binary')
f1 = f1_score(y_test, y_pred, average='binary')
print(f"Precision: {precision:.2f}")
print(f"Recall (Sensitivity): {recall:.2f}")
print(f"F1-Score: {f1:.2f}")

# Confusion Matrix
# scikit-learn layout: rows = true class, columns = predicted class,
# so for a binary problem cm == [[TN, FP], [FN, TP]].
cm = confusion_matrix(y_test, y_pred)
print(f"Confusion Matrix:\n{cm}")

# Specificity (True Negative Rate) = TN / (TN + FP).
# This must read the *first* row of the matrix (the true negatives' row);
# the second row (cm[1, :]) would reproduce recall instead.
true_negatives, false_positives = cm[0, 0], cm[0, 1]
actual_negatives = true_negatives + false_positives
# Guard the degenerate case of a test split without any negative samples.
specificity = true_negatives / actual_negatives if actual_negatives else float("nan")
print(f"Specificity (True Negative Rate): {specificity:.2f}")

# Classification report for detailed metrics
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Step 5: ROC Curve and AUC-ROC
# The curve is computed from the positive-class probabilities, not the hard labels.
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
roc_auc = auc(fpr, tpr)
print(f"AUC-ROC: {roc_auc:.2f}")

# Plot ROC Curve (explicit figure/axes interface)
fig, ax = plt.subplots()
ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
# Diagonal = the line TPR == FPR, drawn as a visual reference.
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set(
    xlim=[0.0, 1.0],
    ylim=[0.0, 1.05],  # small head-room so the curve is not clipped at TPR == 1
    xlabel='False Positive Rate',
    ylabel='True Positive Rate',
    title='Receiver Operating Characteristic (ROC)',
)
ax.legend(loc="lower right")
plt.show()

# Step 6: Predict on a new text
# The sentence is wrapped in a list because the vectorizer works on an iterable of documents.
new_text = ["There is a fire in California right now!"]
# transform only (no re-fit): the new text is encoded with the vocabulary fitted in Step 1.
new_text_tfidf = vectorizer.transform(raw_documents=new_text)
prediction = model.predict(X=new_text_tfidf)
print(f"Prediction for the new text: {prediction}")


In [None]:
from PIL import ImageFile
# Module-level Pillow switch; per the Pillow documentation it lets truncated image
# files be loaded instead of raising. It is set once here and stays in effect for
# every later Pillow load in this kernel.
ImageFile.LOAD_TRUNCATED_IMAGES = True
from PIL import Image
import os

# Root folders of the train / validation / test image splits that get scanned below.
train_dir, val_dir, test_dir = (
    '/kaggle/working/train/',
    '/kaggle/working/val/',
    '/kaggle/working/test/',
)

def check_images(directory):
    """Recursively scan ``directory`` and delete every file Pillow cannot verify.

    Each file found by ``os.walk`` is opened with ``Image.open`` and checked with
    ``Image.verify()``. Any exception raised while doing so marks the file as
    corrupted: it is removed from disk and a message is printed.

    Note that *every* file is checked, not only files with image extensions, so
    non-image files inside the tree are deleted as well.

    Args:
        directory: Root folder to scan. ``os.walk`` yields nothing for a missing
            or empty folder, so such a call is a no-op.

    Returns:
        None. The function works through side effects only (deleting, printing).
    """
    for root, dirs, files in os.walk(directory):
        for file in files:
            # Built outside the try block so the handler below can always rely on it.
            img_path = os.path.join(root, file)
            try:
                # The context manager closes the file handle even when verify()
                # raises; an open handle can otherwise block the deletion below.
                with Image.open(img_path) as img:
                    img.verify()  # Verify if the image is valid
            except Exception as e:
                try:
                    os.remove(img_path)
                except OSError as remove_error:
                    # A file that cannot be deleted must not abort the whole scan.
                    print(f"Could not remove {img_path}: {remove_error}")
                print(f"Corrupted image: {img_path}, Error: {e}")

# Sweep the train, validation and test folders (in that order) for unreadable files.
for split_dir in (train_dir, val_dir, test_dir):
    check_images(split_dir)
print("CHECK DONE")

In [None]:
#!rm /kaggle/working/val/Human_Damage/02_0069.png

In [None]:
# Disabled experiment: the whole baseline-CNN pipeline below is wrapped in a
# triple-quoted string, so none of it executes. As the only expression in the cell
# the notebook merely echoes the text. Kept for reference only.
'''import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Set paths for training, validation, and test data
train_dir = '/kaggle/working/train/'
val_dir = '/kaggle/working/val/'
test_dir = '/kaggle/working/test/'

# Image dimensions and other parameters
img_height, img_width = 150, 150  # You can adjust this based on your needs
batch_size = 32

# Data augmentation for the training set
train_datagen = ImageDataGenerator(
    rescale=1.0/255,  # Normalize pixel values
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Data preparation for validation and test sets
val_datagen = ImageDataGenerator(rescale=1.0/255)
test_datagen = ImageDataGenerator(rescale=1.0/255)

# Load images from directories
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical'  # Use 'binary' for binary classification
)

val_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False  # Do not shuffle for evaluation
)

# Define a simple CNN model
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(len(train_generator.class_indices), activation='softmax')  # Number of classes
])

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // batch_size,
    validation_data=val_generator,
    validation_steps=val_generator.samples // batch_size,
    epochs=10  # You can adjust the number of epochs
)

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(test_generator)
print(f'Test accuracy: {test_accuracy:.2f}')

# Make predictions (optional)
# predictions = model.predict(test_generator)'''


In [None]:
#model.save('image_classification_model.h5')  # Save the model to an HDF5 file#

In [None]:
# Imports come first so that every name used below (`os`, `random`, `np`, `tf`)
# is in scope even when this cell runs on a freshly restarted kernel.
import os
import random

import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG19
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import TerminateOnNaN
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.callbacks import CSVLogger
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import EarlyStopping

print("MODEL START DONE")

# Define paths for the callback directories
checkpoint_dir = '/kaggle/working/checkpoints/'
logs_dir = '/kaggle/working/logs/'
training_log_dir = '/kaggle/working/training_log/'

# Create the directories if they don't exist
# (exist_ok=True makes re-running the cell harmless).
os.makedirs(checkpoint_dir, exist_ok=True)
os.makedirs(logs_dir, exist_ok=True)
os.makedirs(training_log_dir, exist_ok=True)

# Seed Python's, NumPy's and TensorFlow's random number generators with the same value.
seed_value = 97  # You can choose any integer
random.seed(seed_value)
np.random.seed(seed_value)
tf.random.set_seed(seed_value)




# Set paths for training, validation, and test data
train_dir = '/kaggle/working/train/'
val_dir = '/kaggle/working/val/'
test_dir = '/kaggle/working/test/'

# Image dimensions and other parameters
img_height, img_width = 150, 150  # You can adjust this based on your needs
batch_size = 32

# Every split is rescaled the same way: pixel values are multiplied by 1/255.
pixel_scale = 1.0/255

# Random augmentation, applied to the training split only.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

train_datagen = ImageDataGenerator(rescale=pixel_scale, **augmentation_options)
# Validation and test images are only rescaled, never augmented.
val_datagen = ImageDataGenerator(rescale=pixel_scale)
test_datagen = ImageDataGenerator(rescale=pixel_scale)

# Options shared by all three directory iterators.
flow_options = dict(
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',  # Use 'binary' for binary classification
)

# Load images from directories
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
val_generator = val_datagen.flow_from_directory(val_dir, **flow_options)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    shuffle=False,  # Do not shuffle for evaluation
    **flow_options,
)

# Load the VGG19 model with ImageNet weights, excluding the top layers
input_shape = (img_height, img_width, 3)
base_model = VGG19(weights='imagenet', include_top=False, input_shape=input_shape)

# Freeze the base model layers so that only the newly added head is trained
for base_layer in base_model.layers:
    base_layer.trainable = False

# One output unit per class found by the training generator.
num_classes = len(train_generator.class_indices)

# Custom head on top of VGG19: flatten -> 128-unit ReLU layer -> softmax over classes
flattened = Flatten()(base_model.output)
x = Dense(128, activation='relu')(flattened)
output = Dense(num_classes, activation='softmax')(x)

# Create the final model (VGG19 input through to the new softmax output)
model = Model(inputs=base_model.input, outputs=output)

# Compile the model; categorical cross-entropy matches the 'categorical' class_mode
# of the generators.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Now define your callbacks using the created directories
best_model_path = os.path.join(checkpoint_dir, 'best_model.keras')
training_log_path = os.path.join(training_log_dir, 'training.log')

# Keep only the weights of the epoch with the best validation accuracy.
checkpoint = ModelCheckpoint(filepath=best_model_path, monitor='val_accuracy', save_best_only=True)
# Stop once validation loss has not improved for 5 epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=5)
# NOTE(review): this patience (6) is longer than early stopping's (5) on the same
# metric, so the LR reduction will rarely if ever trigger — confirm this is intended.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=6)
tensorboard = TensorBoard(log_dir=logs_dir, histogram_freq=1)
csv_logger = CSVLogger(filename=training_log_path)

def scheduler(epoch, lr):
    """Learning-rate schedule: hold the rate, then decay it exponentially.

    Args:
        epoch: Index of the epoch about to start.
        lr: Learning rate currently in use.

    Returns:
        ``lr`` unchanged while ``epoch`` is at most 5; afterwards ``lr``
        multiplied by ``exp(-0.1)``, returned as a Python float.
    """
    if epoch <= 5:
        return lr
    decay_factor = tf.math.exp(-0.1)
    return float(lr * decay_factor)

lr_scheduler = LearningRateScheduler(scheduler)
terminate_on_nan = TerminateOnNaN()

# All callbacks handed to fit(), in the same order as before.
training_callbacks = [
    checkpoint,
    early_stopping,
    reduce_lr,
    tensorboard,
    csv_logger,
    lr_scheduler,
    terminate_on_nan,
]

# Whole batches per pass over each split.
# NOTE(review): floor division leaves out a final partial batch — confirm that is acceptable.
train_steps = train_generator.samples // batch_size
val_steps = val_generator.samples // batch_size

# Train the model
history = model.fit(
    train_generator,
    epochs=1,  # You can adjust the number of epochs
    steps_per_epoch=train_steps,
    validation_data=val_generator,
    validation_steps=val_steps,
    callbacks=training_callbacks,
)

# Evaluate the model on the test set.
# evaluate() yields the loss followed by the compiled metrics (here: accuracy).
test_metrics = model.evaluate(test_generator)
test_loss, test_accuracy = test_metrics
print(f'Test accuracy: {test_accuracy:.2f}')

# Make predictions (optional)
# predictions = model.predict(test_generator)


In [None]:
# Label mapping taken from the training generator. Per the Keras docs `class_indices`
# is a dict of class-folder name -> integer index, so despite its name this variable
# holds the whole mapping rather than a plain list of names.
class_names = train_generator.class_indices
print(class_names)

In [None]:
# Persist the trained model to the working directory.
model.save(filepath='/kaggle/working/my_final_model.keras')  # Save as .keras format


In [None]:
import zipfile
import os

model_path = '/kaggle/working/my_final_model.keras'
zip_file_path = '/kaggle/working/my_final_model.zip'

# Create a zip file containing the model.
# The entry is stored under the bare file name so the archive does not
# reproduce the /kaggle/working/ directory structure.
archive_entry_name = os.path.basename(model_path)
with zipfile.ZipFile(zip_file_path, mode='w') as archive:
    archive.write(model_path, arcname=archive_entry_name)