In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Imports

In [2]:
import pandas as pd
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Conv2D, Dense, Flatten, Input, LSTM, concatenate, Lambda
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from sklearn.model_selection import train_test_split
import numpy as np
import os
import tensorflow as tf

# Define Data Processors

In [3]:
from PIL import Image

# Disable the decompression bomb error warning by increasing the pixel limit
Image.MAX_IMAGE_PIXELS = None

In [4]:
def split_and_reshufle_data(df, test_size_ratio=0.8) ->  'pd.DataFrame':
    train_set, test_set = train_test_split(df, test_size=test_size_ratio)
    train_set = train_set.reset_index(drop=True)
    test_set = test_set.reset_index(drop=True)
    return train_set, test_set

In [5]:
def split_image(df):
    """
    Splits the 'filename' column from the rest of the DataFrame.

    Parameters:
        df (pd.DataFrame): The input DataFrame containing the data.

    Returns:
        tuple: A tuple containing:
            - path_to_images (pd.Series): The 'filename' column.
            - remaining_df (pd.DataFrame): The DataFrame without the 'path_to_image' column.
    """
    # Separate the 'path_to_image' column
    filename_images = df['filename'].values
    # Drop the 'path_to_image' column from the DataFrame
    remaining_df = df.drop(columns=['filename'])
    return filename_images, remaining_df

In [6]:
def split_target(df, target_column):
    """
    Splits the target column from the rest of the DataFrame.

    Parameters:
        df (pd.DataFrame): The input DataFrame containing the data.
        target_column (str): The name of the target column.

    Returns:
        tuple: A tuple containing:
            - target (pd.Series): The target column.
            - features_df (pd.DataFrame): The DataFrame without the target column.
    """
    # Separate the target column
    target = df[target_column]
    # Drop the target column from the DataFrame
    features_df = df.drop(columns=[target_column])
    return features_df, target

In [7]:
def process_tabular_data(df, drop_columns=['date', 'lat', 'long', 'absolute_time']):
    """
    Processes tabular data by dropping specified columns.

    Parameters:
        df (pd.DataFrame): The original DataFrame.
        drop_columns (list): List of column names to be dropped.

    Returns:
        pd.DataFrame: The processed DataFrame (without the dropped columns).
    """
    # Drop the specified columns
    df_processed = df.drop(columns=drop_columns)

    # Return the processed DataFrame
    return df_processed

In [8]:
def process_images(image_paths, target_size=(300, 430)):
    """
    Processes image paths into numpy arrays suitable for model input.

    Parameters:
        image_paths (list): List of paths to the image files.
        target_size (tuple): Target size for resizing images (default: (300, 430)).

    Returns:
        numpy.ndarray: Array of processed images with shape (n_samples, target_size[0], target_size[1], 3).
    """
    processed_images = []
    for path in image_paths:
        # Load the image
        img = load_img(path, target_size=target_size)
        # Convert the image to a numpy array
        img_array = img_to_array(img)
        # Normalize the image array to [0, 1]
        img_array = img_array / 255.0
        processed_images.append(img_array)

    return np.array(processed_images)

# Processing Data

In [9]:
path_to_original_csv = '/content/drive/MyDrive/Projects/Cassini/data/combined_dataset.csv'

In [10]:
original_df = pd.read_csv(path_to_original_csv)

In [11]:
original_df

Unnamed: 0,filename,date,lat,long,absolute_time,pressure,humidity,temp,wind_speed,wind_deg,clouds,potential_wildfire
0,41.06599_26.07051_02-01-2023.jpg,02-01-2023,41.0660,26.0705,1672664400,1030,61,11.69,0.16,179,0,0
1,41.11273_26.14135_02-01-2023.jpg,02-01-2023,41.1127,26.1414,1672664400,1030,67,11.91,0.34,115,0,0
2,41.14205_26.17694_02-01-2023.jpg,02-01-2023,41.1421,26.1769,1672664400,1030,63,12.63,0.52,162,0,0
3,41.16845_26.17591_02-01-2023.jpg,02-01-2023,41.1685,26.1759,1672664400,1030,63,11.80,0.52,162,0,0
4,41.19282_26.23720_02-01-2023.jpg,02-01-2023,41.1928,26.2372,1672664400,1030,63,12.86,0.52,162,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
103,37.96168_23.06582_14-07-2023.jpg,14-07-2023,37.9617,23.0658,1689339600,1012,45,38.51,1.39,293,3,1
104,36.07271_27.93581_18-07-2023.jpg,18-07-2023,36.0727,27.9358,1689685200,1005,55,28.86,2.75,278,0,1
105,36.09572_27.93561_18-07-2023.jpg,18-07-2023,36.0957,27.9356,1689685200,1005,55,28.99,2.75,278,0,1
106,41.23504_25.9001_01-11-2022.jpg,01-11-2022,41.2350,25.9001,1667307600,1022,52,22.01,2.27,48,53,1


In [12]:
test_set, train_set = split_and_reshufle_data(original_df)

train_path_image, train_df = split_image(train_set)
test_path_image, test_df = split_image(test_set)

X_train_tabular, y_train = split_target(train_df, target_column="potential_wildfire")
X_test_tabular, y_test = split_target(test_df, target_column="potential_wildfire")


X_train_tabular = process_tabular_data(X_train_tabular)
X_test_tabular = process_tabular_data(X_test_tabular)

In [48]:
X_train_tabular.columns

Index(['pressure', 'humidity', 'temp', 'wind_speed', 'wind_deg', 'clouds'], dtype='object')

In [47]:
path_to_all_images = '/content/drive/MyDrive/Projects/Cassini/data/COMMON'
train_path_image = [os.path.join(path_to_all_images, i) for i in train_path_image]
test_path_image = [os.path.join(path_to_all_images, i) for i in test_path_image]

In [14]:
X_train_images_array = process_images(train_path_image)
X_test_images_array = process_images(test_path_image)

In [15]:
X_train_tabular

Unnamed: 0,pressure,humidity,temp,wind_speed,wind_deg,clouds
0,1011,30,34.67,8.23,30,20
1,1030,63,11.80,0.52,162,0
2,1013,47,24.18,3.55,28,36
3,1005,55,28.20,2.75,278,0
4,1005,55,28.86,2.75,278,0
...,...,...,...,...,...,...
82,1005,69,12.69,1.18,257,18
83,1001,14,38.56,9.52,138,70
84,1013,47,23.60,3.55,28,36
85,1013,47,23.39,3.55,28,36


# Define Model Architecture

In [30]:
def build_model():
    """
    Builds a dual-input neural network model with an image branch and a tabular branch.

    Returns:
        keras.Model: The compiled model.
    """
    # Image processing branch
    image_input = Input(shape=(300, 430, 3))
    base_model = ResNet50(include_top=False, input_shape=(300, 430, 3), weights='imagenet')
    x = base_model(image_input)
    x = Flatten()(x)
    image_branch_output = Dense(128, activation=tf.keras.layers.LeakyReLU(negative_slope=0.3))(x)

    # Tabular data processing branch
    tabular_input = Input(shape=(6,))
    # Wrap tf.expand_dims in a Lambda layer
    tabular_input_expanded = Lambda(lambda x: tf.expand_dims(x, axis=1))(tabular_input)
    tabular_branch_output = Dense(128, activation='sigmoid')(tabular_input_expanded)

    # Combine the outputs of the two branches
    combined = concatenate([image_branch_output, tabular_branch_output])
    z = Dense(128, activation='sigmoid')(combined)

    # Output layer with one unit for binary classification
    output = Dense(1, activation='sigmoid')(z)

    # Build and compile the model
    model = Model(inputs=[image_input, tabular_input], outputs=output)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Training model

In [26]:
model_save_path = "/content/drive/MyDrive/Projects/Cassini/models/model2.h5"

In [42]:
# Initialize the model
model = build_model()

# Train the model
model.fit(
    [X_train_images_array, X_train_tabular],  # Input: Images and Tabular data
    y_train,
    validation_data=([X_test_images_array, X_test_tabular], y_test),
    epochs=50,
    batch_size=10,
    verbose=1
)

# Save the model to the specified path
model.save(model_save_path)
print(f"Model saved at {model_save_path}")

Epoch 1/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 5s/step - accuracy: 0.6827 - loss: 0.6471 - val_accuracy: 0.4286 - val_loss: 1.2510
Epoch 2/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 294ms/step - accuracy: 0.8421 - loss: 0.4390 - val_accuracy: 0.4286 - val_loss: 1.3956
Epoch 3/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 280ms/step - accuracy: 0.8738 - loss: 0.2845 - val_accuracy: 0.5714 - val_loss: 0.6831
Epoch 4/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 278ms/step - accuracy: 0.9642 - loss: 0.1787 - val_accuracy: 0.4286 - val_loss: 1.2887
Epoch 5/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 297ms/step - accuracy: 0.9515 - loss: 0.1472 - val_accuracy: 0.4286 - val_loss: 1.9030
Epoch 6/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 282ms/step - accuracy: 0.8333 - loss: 0.4231 - val_accuracy: 0.4286 - val_loss: 2.4962
Epoch 7/50
[1m9/9[0m [32m━━━━━━━━━━━━━



Model saved at /content/drive/MyDrive/Projects/Cassini/models/model2.h5


In [43]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report
import numpy as np

# Evaluate the model on the test set
y_pred_probs = model.predict([X_test_images_array, X_test_tabular])  # Predict probabilities
y_pred = (y_pred_probs > 0.5).astype(int)  # Convert probabilities to binary predictions

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred_probs)

# Print the metrics
print("Evaluation Metrics:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"ROC-AUC Score: {roc_auc:.4f}")

# Optional: Print a classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7s/step
Evaluation Metrics:
Accuracy: 0.5714
Precision: 0.0000
Recall: 0.0000
F1 Score: 0.0000
ROC-AUC Score: 0.6667

Classification Report:
              precision    recall  f1-score   support

           0       0.57      1.00      0.73        12
           1       0.00      0.00      0.00         9

    accuracy                           0.57        21
   macro avg       0.29      0.50      0.36        21
weighted avg       0.33      0.57      0.42        21



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Prediction

In [44]:
# Function to make a prediction for a single input
def predict_single(image_path, tabular_input, model):
    """
    Predicts output for a single image and tabular input.

    Parameters:
        image_path (str): Path to the image.
        tabular_input (list or array): Tabular input data as a 1D array.
        model (Model): The trained model.

    Returns:
        numpy.ndarray: Predicted output.
    """
    # Process the image
    image_array = process_images([image_path])  # Returns a 4D array (1, 300, 300, 3)
    # Convert tabular input to a 2D array (1, 6)
    tabular_array = np.expand_dims(tabular_input, axis=0)
    # Make prediction
    prediction = model.predict([image_array, tabular_array])
    return prediction

In [45]:
# Example: Predict for a single input
example_image_path = test_path_image[0]  # Use a test image path
example_tabular_input = X_test_tabular.iloc[0]  # Use corresponding tabular data
prediction = predict_single(example_image_path, example_tabular_input, model)

print("Prediction for single input:", prediction)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
Prediction for single input: [[0.03454247]]


# TESTING

In [None]:
path_photos = '/content/drive/MyDrive/Projects/Cassini/data/TRUE'

In [None]:
import os

In [None]:
def create_fake_dataset(image_folder_path, num_columns=6):
    """
    Creates a fake dataset from images in a folder with random tabular data and binary target.

    Parameters:
        image_folder_path (str): Path to the folder containing images.
        num_columns (int): Number of random columns to generate (default: 6).

    Returns:
        pd.DataFrame: A DataFrame with image paths, random tabular data, and a binary target column.
    """
    # Get a list of image paths
    image_paths = [
        os.path.join(image_folder_path, filename)
        for filename in os.listdir(image_folder_path)
        if filename.lower().endswith(('.png', '.jpg', '.jpeg'))
    ]

    if not image_paths:
        raise ValueError(f"No images found in the folder: {image_folder_path}")

    # Generate random data for columns
    num_images = len(image_paths)
    random_data = {
        f"column{i+1}": np.random.rand(num_images) for i in range(num_columns)
    }

    # Generate random binary target column
    random_data['target'] = np.random.choice([0, 1], size=num_images)

    # Add image paths to the DataFrame
    random_data['path_to_image'] = image_paths

    # Create DataFrame
    df = pd.DataFrame(random_data)

    return df

In [None]:
fake_df  = create_fake_dataset(path_photos)

In [None]:
fake_df

Unnamed: 0,column1,column2,column3,column4,column5,column6,target,path_to_image
0,0.198597,0.937631,0.906592,0.577526,0.065384,0.775842,0,/content/drive/MyDrive/Projects/Cassini/data/T...
1,0.057167,0.121903,0.993898,0.072478,0.903601,0.170554,1,/content/drive/MyDrive/Projects/Cassini/data/T...
2,0.469939,0.620575,0.693336,0.017758,0.976206,0.635406,0,/content/drive/MyDrive/Projects/Cassini/data/T...
3,0.561949,0.642366,0.544218,0.825793,0.916712,0.460333,1,/content/drive/MyDrive/Projects/Cassini/data/T...
4,0.956612,0.846405,0.86927,0.676869,0.412658,0.485153,1,/content/drive/MyDrive/Projects/Cassini/data/T...
5,0.245115,0.772364,0.788404,0.286908,0.553916,0.776835,0,/content/drive/MyDrive/Projects/Cassini/data/T...
6,0.370756,0.045522,0.511941,0.282198,0.762601,0.667828,1,/content/drive/MyDrive/Projects/Cassini/data/T...
7,0.723349,0.975001,0.722528,0.301466,0.453865,0.233122,0,/content/drive/MyDrive/Projects/Cassini/data/T...
8,0.076485,0.92726,0.855316,0.327749,0.497369,0.257699,1,/content/drive/MyDrive/Projects/Cassini/data/T...
9,0.66417,0.080772,0.419019,0.564154,0.995753,0.080996,1,/content/drive/MyDrive/Projects/Cassini/data/T...


In [None]:
train_set, test_set = split_data(fake_df)

train_path_image, train_df = split_path_to_image(train_set)
test_path_image, test_df = split_path_to_image(test_set)

X_train_tabular, y_train = split_target(train_df, target_column="target")
X_test_tabular, y_test = split_target(test_df, target_column="target")

X_train_images_array = process_images(train_path_image)
X_test_images_array = process_images(test_path_image)

In [None]:
# Initialize the model
model = build_model()

# Train the model
history = model.fit(
    [X_train_images_array, X_train_tabular],  # Inputs: Images and Tabular data
    y_train,  # Target
    validation_data=([X_test_images_array, X_test_tabular], y_test),  # Validation set
    epochs=20,  # Adjust epochs as needed
    batch_size=8,  # Adjust batch size as needed
    verbose=1
)

Epoch 1/20
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 21s/step - accuracy: 0.6742 - loss: 4.8989 - val_accuracy: 0.4375 - val_loss: 4.8357
Epoch 2/20
[1m1/2[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m18s[0m 19s/step - accuracy: 0.7500 - loss: 0.9362

KeyboardInterrupt: 