# **Ripik.AI HackFest: Unleashing AI Potential**

AI-Powered Fraud Detection for Vehicle Insurance Claims Using a DenseNet201 Model

In [None]:
#Importing the required libraries
from tensorflow.keras.applications import DenseNet201
from tensorflow.keras.models import load_model
import tensorflow as tf
from tensorflow.keras.applications.resnet import preprocess_input
from tensorflow.keras import layers, models, optimizers
import pandas as pd
import numpy as np
import zipfile
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [None]:
#Unpack the training archive (stored under the mounted Drive path) into the
#'train' folder, resolved relative to the current working directory
train_archive_path = '/content/drive/MyDrive/Ripik Hackathon Dataset/train.zip'
with zipfile.ZipFile(train_archive_path, mode='r') as train_archive:
    train_archive.extractall(path='train')

In [None]:
#Read the training metadata; its 'filename' and 'label' columns feed the
#data generators further down
train_csv_path = '/content/train/train/train.csv'
train_df = pd.read_csv(train_csv_path)

In [None]:
#Preprocessing settings shared by the training and validation generators
batch_size = 16            #number of images yielded per batch
image_size = (224, 224)    #passed as target_size: every image is resized to this

In [None]:
#Hold out 20% of the rows for validation; random_state pins the shuffle so the
#split is reproducible.
#NOTE: train_df is rebound to the remaining 80%, so re-running this cell on its
#own splits the already-reduced frame again.
train_df, val_df = train_test_split(
    train_df,
    random_state=42,
    test_size=0.2,
)

In [None]:
#Cast the 'label' column of both splits to str; the generators below are built
#with class_mode='categorical', which takes the class names from this column
train_df = train_df.assign(label=train_df['label'].astype(str))
val_df = val_df.assign(label=val_df['label'].astype(str))

In [None]:
#Image pipelines: augmentation + rescaling for training, rescaling only for validation

#Options common to both flow_from_dataframe calls (same image folder, same
#columns, same target size / batch size, one-hot 'categorical' targets)
_flow_options = dict(
    directory='/content/train/train/images',
    x_col='filename',
    y_col='label',
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
)

#Training images are rescaled to [0, 1] and randomly sheared, zoomed and flipped
train_datagen = ImageDataGenerator(
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
    rescale=1.0/255.0,
)

#Validation images are only rescaled, never augmented
val_datagen = ImageDataGenerator(rescale=1.0/255.0)

train_generator = train_datagen.flow_from_dataframe(train_df, **_flow_options)

val_generator = val_datagen.flow_from_dataframe(val_df, **_flow_options)

Found 5760 validated image filenames belonging to 6 classes.
Found 1440 validated image filenames belonging to 6 classes.


In [None]:
#Loading the DenseNet201 convolutional base pre-trained on ImageNet data
#(include_top=False drops the original ImageNet classifier head).
#The input shape reuses image_size so the model always matches the generators.
base_model = DenseNet201(weights='imagenet', include_top=False, input_shape=image_size + (3,))
#Freeze the pre-trained layers: every layer except the last 10 is kept fixed, so
#only the top of the backbone and the new classification head are fine-tuned.
#(Slicing with [-10:] would do the opposite: freeze just the last 10 layers and
#leave the rest of the backbone trainable.)
for layer in base_model.layers[:-10]:
    layer.trainable = False
#One output unit per class discovered by the training generator
num_classes = len(train_generator.class_indices)
#Building the model: backbone -> global average pooling -> softmax classifier
model = models.Sequential()
model.add(base_model)
model.add(layers.GlobalAveragePooling2D())
model.add(layers.Dense(num_classes, activation='softmax'))
#Compile the model; categorical cross-entropy matches the one-hot targets
#produced by class_mode='categorical'
model.compile(
    optimizer=optimizers.SGD(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
#Display the model summary
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 densenet201 (Functional)    (None, 7, 7, 1920)        18321984  
                                                                 
 global_average_pooling2d_3  (None, 1920)              0         
  (GlobalAveragePooling2D)                                       
                                                                 
 dense_3 (Dense)             (None, 6)                 11526     
                                                                 
Total params: 18333510 (69.94 MB)
Trainable params: 17818054 (67.97 MB)
Non-trainable params: 515456 (1.97 MB)
_________________________________________________________________


In [None]:
#Model training: 20 epochs over the training generator, evaluated on the
#validation generator after each epoch. With save_best_only=True the checkpoint
#callback only overwrites 'best_model.h5' when its monitored metric improves.
checkpoint = ModelCheckpoint(filepath='best_model.h5', save_best_only=True)
history = model.fit(
    x=train_generator,
    epochs=20,
    validation_data=val_generator,
    callbacks=[checkpoint],
)

Epoch 1/20

  saving_api.save_model(


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
#Unpack the test archive (stored under the mounted Drive path) into the
#'test' folder, resolved relative to the current working directory
test_archive_path = '/content/drive/MyDrive/Ripik Hackathon Dataset/test.zip'
with zipfile.ZipFile(test_archive_path, mode='r') as test_archive:
    test_archive.extractall(path='test')

In [None]:
#Loading the trained model (the best checkpoint saved during training)
model = load_model('/content/best_model.h5')
#Loading the testing dataset
test_df = pd.read_csv('/content/test/test/test.csv')
#Create a data generator for test data: same 1/255 rescaling and target size as
#training, no labels (class_mode=None) and no shuffling so that predictions stay
#in the same order as the rows of test_df
test_datagen = ImageDataGenerator(rescale=1.0/255.0)
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    x_col='filename',
    y_col=None,
    directory='/content/test/test/images',
    target_size=image_size,
    batch_size=128,
    class_mode=None,
    shuffle=False
)
#Making predictions for the test set (one row of class probabilities per image)
predictions = model.predict(test_generator)
#flow_from_dataframe skips rows whose image file cannot be found; if that
#happened the predictions would no longer line up with test_df['image_id'],
#so fail loudly instead of writing a misaligned submission
if len(predictions) != len(test_df):
    raise ValueError(
        f"Got {len(predictions)} predictions for {len(test_df)} test rows; "
        "some test images are missing or invalid."
    )
#Show only the first few rows rather than the whole probability matrix
print(predictions[:5])
#Get the predicted labels: translate each arg-max column index back to the
#original class name using the mapping learned by the training generator,
#instead of assuming that label == index + 1
index_to_label = {index: label for label, index in train_generator.class_indices.items()}
predicted_labels = [index_to_label[index] for index in np.argmax(predictions, axis=1)]
#Creating a submission DataFrame
submission_df = pd.DataFrame({'image_id': test_df['image_id'], 'label': predicted_labels})
#Saving the submission DataFrame to a CSV file
submission_df.to_csv('submission.csv', index=False)

Found 4800 validated image filenames.
[[5.24211898e-02 3.42505127e-01 2.90265027e-03 4.63463932e-01
  6.78576343e-03 1.31921351e-01]
 [5.62954023e-02 5.77984512e-01 1.41354301e-03 2.86258996e-01
  4.32597753e-03 7.37216324e-02]
 [1.14479195e-02 5.98866761e-01 1.31065748e-03 3.49064976e-01
  1.86879584e-03 3.74409035e-02]
 ...
 [6.35236129e-02 7.57670879e-01 5.67723112e-03 7.78971165e-02
  1.17049310e-02 8.35262313e-02]
 [8.72745179e-04 9.22450423e-01 2.05950477e-04 3.99663150e-02
  2.61945184e-04 3.62425931e-02]
 [1.84447560e-02 3.21432874e-02 2.05048622e-04 9.18249846e-01
  1.66653842e-03 2.92905439e-02]]
