In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested from the HTTP response on each read of the download loop.
CHUNK_SIZE = 40960
# Comma-separated list of '<directory>:<url-encoded download URL>' entries.
# The URL carries X-Goog-Date=20240605T055128Z and X-Goog-Expires=259200, i.e. it is a
# time-limited signed link -- presumably no longer valid; regenerate it before re-running.
DATA_SOURCE_MAPPING = 'airbus-ship-detection:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-competitions-data%2Fkaggle-v2%2F9988%2F868324%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240605%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240605T055128Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D267753a9e2005dd5dcc012649a117e2288b1679a82214cb2ca739b8f3bf416e2ed7a1eb0d542f1eec260d2842e2bfe64099758310f2f844992f6df8100c81776ed9c6485fef762ec116877631e5af55126cbe85da1496be7d9d36f5edce1dac218d595a32b3dcf328b40a634b868b4dc79a09c52e8d33c3d565db1f73079b3611d0417daa55082a6e8322d5907e64e55f6cefeaedcfb3999b7d83aa458e12f2ec8f2029a677a217921c9fba2114126ad4e4dafc361808822f16e4e198849bb36ddaed0384237425415be941d9ed28e1e8770e3fcb415ef0afa82dec977570d3b5caea263d433765085c7592b72a0db2a50218b468ece227a9f0a08d0cfcab4d9'

# Root directory under which each data source is extracted.
KAGGLE_INPUT_PATH='/kaggle/input'
# Working directory created next to the input directory.
KAGGLE_WORKING_PATH='/kaggle/working'
# NOTE(review): not referenced anywhere in the visible part of this notebook.
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every '<directory>:<encoded url>' entry of DATA_SOURCE_MAPPING into a
# temporary file and unpack it under KAGGLE_INPUT_PATH/<directory>.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split on the first ':' only, so an unexpected literal ':' in the URL part
    # cannot break the two-value unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            # The header may be missing (e.g. chunked responses); in that case the
            # bar stays empty and only the byte counter advances.
            total_bytes = int(total_length) if total_length else 0
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = min(50, int(50 * dl / total_bytes)) if total_bytes else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            # Push buffered bytes to disk: the tar branch reopens the file by name
            # and would otherwise see a truncated archive.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name; reusing `tarfile` here would
                # overwrite the module and break every later iteration.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path} ({e})')
        continue
    except OSError as e:
        # Includes network failures and local errors such as a full disk; report
        # the reason instead of hiding it.
        print(f'Failed to load {download_url} to path {destination_path} ({e})')
        continue

print('Data source import complete.')


Downloading airbus-ship-detection, 30690510746 bytes compressed
Data source import complete.


In [None]:
import numpy as np
import pandas as pd
from PIL import Image
import os
from sklearn.model_selection import train_test_split

In [None]:
# NOTE(review): `rm -rf` is invoked without any path operand, so it names nothing
# to remove -- presumably a leftover. Confirm, then delete this cell or add the
# intended path.
!rm -rf

In [None]:
import keras
import tensorflow as tf
from keras.layers import Input,Conv2D,MaxPooling2D,Dropout,Dense,Flatten,BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.models import Model

In [None]:
# df: the segmentation table, one row per (ImageId, EncodedPixels) mask entry

In [None]:
# Load the training segmentation table shipped with the competition data.
df = pd.read_csv(
    filepath_or_buffer="/kaggle/input/airbus-ship-detection/train_ship_segmentations_v2.csv"
)

In [None]:
# Full path of every image file: the train_v2 directory joined with the ImageId.
df['path'] = [
    os.path.join("/kaggle/input/airbus-ship-detection/train_v2", image_id)
    for image_id in df['ImageId']
]

In [None]:
# Inspect the table now that the 'path' column has been added.
df

Unnamed: 0,ImageId,EncodedPixels,path
0,00003e153.jpg,,/kaggle/input/airbus-ship-detection/train_v2/0...
1,0001124c7.jpg,,/kaggle/input/airbus-ship-detection/train_v2/0...
2,000155de5.jpg,264661 17 265429 33 266197 33 266965 33 267733...,/kaggle/input/airbus-ship-detection/train_v2/0...
3,000194a2d.jpg,360486 1 361252 4 362019 5 362785 8 363552 10 ...,/kaggle/input/airbus-ship-detection/train_v2/0...
4,000194a2d.jpg,51834 9 52602 9 53370 9 54138 9 54906 9 55674 ...,/kaggle/input/airbus-ship-detection/train_v2/0...
...,...,...,...
231718,fffedbb6b.jpg,,/kaggle/input/airbus-ship-detection/train_v2/f...
231719,ffff2aa57.jpg,,/kaggle/input/airbus-ship-detection/train_v2/f...
231720,ffff6e525.jpg,,/kaggle/input/airbus-ship-detection/train_v2/f...
231721,ffffc50b4.jpg,,/kaggle/input/airbus-ship-detection/train_v2/f...


In [None]:
# Rows without a mask have a missing EncodedPixels value; store 0 there so that
# "no mask" can be tested with a plain comparison against 0.
rows_without_mask = df["EncodedPixels"].isna()
df.loc[rows_without_mask, "EncodedPixels"] = 0


In [None]:
# has_ship: 1 when the row carries a mask (EncodedPixels != 0), otherwise 0

In [None]:
# Start with every row flagged as "no ship"; rows with a mask are flipped to 1 afterwards.
df["has_ship"] = np.zeros(len(df), dtype="int64")

In [None]:
# A row holds a ship mask exactly when its EncodedPixels value is not the 0 placeholder.
rows_with_mask = df["EncodedPixels"].ne(0)
df.loc[rows_with_mask, "has_ship"] = 1

In [None]:
# Inspect the table after the 'has_ship' flag has been filled in.
df


Unnamed: 0,ImageId,EncodedPixels,path,has_ship
0,00003e153.jpg,0,/kaggle/input/airbus-ship-detection/train_v2/0...,0
1,0001124c7.jpg,0,/kaggle/input/airbus-ship-detection/train_v2/0...,0
2,000155de5.jpg,264661 17 265429 33 266197 33 266965 33 267733...,/kaggle/input/airbus-ship-detection/train_v2/0...,1
3,000194a2d.jpg,360486 1 361252 4 362019 5 362785 8 363552 10 ...,/kaggle/input/airbus-ship-detection/train_v2/0...,1
4,000194a2d.jpg,51834 9 52602 9 53370 9 54138 9 54906 9 55674 ...,/kaggle/input/airbus-ship-detection/train_v2/0...,1
...,...,...,...,...
231718,fffedbb6b.jpg,0,/kaggle/input/airbus-ship-detection/train_v2/f...,0
231719,ffff2aa57.jpg,0,/kaggle/input/airbus-ship-detection/train_v2/f...,0
231720,ffff6e525.jpg,0,/kaggle/input/airbus-ship-detection/train_v2/f...,0
231721,ffffc50b4.jpg,0,/kaggle/input/airbus-ship-detection/train_v2/f...,0


In [None]:
# agg_ship_df: one row per unique ImageId, with the number of mask rows (ships) for that image

In [None]:
# Collapse the per-mask rows to one row per image, summing the 0/1 has_ship flags.
agg_ship_df = df.groupby('ImageId', as_index=False)['has_ship'].sum()

In [None]:
# The summed flag is a per-image mask count, so relabel the two columns accordingly.
agg_ship_df = agg_ship_df.set_axis(["ImageId", "ships"], axis=1)

In [None]:
# Inspect the per-image table (columns: ImageId, ships).
agg_ship_df

Unnamed: 0,ImageId,ships
0,00003e153.jpg,0
1,0001124c7.jpg,0
2,000155de5.jpg,1
3,000194a2d.jpg,5
4,0001b1832.jpg,0
...,...,...
192551,fffedbb6b.jpg,0
192552,ffff2aa57.jpg,0
192553,ffff6e525.jpg,0
192554,ffffc50b4.jpg,0


In [None]:
# Binary label per image: 1 when at least one ship mask was counted, otherwise 0.
agg_ship_df["has_ship"] = (agg_ship_df["ships"] > 0).astype("int64")

In [None]:
# Inspect the per-image table after adding the binary 'has_ship' label.
agg_ship_df

Unnamed: 0,ImageId,ships,has_ship
0,00003e153.jpg,0,0
1,0001124c7.jpg,0,0
2,000155de5.jpg,1,1
3,000194a2d.jpg,5,1
4,0001b1832.jpg,0,0
...,...,...,...
192551,fffedbb6b.jpg,0,0
192552,ffff2aa57.jpg,0,0
192553,ffff6e525.jpg,0,0
192554,ffffc50b4.jpg,0,0


In [None]:
# Wrap each label in a one-element list; the generator helper later stacks these
# into an array with one column.
agg_ship_df['vec'] = [[label] for label in agg_ship_df['has_ship']]


In [None]:
# Remove the row-level flag so it does not collide with the per-image 'has_ship'
# column when the two tables are merged. Dropping into a new frame with
# errors='ignore' keeps the cell safe to re-run (the in-place drop raised
# KeyError on a second execution).
df = df.drop(columns=['has_ship'], errors='ignore')


In [None]:
# 70/30 split of the per-image table, stratified on the ship count so both parts
# see the same count distribution. The fixed random_state makes the split
# identical on every run.
train_data, valid_data = train_test_split(
    agg_ship_df,
    test_size=0.3,
    stratify=agg_ship_df['ships'],
    random_state=42,
)

In [None]:
# Attach the per-image labels to the mask-level rows. The join key is named
# explicitly (instead of relying on whichever columns happen to be shared), and
# validate= asserts that each ImageId occurs at most once on the label side.
# Note that images with several mask rows appear once per row in the result.
train_df = pd.merge(df, train_data, on='ImageId', how='inner', validate='many_to_one')
valid_df = pd.merge(df, valid_data, on='ImageId', how='inner', validate='many_to_one')

In [None]:
# Inspect the merged training table (mask-level rows joined with per-image labels).
train_df

Unnamed: 0,ImageId,EncodedPixels,path,ships,has_ship,vec
0,00003e153.jpg,0,/kaggle/input/airbus-ship-detection/train_v2/0...,0,0,[0]
1,0001124c7.jpg,0,/kaggle/input/airbus-ship-detection/train_v2/0...,0,0,[0]
2,000155de5.jpg,264661 17 265429 33 266197 33 266965 33 267733...,/kaggle/input/airbus-ship-detection/train_v2/0...,1,1,[1]
3,0001b1832.jpg,0,/kaggle/input/airbus-ship-detection/train_v2/0...,0,0,[0]
4,00021ddc3.jpg,108287 1 109054 3 109821 4 110588 5 111356 5 1...,/kaggle/input/airbus-ship-detection/train_v2/0...,9,1,[1]
...,...,...,...,...,...,...
162196,fffe4fa23.jpg,0,/kaggle/input/airbus-ship-detection/train_v2/f...,0,0,[0]
162197,fffe646cf.jpg,0,/kaggle/input/airbus-ship-detection/train_v2/f...,0,0,[0]
162198,fffedbb6b.jpg,0,/kaggle/input/airbus-ship-detection/train_v2/f...,0,0,[0]
162199,ffff2aa57.jpg,0,/kaggle/input/airbus-ship-detection/train_v2/f...,0,0,[0]


In [None]:
# Image generator: scales pixel values by 1/255 and applies random shifts
# (up to 20% in each direction) and a shear of 0.1.
datagen = ImageDataGenerator(
    rescale=1./255,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.1,
)


In [None]:
def flow_from_dataframe(idg, df, path_col, y_col, batch_size=32, validate_filenames=True):
    """Build a directory iterator from `idg` and re-point it at the rows of `df`.

    The iterator is created with `idg.flow_from_directory` on the directory of
    the first path, then its file list, labels and sample counts are replaced
    by the contents of the dataframe.

    Parameters
    ----------
    idg : object providing `flow_from_directory(directory, batch_size=..., class_mode=...)`
        (the notebook passes a Keras ImageDataGenerator).
    df : pandas.DataFrame with at least the columns `path_col` and `y_col`.
    path_col : name of the column holding the full image paths.
    y_col : name of the column holding the labels; values are combined with np.stack.
    batch_size : batch size handed to `flow_from_directory` (default 32, the
        value that used to be hard-coded).
    validate_filenames : when True (default), rows whose file does not exist on
        disk are skipped with a message, so a missing image is reported here
        rather than as a FileNotFoundError in the middle of training.

    Returns
    -------
    The iterator returned by `idg.flow_from_directory`, modified in place.

    Raises
    ------
    FileNotFoundError
        If `validate_filenames` is True and none of the listed files exist.
    """
    all_paths = df[path_col].values
    main_dir = os.path.dirname(all_paths[0])
    new_df = idg.flow_from_directory(main_dir, batch_size=batch_size, class_mode="sparse")

    if validate_filenames:
        present = np.array([os.path.isfile(p) for p in all_paths], dtype=bool)
        n_missing = int((~present).sum())
        if n_missing:
            print('Skipping {} rows whose image file is missing on disk'.format(n_missing))
            df = df[present]
        if df.shape[0] == 0:
            raise FileNotFoundError(
                'None of the {} files listed in column {!r} exist'.format(len(all_paths), path_col)
            )

    new_df.filenames = df[path_col].values
    new_df.classes = np.stack(df[y_col].values)
    new_df.n = df.shape[0]
    # Replace (rather than append to) whatever the directory scan found, so that
    # filepaths stays aligned index-by-index with filenames and classes.
    # Assumes `filepaths` is a mutable list, as the original extend() call did.
    del new_df.filepaths[:]
    new_df.filepaths.extend(new_df.filenames)
    new_df.samples = df.shape[0]
    new_df._set_index_array()  # rebuild the index for the new sample count
    new_df.directory = '' # since we have the full path
    print('Reinserting dataframe: {} images'.format(df.shape[0]))
    return new_df

In [None]:
# Training batches: the augmenting generator applied to the rows of train_df,
# reading image paths from 'path' and labels from 'vec'.
train_data = flow_from_dataframe(
    idg=datagen,
    df=train_df,
    path_col="path",
    y_col="vec",
)

Found 0 images belonging to 0 classes.
Reinserting dataframe: 162201 images


In [None]:
# Validation images must not be randomly shifted or sheared, otherwise the
# validation metrics change from run to run and do not describe the real images.
# Use a generator that only applies the same 1/255 rescaling as training.
valid_datagen = ImageDataGenerator(rescale=1./255)
valid_data = flow_from_dataframe(valid_datagen, valid_df, "path", "vec")

Found 0 images belonging to 0 classes.
Reinserting dataframe: 69522 images


In [None]:
#train_x,train_y=next(train_data)
#valid_x,valid_y=next(valid_data)

In [None]:
from keras.models import Sequential

In [None]:
# Empty sequential container; the layers are appended one by one in the next cell.
model=Sequential()

In [None]:
# Layer stack, in order: five 3x3 ReLU convolutions (32, 64, 96, 64, 32 filters),
# each followed by batch normalization, with 2x2 max pooling after the first two;
# the result is flattened into a single sigmoid unit. Input is 256x256 RGB.
layer_stack = [
    Conv2D(32, (3, 3), activation='relu', input_shape=(256, 256, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),
    Conv2D(96, (3, 3), activation='relu'),
    BatchNormalization(),
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    Conv2D(32, (3, 3), activation='relu'),
    BatchNormalization(),
    Flatten(),
    Dense(units=1, activation='sigmoid'),
]
for layer in layer_stack:
    model.add(layer)


In [None]:
# Binary classification setup: Adam optimizer, binary cross-entropy loss,
# accuracy reported as the metric. Then print the layer table.
compile_options = {
    'optimizer': 'adam',
    'loss': 'binary_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_options)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 254, 254, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2  (None, 127, 127, 32)      0         
 D)                                                              
                                                                 
 batch_normalization (Batch  (None, 127, 127, 32)      128       
 Normalization)                                                  
                                                                 
 conv2d_1 (Conv2D)           (None, 125, 125, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 62, 62, 64)        0         
 g2D)                                                            
                                                        

In [None]:
# Short training run: 3 epochs of 100 training batches each, evaluated on
# 100 validation batches per epoch.
history = model.fit(
    x=train_data,
    epochs=3,
    steps_per_epoch=100,
    validation_data=valid_data,
    validation_steps=100,
)

FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/input/airbus-ship-detection/train_v2/f725b9e45.jpg'