

![](https://images.aicrowd.com/raw_images/challenges/banner_file/897/b4a0766bb1ef4b621e3a.jpg)



# 0. Installing aicrowd-cli and downloading the data


In [None]:
!pip install -q aicrowd-cli
%load_ext aicrowd.magic

In [None]:
# Authenticate with AIcrowd: the magic prints a login URL and, once the key is
# validated, stores the API key for the following `%aicrowd` commands.
%aicrowd login

Please login here: [34m[1m[4mhttps://api.aicrowd.com/auth/qBNvIq0YzlT6GTsihXKlzHlxe7hIACPEFHrs6maKIgQ[0m
[32mAPI Key valid[0m
[32mSaved API Key successfully![0m


In [None]:
# Downloading the dataset
# Start from an empty `data/` directory so files from earlier runs do not linger.
!rm -rf data
!mkdir data
# Fetch the `environment-classification` files (presumably `-c` selects the
# challenge and `-o` the output directory); this produces data/images.zip.
%aicrowd ds dl -c environment-classification -o data

images.zip:   0%|          | 0.00/22.8M [00:00<?, ?B/s]

In [None]:
# Unzipping and organising the dataset
# Extract the archive into data/images; stdout is discarded to keep the cell
# output short.
!unzip data/images.zip  -d data/images > /dev/null

# 1. Importing Libraries

In [None]:
import os
import csv 
from pathlib import Path
import random
import time

import pandas as pd
import numpy as np

In [None]:
# Directory holding the extracted challenge images (target of the unzip step).
DATA_DIR = "data/images/"

# 2. Modeling
### First we download ResNet50 

In [None]:
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.models import Sequential

# ImageNet-pretrained ResNet50 without its classification head; global average
# pooling reduces the last feature map to a single vector per image.
resnet = ResNet50(weights='imagenet', include_top=False, pooling='avg')

# Wrap the backbone in a Sequential model that is used for prediction below.
my_new_model = Sequential([resnet])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# The first (and only) layer is the ResNet backbone, which is already trained
# on ImageNet, so mark it as non-trainable.
backbone = my_new_model.layers[0]
backbone.trainable = False

### Now we get a 2048-dimensional feature vector for each image by applying ResNet50 trained on ImageNet

In [None]:
%%time
from tensorflow.keras.applications.resnet50 import preprocess_input
import cv2 
import numpy as np

resnet_feature_list = []
images = [f for f in os.listdir(DATA_DIR)]
for image in images:
    file = DATA_DIR+image
    im = cv2.imread(file)
    #im = cv2.resize(im,(256,256))
    img = preprocess_input(np.expand_dims(im.copy(), axis=0))
    resnet_feature = my_new_model.predict(img)
    resnet_feature_np = np.array(resnet_feature)
    resnet_feature_list.append(resnet_feature_np.flatten())

array = np.array(resnet_feature_list)

CPU times: user 1min 13s, sys: 2.45 s, total: 1min 15s
Wall time: 1min 39s


In [None]:
# Sanity check: expect one 2048-dimensional feature row per image.
array.shape

(700, 2048)

### Now we apply the standard k-means algorithm to separate these vectors into 5 clusters

In [None]:
from sklearn.cluster import KMeans 

# Cluster the feature vectors into 5 groups. k-means starts from random
# centroids, so `random_state` is pinned: without a seed both the partition and
# the (arbitrary) cluster numbering can change on every execution, which makes
# the label-based steps that follow irreproducible.
kmeans = KMeans(n_clusters=5, random_state=42, n_init=50, max_iter=1000).fit(array)


### This is an unsupervised learning problem so we investigate the results manually

In [None]:
# The file name without its extension is the numeric image ID. `Path.stem`
# drops the suffix whatever its length (".jpg", ".jpeg", ...), unlike slicing
# off a fixed 4 characters.
img_ids_list = [Path(f).stem for f in images]

# Pair every image ID with the cluster it was assigned to.
pre_sub = pd.DataFrame({'ImageID': img_ids_list, "label": kmeans.labels_})

# IDs come from file names as strings; cast to int so they sort numerically.
pre_sub = pre_sub.astype(int)
pre_sub = pre_sub.sort_values(by=['ImageID'])

pre_sub

Unnamed: 0,ImageID,label
160,0,2
127,1,4
410,2,4
484,3,3
324,4,2
...,...,...
528,695,3
94,696,3
175,697,2
249,698,4


In [None]:
# Number of images assigned to each cluster.
pre_sub.label.value_counts()

4    231
3    224
2    114
1    111
0     20
Name: label, dtype: int64

### It is clear that 20 images are misclassified, so we get rid of them and repeat the training process



In [None]:

# Treat the smallest cluster as the outlier group. It is looked up by size
# instead of hard-coding label 0 because k-means numbers its clusters
# arbitrarily, so the outliers are not guaranteed to be cluster 0 on a re-run.
outlier_label = pre_sub.label.value_counts().idxmin()
to_del = set(pre_sub.loc[pre_sub.label == outlier_label, 'ImageID'].tolist())

# Keep every image whose numeric ID (file name without extension) is not an
# outlier; the original file order is preserved.
images_clean = [image for image in images if int(Path(image).stem) not in to_del]
len(images_clean)

680

In [None]:
%%time
from tensorflow.keras.applications.resnet50 import preprocess_input
import cv2 
import numpy as np

resnet_feature_list = []
# images = [f for f in os.listdir(DATA_DIR)]
for image in images_clean:
    file = DATA_DIR+image
    #print(file)
    im = cv2.imread(file)
    #im = cv2.resize(im,(256,256))
    img = preprocess_input(np.expand_dims(im.copy(), axis=0))
    resnet_feature = my_new_model.predict(img)
    resnet_feature_np = np.array(resnet_feature)
    resnet_feature_list.append(resnet_feature_np.flatten())

array = np.array(resnet_feature_list)

CPU times: user 1min 10s, sys: 1.56 s, total: 1min 11s
Wall time: 1min 10s


In [None]:
# Sanity check: the feature matrix should now have one row per retained image.
array.shape

(680, 2048)

In [None]:
from sklearn.cluster import KMeans 

# Re-cluster the cleaned feature matrix into 5 groups. `random_state` is pinned
# because k-means starts from random centroids: unseeded, the partition and the
# cluster numbering (which ends up in the submission) would differ between runs.
kmeans = KMeans(n_clusters=5, random_state=42, n_init=50, max_iter=1000).fit(array)


In [None]:
# Image IDs (file names without extension) of the retained images, in the order
# of `images_clean`. `Path.stem` strips the suffix regardless of its length,
# unlike slicing off a fixed 4 characters.
img_ids_list_clean = [Path(f).stem for f in images_clean]

In [None]:
# Cluster assignment for every retained image.
pre_sub_2 = pd.DataFrame({'ImageID': img_ids_list_clean, "label": kmeans.labels_})
pre_sub_2 = pre_sub_2.astype(int)

# The images removed as outliers still need a label in the submission. They get
# random labels in 0..4. A dedicated, seeded generator and a sorted ID list
# keep the result identical across re-runs without touching the global
# `random` state (set iteration order and unseeded draws are not reproducible).
outlier_ids = sorted(to_del)
label_rng = random.Random(42)
rnd_labels = [label_rng.randint(0, 4) for _ in outlier_ids]

# Alternative that was also tried: labels assigned by hand to match the main
# clusters. Kept for reference only; it is NOT used to build the submission.
missing_labels = [3, 3, 2, 1, 1, 0, 1, 2, 1, 2, 2, 2, 3, 2, 1, 4, 0, 2, 0, 3]

ending = pd.DataFrame({'ImageID': outlier_ids, 'label': rnd_labels})

# Combine both parts into a single table ordered by image ID.
submission = pd.concat([pre_sub_2, ending], axis=0, ignore_index=True)
submission = submission.sort_values(by=['ImageID'])

# Every image must appear exactly once in the submission.
assert submission['ImageID'].is_unique, "duplicate ImageID in submission"




## Submitting our Predictions to the system

In [None]:
!rm -rf assets
!mkdir assets

submission.to_csv(os.path.join("assets", "submission.csv"), index=False)

In [None]:
# save before the last cell

In [None]:
# Submit this notebook to the `environment-classification` challenge
# (presumably `-a assets` attaches the folder holding submission.csv).
# Save the notebook before running this cell.
%aicrowd notebook submit -c environment-classification -a assets --no-verify

 %load_ext aicrowd.magic
%aicrowd notebook submit -c environment-classification -a assets --no-verify
[1;34mMounting Google Drive 💾[0m
Your Google Drive will be mounted to access the colab notebook
Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.activity.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fexperimentsandconfigs%20https%3a%2f%2fwww.googleapis.com%2fauth%2fphotos.native&response_type=code

Enter your authorization code:
4/1AX4XfWgbhNxSVaym8w-m_l37MSNBND4uLZlPzKW86wtP8nl4v_QZSpgZpkk
Mounted at /content/drive
Using notebook: Blit