# Import Necessary Libraries

In [2]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
from fastai.vision import *
import gc 


# Define Paths

In [3]:
# Root of the planets dataset; the image folders are derived from it so the
# base path is written only once and cannot drift between the three names.
PATH = Path('../input/planets-dataset/planet/planet')
Train_PATH = PATH/'train-jpg'
Test_PATH = PATH/'test-jpg'

# Show what the dataset root contains.
PATH.ls()

# Load Data 

In [4]:
# Read the training labels and the sample submission file from the dataset root.
TrainData = pd.read_csv(PATH.joinpath('train_classes.csv'))
TestData = pd.read_csv(PATH.joinpath('sample_submission.csv'))

# Exploratory Data Analysis  

In [5]:
# Report the (rows, columns) shape of both frames.
print(f'Train_Data = {TrainData.shape}')
print(f'Test_Data = {TestData.shape}')



In [6]:
# Count the entries listed in the train and test image folders.
print(f'Size of Training files = {len(Train_PATH.ls())}')
print(f'Size of Test files = {len(Test_PATH.ls())}')

In [8]:
# Preview the first ten rows of the training labels.
TrainData.head(n=10)

In [9]:
# Frequency of each distinct tag string.
TrainData.loc[:, 'tags'].value_counts()

In [10]:
# Number of missing values per column.
TrainData.isna().sum()

In [11]:
# Images per tag string; after reset_index the 'image_name' column holds the counts.
labels = (
    TrainData
    .groupby('tags')['image_name']
    .count()
    .reset_index()
)


In [12]:
# Five tag strings with the highest image counts.
labels.sort_values(by='image_name', ascending=False).head(n=5)

#### Let's take a look at some images from the most dominant classes, i.e. `clear primary` and `partly_cloudy primary`

In [13]:
# First five rows whose tag string is exactly 'clear primary' / 'partly_cloudy primary'.
df_primary = TrainData[TrainData['tags'].eq('clear primary')].head(5)
df_partly_cloudy = TrainData[TrainData['tags'].eq('partly_cloudy primary')].head(5)



In [14]:
# Display the sample of rows tagged exactly 'clear primary'.
df_primary

In [15]:
# Display the sample of rows tagged exactly 'partly_cloudy primary'.
df_partly_cloudy

In [None]:
pip install fastai==1.0.61

In [16]:
# Load one training image and show it as the cell output.
# NOTE(review): presumably train_2 is one of the rows listed in df_primary — confirm.
clear_primary = open_image(Train_PATH.joinpath('train_2.jpg'))
clear_primary

In [17]:
# Load a second training image and show it as the cell output.
# NOTE(review): presumably train_17 is one of the rows listed in df_partly_cloudy — confirm.
partly_cloudy_primary = open_image(Train_PATH.joinpath('train_17.jpg'))
partly_cloudy_primary



## Train Model

In [18]:
#Define Transformation 

tfms = get_transforms(flip_vert=True, max_lighting=0.1, max_zoom=1.05, max_warp=0.)

In [19]:
    src = ImageList.from_df(TrainData,path=Train_PATH,cols='image_name',suffix='.jpg').split_by_rand_pct(0.2).label_from_df(cols='tags',label_delim=' ')

In [20]:
 data = src.transform(tfms).databunch(bs=64).normalize(imagenet_stats)

In [24]:
# Visual sanity check of a batch from the data bunch (rows=2).
data.show_batch(rows=2)

In [25]:
# Architecture handed to cnn_learner in the next cell.
arch = models.resnet50


In [26]:
# Both metrics are evaluated with the same 0.2 threshold.
acc = partial(accuracy_thresh, thresh=0.2)
f_score = partial(fbeta, thresh=0.2)

# Create the learner; model_dir points at the working directory.
learn = cnn_learner(
    data,
    arch,
    metrics=[acc, f_score],
    model_dir='/kaggle/working',
)

In [27]:
# Use the LR finder to pick a good learning rate.

learn.lr_find()

In [28]:
# Plot what the recorder captured during the LR finder run above.
learn.recorder.plot()

In [29]:
# Learning rate for the first training stage; the fine-tuning stage further
# down reuses it as the upper bound lr/5.
lr = 0.01

In [32]:
# First training stage: 8 epochs with the learning rate given as slice(lr).
# This runs before learn.unfreeze() is called further down.
learn.fit_one_cycle(8, slice(lr))

In [33]:
# Checkpoint the learner after the first stage (the learner was created with model_dir='/kaggle/working').
learn.save('Stage1-rs50-256')

In [34]:
# Fine-tune the model: unfreeze the learner before the second training stage.

learn.unfreeze()

In [38]:
# Re-run the LR finder on the unfrozen learner and plot the result.
learn.lr_find()
learn.recorder.plot()

In [None]:
# Second training stage: 8 epochs with learning rates given as slice(1e-5, lr/5).
learn.fit_one_cycle(8,slice(1e-5,lr/5))

In [None]:
# Checkpoint the learner after the second stage.
learn.save('Stage2-rs50-256')


In [None]:
# Export the learner to the working directory for inference.
# NOTE(review): the load_learner call further down only names the directory, so it
# presumably picks this file up by its default name — confirm.
learn.export('/kaggle/working/export.pkl')

In [None]:
# The test images are spread over two folders: the main test folder (Test_PATH,
# defined with the other paths) and the additional test folder. Build one item
# list from the first and append the second to it.
test = ImageList.from_folder(Test_PATH).add(
    ImageList.from_folder('../input/planets-dataset/test-jpg-additional/test-jpg-additional')
)
# Total number of test items.
len(test)

In [None]:
# Reload the learner from the working directory with the test items attached.
# NOTE(review): this rebinds `learn`, replacing the training learner.
learn = load_learner(Path('/kaggle/working'), test=test)
# Predict on the test set; the second return value is discarded.
# NOTE(review): binding `_` shadows IPython's last-output variable for the rest of the session.
preds, _ = learn.get_preds(ds_type=DatasetType.Test)

In [None]:
# For each prediction row keep the classes whose score exceeds the threshold
# and join their names with spaces, giving one tag string per image.
thresh = 0.2
labelled_preds = [
    ' '.join(name for name, p in zip(learn.data.classes, pred) if p > thresh)
    for pred in preds
]

In [None]:
# Image names for the submission: each test item's file name without its extension.
# Path.stem is used rather than slicing off the last four characters, which is
# only correct when every extension is exactly three letters long.
fnames = [f.stem for f in learn.data.test_ds.items]

In [None]:
# Two-column submission frame: image name and its space-separated predicted tags.
preds_df = pd.DataFrame(
    data={'image_name': fnames, 'tags': labelled_preds},
    columns=['image_name', 'tags'],
)

In [None]:
# Write the submission file to the working directory without the index column.
preds_df.to_csv('/kaggle/working/submission.csv', index=False)

In [None]:
# Preview the first ten predictions ordered by image name.
preds_df.sort_values(by='image_name').head(n=10)


In [None]:
# (rows, columns) of the submission frame.
preds_df.shape