In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

rootpath = '/media/share/data/kaggle/ieee-camera/'

# preprocess

In [None]:
def df_generater(rootpath, dataset):
    # find all the camera
    subdir = os.path.join(rootpath, dataset)
    n_class = os.listdir(subdir)
    df_temp = []

    for i in n_class:
        for fname in os.listdir(os.path.join(subdir, i)):
            df_temp.append((i, fname))

    df = pd.DataFrame(df_temp, columns=['class', 'fname'])
    print('class:', n_class)
    print('number of file:', df.shape[0])
    return df


def preprocess(rootpath, dataset):
    # read image file and check channels    
    df = df_generater(rootpath, dataset)
    subpath = os.path.join(rootpath, dataset)

    img_para = []
    for idx in range(len(df)):
        img = plt.imread(os.path.join(subpath, df['class'][idx]) + '/' + df['fname'][idx])
        if img.shape[0] > img.shape[1]:
            img = img.transpose(1, 0, 2)
        # image size as a feature
        img_para.append((img.shape[1], img.shape[0]/img.shape[1]))
    
    img_para = pd.DataFrame(img_para, columns=['size', 'ratio'])
    df = pd.concat([df, img_para], 1)
    return df

In [4]:
df_train = preprocess(rootpath, 'train')

class: ['Motorola-X', 'HTC-1-M7', 'iPhone-4s', 'Samsung-Galaxy-Note3', 'Motorola-Nexus-6', 'LG-Nexus-5x', 'Samsung-Galaxy-S4', 'Motorola-Droid-Maxx', 'iPhone-6', 'Sony-NEX-7']
number of file: 2750


In [None]:
img = plt.imread(os.path.join(subpath, df['class'][idx]) + '/' + df['fname'][idx])
if img.shape[0] > img.shape[1]:
    img = img.transpose(1, 0, 2)

h1 = img.shape[0]
h2 = round(h1 / 3)
w1 = img.shape[1]
w2 = round(w1 / 4)

img_1 = np.random.randint(0, h1 - 256)

In [6]:
df_train.sort_values(by='size')

Unnamed: 0,class,fname,size,ratio
1203,Motorola-Nexus-6,(MotoNex6)8.jpg,1040,0.750000
549,HTC-1-M7,(HTC-1-M7)19.jpg,2688,0.565476
362,HTC-1-M7,(HTC-1-M7)60.jpg,2688,0.565476
363,HTC-1-M7,(HTC-1-M7)15.jpg,2688,0.565476
364,HTC-1-M7,(HTC-1-M7)118.jpg,2688,0.565476
365,HTC-1-M7,(HTC-1-M7)206.jpg,2688,0.565476
366,HTC-1-M7,(HTC-1-M7)137.jpg,2688,0.565476
367,HTC-1-M7,(HTC-1-M7)35.jpg,2688,0.565476
368,HTC-1-M7,(HTC-1-M7)227.jpg,2688,0.565476
369,HTC-1-M7,(HTC-1-M7)70.jpg,2688,0.565476


# SVM

In [21]:
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC

le = LabelEncoder()
df_train_le = df_train.copy()
df_train_le['class'] = le.fit_transform(df_train_le['class'])

In [51]:
y = df_train_le['class']
X = pd.concat([df_train_le['size'], df_train_le['ratio']], 1)

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

clf = SVC(kernel='rbf', random_state=np.random)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)
clf.fit(X_train, y_train)
pred_y = clf.predict(X_val)

accuracy_score(y_val, pred_y)

0.57636363636363641

In [41]:
submit = pd.read_csv(rootpath+'sample_submission.csv')
subpath = os.path.join(rootpath, 'test')

img_para = []
for idx in range(len(submit)):
    img = plt.imread(os.path.join(subpath, submit['fname'][idx]))
    if img.shape[0] > img.shape[1]:
        img = img.transpose(1, 0, 2)
    # image size as a feature
    img_para.append((img.shape[1], img.shape[0]/img.shape[1]))

img_para = pd.DataFrame(img_para, columns=['size', 'ratio'])

y_test = clf.predict(img_para)

In [50]:
submit['camera'] = le.inverse_transform(y_test)
submitpath = '/media/share/jiaxin_cmu/kaggle/ieee_camera/submition/'
submit.to_csv(submitpath+'submit01.csv', index=False)

In [45]:
submit.head()

Unnamed: 0,fname,camera
0,img_0002a04_manip.tif,Motorola-Nexus-6
1,img_001e31c_unalt.tif,Motorola-Nexus-6
2,img_00275cf_manip.tif,Motorola-Nexus-6
3,img_0034113_unalt.tif,Motorola-Nexus-6
4,img_00344b7_unalt.tif,Motorola-Nexus-6
