In [1]:
import numpy as np
from sklearn import svm, grid_search
from matplotlib import pyplot as plt
%matplotlib inline
from pylab import *
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
import pickle

# Data cleaning

### Loading the data

In [2]:
# Read the training features and their labels from the two NumPy dump files,
# then display the shape of the raw feature container.
X_train, Y_train = (np.load(path) for path in ('X_trn5.np', 'Y_trn5.np'))
X_train.shape

(294L,)

### Cleaning the data

In [12]:
# Keep only the samples whose feature vector has exactly 1600 entries; the
# others cannot be stacked into a rectangular matrix.
l = np.array([len(x) for x in X_train])
has_full_length = (l == 1600)

# X_train is deliberately NOT overwritten: the mask must keep the same length
# as Y_train, otherwise re-running this cell would apply a shorter mask to the
# labels and silently misalign them with the features.
X_train2 = np.array(list(X_train[has_full_length]))
Y_train2 = Y_train[has_full_length]
X_train2.shape

(291L, 1600L)

In [23]:
# Display the label array's shape so it can be compared with X_train2.shape
Y_train2.shape

(291L,)

# Cross-Validation

### Why a Support Vector Machine Classifier?
1. It works well when there is a clear margin of separation between the classes;
2. It is effective in high-dimensional spaces;
3. It remains effective when the number of dimensions is greater than the number of samples (here, 1600 features for 291 samples);
4. It uses only a subset of the training points (the support vectors) in the decision function, so it is also memory efficient.

In [17]:
# Hyper-parameter grid: 15 log-spaced values for each of C and gamma,
# i.e. 15 x 15 = 225 candidate combinations.
parameters = {
    'C': np.logspace(-3, 3, 15),
    'gamma': np.logspace(-9, 9, 15),
}

# Exhaustive search over the grid with the default SVC and the default
# cross-validation settings of GridSearchCV.
model = svm.SVC()
cross_validation = grid_search.GridSearchCV(estimator=model, param_grid=parameters)
cross_validation.fit(X_train2, Y_train2)

# Show the best combination found
cross_validation.best_params_

{'C': 7.1968567300115138, 'gamma': 0.0026826957952797272}

# Building the model

In [47]:
# Retrieve the hyper-parameters selected by the grid search
best_params = cross_validation.best_params_
C, gamma = best_params.get('C'), best_params.get('gamma')

# Train the final classifier on the whole cleaned training set
classifier = svm.SVC(C=C, gamma=gamma)
classifier.fit(X_train2, Y_train2)

# Report the number of support vectors per class and the best
# cross-validated score obtained during the search
print('%s %s' % (classifier.n_support_, cross_validation.best_score_))

[81 89] 0.817869415808


#### Saving the model

In [48]:
# Serialize the trained classifier to disk. The `with` statement closes the
# file on exit, so no explicit close() call is needed afterwards.
with open('classifier_Pierre_Megret.pickle', 'wb') as f:
    pickle.dump(classifier, f)

# Testing the classifier

##### With a test preview

In [44]:
### Preview image; this will be replaced with the real test image
im_test = plt.imread('parking_test_preview.png')
###

# This function takes a location (loc) and an image (im) as input parameters and returns the feature vector
def my_feature_vector(loc, im, size = 10):
    """Return the flattened square patch of `im` centered at `loc`.

    Parameters
    ----------
    loc : sequence of two numbers
        Patch center; loc[0] indexes the second image axis (column) and
        loc[1] the first image axis (row).
    im : array-like
        Image to sample from; it is sliced along its first two axes.
    size : int, optional
        Half-width of the patch: `size` pixels are taken on each side of the
        center, giving 2*size rows and 2*size columns.

    Returns
    -------
    numpy.ndarray
        1-D array holding the patch values.

    Notes
    -----
    No bounds checking is done: a location closer than `size` pixels to the
    image border does not produce a full-size patch.
    """
    half_width = int(size)
    # Locations are typically read from a float array; slice bounds must be
    # integers, so convert explicitly instead of relying on implicit casting.
    col, row = int(loc[0]), int(loc[1])
    patch = im[row - half_width:row + half_width, col - half_width:col + half_width]
    return np.array(patch).flatten()
  
## Preview file with 10 test locations; it will be replaced with the real set of 100 test locations
test_locs_labs = np.load('test_locations_and_labels_preview.np')

# The first two columns are the locations, the third column is the label
test_locs, test_labels = test_locs_labs[:, :2], test_locs_labs[:, 2]

# Create the test set: one feature vector per test location
X_test = [my_feature_vector(loc, im_test) for loc in test_locs]

# Load the model.
# The pickle is opened in binary mode and closed as soon as it has been read.
# NOTE(review): unpickling can execute arbitrary code -- only load a file that
# was produced by this notebook.
with open('classifier_Pierre_Megret.pickle', 'rb') as model_file:
    my_classifier = pickle.load(model_file)

# Calculate the hypothetical score of the classifier.
# Points awarded for each (true label, predicted label) pair.
rewards = {
    (1.0, 1.0): 2,
    (1.0, 0.0): -0.5,
    (0.0, 1.0): -0.5,
    (0.0, 0.0): 0.25,
}

score = 0
for i, xtest in enumerate(X_test):
    # predict() works on a 2-D (n_samples, n_features) array, so the single
    # feature vector is presented as one row.
    predicted = my_classifier.predict(np.asarray(xtest).reshape(1, -1))

    # A label pair that is not in the table leaves the score unchanged.
    score = score + rewards.get((test_labels[i], predicted[0]), 0)

    # Running trace: true label, prediction, cumulative score
    print('%s %s %s' % (test_labels[i], predicted, score))

print('You final Score is: %.2f' % score)

1.0 [ 1.] 2
1.0 [ 1.] 4
1.0 [ 1.] 6
1.0 [ 1.] 8
1.0 [ 1.] 10
0.0 [ 0.] 10.25
0.0 [ 0.] 10.5
0.0 [ 0.] 10.75
0.0 [ 0.] 11.0
0.0 [ 0.] 11.25
You final Score is: 11.25


##### With my own testing set

In [45]:
# Create my own test set

# Load my own held-out samples and labels
X_train_test, Y_train_test = np.load('X_trn_tst.np'), np.load('Y_trn_tst.np')

# Same cleaning as for the training set: keep only the samples whose
# feature vector has exactly 1600 entries.
L = np.array([len(x) for x in X_train_test])
keep = (L == 1600)
X_train_test = X_train_test[keep]
X_train_test2 = np.array(list(X_train_test))
Y_train_test2 = Y_train_test[keep]
X_train_test2.shape

(62L, 1600L)

In [46]:
# Load the classifer
# The pickle is opened in binary mode and closed as soon as it has been read.
# NOTE(review): unpickling can execute arbitrary code -- only load a file that
# was produced by this notebook.
with open('classifier_Pierre_Megret.pickle', 'rb') as model_file:
    my_classifier = pickle.load(model_file)

# Points awarded for each (true label, predicted label) pair.
rewards = {
    (1.0, 1.0): 2,
    (1.0, 0.0): -0.5,
    (0.0, 1.0): -0.5,
    (0.0, 0.0): 0.25,
}

# Classify the whole test set in a single call; predict() expects a 2-D
# (n_samples, n_features) array, which X_train_test2 already is.
if len(X_train_test2):
    predictions = my_classifier.predict(X_train_test2)
else:
    predictions = []

# Calculate the score, together with the best attainable score (every sample
# classified correctly) so that the reported maximum follows the data.
score = 0
max_score = 0
for truth, predicted in zip(Y_train_test2, predictions):
    # A label pair that is not in the table leaves the score unchanged.
    score = score + rewards.get((truth, predicted), 0)
    max_score = max_score + rewards.get((truth, truth), 0)

print('You final Score is: %.2f / %.2f' % (score, max_score))

You final Score is: 47.75 / 69.75


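The classifier reaches 68.46 % of the maximum attainable score (47.75 / 69.75) on my own test set. Note that this is a ratio of scores, not a classification accuracy.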