First download the model weights and parameters, and install caffe

In [1]:
import caffe

In [2]:
caffe.set_mode_cpu()

In [3]:
#load the model
net = caffe.Net('model/VGG_CNN_S_deploy.prototxt',
                'model/VGG_CNN_S.caffemodel',
                caffe.TEST)

In [4]:
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2,0,1))
transformer.set_channel_swap('data', (2,1,0))
transformer.set_raw_scale('data', 255.0)
net.blobs['data'].reshape(1,3,224,224)
im = caffe.io.load_image('model/cat.jpg')
net.blobs['data'].data[...] = transformer.preprocess('data', im)

In [5]:
out = net.forward()

In [6]:
top_k = net.blobs['prob'].data[0].flatten().argsort()[-1:-6:-1]
print(top_k)

[331 285 287 281 330 278 277 282 335 332 937 333 998 284]


In [1]:
# generating train and test dataset txt files
import pandas as pd
import numpy as np
import os
def generateTexts():
    train = pd.DataFrame()
    test = pd.DataFrame()
    data = pd.read_csv('combined/haryana.csv')
    data = data.append(pd.read_csv('combined/punjab.csv'))
    print(len(data))
    for index, row in data.iterrows():
        filename = 'image/'+str(row['State'])+str(row['District'])+str(row['tehsil'])+str(row['village'])+'.PNG'
        if os.path.exists(filename):
            entry = []
            entry.append([os.path.abspath(filename),row['electricity'],row['treated_water'],row['electronics'],row['phone'],row['transport']])
            if index%5==0:
                test = test.append(entry)
                print(index)
            else:
                train = train.append(entry)
    train.to_csv('train.txt', ' ', header = False, index = False)
    test.to_csv('test.txt', ' ', header = False, index = False)
    print(train.head(10))

In [1]:
# building the mean image
import sys

import argparse
import numpy as np
import os
import time

from caffe.io import array_to_blobproto
from collections import defaultdict
from skimage import io

def calc_mean():
    #parser = argparse.ArgumentParser()
    #parser.add_argument('meanPrefix', type=str, help="Prefix of the mean file.")
    #parser.add_argument('imageDir', type=str, help="Directory of images to read.")
    #args = parser.parse_args()

    mean = np.zeros((1, 3, 640, 640))
    N = 0
    classSizes = defaultdict(int)

    beginTime = time.time()
    for subdir, dirs, files in os.walk('image'):
        for fName in files:
            if fName.endswith(".PNG"):
                img = io.imread('image/'+fName)
                if img.shape == (640, 640, 3):
                    mean[0][0] += img[:, :, 0]
                    mean[0][1] += img[:, :, 1]
                    mean[0][2] += img[:, :, 2]
                    N += 1
                    if N % 100 == 0:
                        elapsed = time.time() - beginTime
                        print("Processed {} images in {:.2f} seconds. "
                              "{:.2f} images/second.".format(N, elapsed,
                                                             N / elapsed))
    mean[0] /= N

    blob = array_to_blobproto(mean)
    with open("mean_minor.binaryproto", 'wb') as f:
        f.write(blob.SerializeToString())
    np.save("mean_minor_np.npy", mean[0])

    meanImg = np.transpose(mean[0].astype(np.uint8), (1, 2, 0))
    io.imsave("mean_img.png", meanImg)
    return mean[0]

In [5]:
# generating hdf5 type input
import caffe
import h5py
def generateHDF5FromText2(mean):
    print('\nplease wait...')

    HDF5_FILE = ['hdf5_train.h5', 'hdf5_test1.h5']
        #store the training and testing data path and labels
    LIST_FILE = ['train.txt','test.txt']
    for kk, list_file in enumerate(LIST_FILE):
        #reading the training.txt or testing.txt to extract the all the image path and labels, store into the array
        path_list = []
        label_list = []
        with open(list_file, buffering=1) as hosts_file:
            for line in hosts_file:
                line = line.rstrip()
                array = line.split(' ')
                label_list.append(array[1:])
                path_list.append(array[0])
            print(len(path_list), len(label_list))
            f = h5py.File(HDF5_FILE[kk], 'w')
            f.create_dataset('data', (len(path_list),3,640,640), dtype='f4')
            f.create_dataset('labels', (len(path_list),5), dtype='f4')
            f.close()
            # init the temp data and labels storage for HDF5
            datas = np.zeros((100,3,640,640),dtype='f4') 
            labels = np.zeros((100, 5),dtype="f4")
            counter = 0
            for ii, _file in enumerate(path_list):
                if ii<(counter+1)*100:
                    # feed the image and label data to the TEMP data
                    img = caffe.io.load_image( _file )
                    img = caffe.io.resize( img, (640, 640, 3) ) # resize to fixed size
                    img = np.transpose( img , (2,0,1))
                    img *= 255.
                    img -= mean
                    img = img[::-1, :, :]
                    datas[ii-(counter*100)] = img
                    labels[ii-(counter*100)] = label_list[ii-(counter*100)]
                else:
                    # store the temp data and label into the HDF5
                    with h5py.File(HDF5_FILE[kk], 'a') as f:
                        f['data'][counter*100:ii] = datas[0:ii-(counter*100)]
                        f['labels'][counter*100:ii] = labels[0:ii-(counter*100)]
                        f.close()
                    print(counter)
                    counter+=1

In [3]:
generateTexts()

17864
5870
5885
5900
5915
5930
5945
5960
5975
5990
6005
6020
6035
6050
6065
6080
6095
6110
6125
6140
6155
6170
6185
6200
6215
6230
6245
6260
6275
6290
6305
6320
6335
6350
6365
6380
6395
6410
6425
6440
6455
6470
6485
6500
6515
6530
6545
6560
6575
6590
6605
6620
6635
6650
6665
6680
6695
6710
6725
6740
6755
6770
6785
6800
6815
6830
6845
6860
6875
6890
6905
6920
6935
6950
6965
6980
6995
7010
7025
7040
7055
7070
7085
7100
7115
7130
7145
7160
7175
7190
7205
7220
7235
7250
7265
7280
7295
7310
7325
7340
7355
7370
7385
7400
7415
7430
7445
7460
7475
7490
7505
7520
7535
7550
7565
7580
7595
7610
7625
7640
7655
7670
7685
7700
7715
7730
7745
7760
7775
7790
7805
7820
7835
7850
7865
7880
7895
7910
7925
7940
7955
7970
7985
8000
8015
8030
8045
8060
8075
8090
8105
8120
8135
8150
8165
8180
8195
8210
8225
8240
8255
8270
8285
8300
8315
8330
8345
8360
8375
8390
8405
8420
8435
8450
8465
8480
8495
8510
8525
8540
8555
8570
8585
8600
8615
8630
8645
8660
8675
8690
8705
8720
8735
8750
8765
8780
8795
8810
8825
8840

In [6]:
generateHDF5FromText2(calc_mean())

Processed 100 images in 2.53 seconds. 39.46 images/second.
Processed 200 images in 5.38 seconds. 37.14 images/second.
Processed 300 images in 7.69 seconds. 39.01 images/second.
Processed 400 images in 10.19 seconds. 39.25 images/second.
Processed 500 images in 12.60 seconds. 39.69 images/second.
Processed 600 images in 15.04 seconds. 39.89 images/second.
Processed 700 images in 17.56 seconds. 39.85 images/second.
Processed 800 images in 19.78 seconds. 40.44 images/second.
Processed 900 images in 21.68 seconds. 41.50 images/second.
Processed 1000 images in 23.46 seconds. 42.63 images/second.
Processed 1100 images in 25.65 seconds. 42.88 images/second.
Processed 1200 images in 28.02 seconds. 42.83 images/second.
Processed 1300 images in 30.13 seconds. 43.15 images/second.
Processed 1400 images in 32.32 seconds. 43.32 images/second.
Processed 1500 images in 34.46 seconds. 43.53 images/second.
Processed 1600 images in 36.57 seconds. 43.75 images/second.
Processed 1700 images in 38.95 secon

  warn('%s is a low contrast image' % fname)



please wait...
1564 1564


  warn("The default mode, 'constant', will be changed to 'reflect' in "


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
391 391
0
1
2


In [1]:
import h5py
with h5py.File('hdf5_train.h5', 'r') as f:
    print(f['data'][1000:1005])
    print(f['labels'][1000:1005])
    f.close()

  from ._conv import register_converters as _register_converters


[[[[-9.38104212e-01 -1.14679992e+00 -1.10485613e+00 ... -2.27033234e+01
    -4.68491058e+01 -3.08946285e+01]
   [-1.08833723e+01 -1.49687977e+01 -1.50659847e+01 ... -2.65232735e+01
    -4.07288971e+01 -3.47155991e+01]
   [-2.28593349e+01 -2.29851646e+01 -1.80521736e+01 ... -3.45360603e+01
    -2.68567772e+01 -3.45416870e+01]
   ...
   [-3.33375931e+01 -3.35739098e+01 -3.35800514e+01 ... -2.18311954e+00
    -5.24603510e+00 -5.14680195e+00]
   [-3.36629143e+01 -3.36890030e+01 -3.39038353e+01 ... -5.28286362e+00
    -5.23836231e+00 -5.19334936e+00]
   [-2.97953949e+01 -3.38608665e+01 -3.40363159e+01 ... -5.38311911e+00
    -5.47212172e+00 -5.33196831e+00]]

  [[ 3.42455459e+00  3.12429881e+00  3.18772602e+00 ... -2.24112492e+01
    -5.45196915e+01 -3.44629135e+01]
   [-5.68132734e+00 -6.72020197e+00 -6.81329632e+00 ... -2.61335030e+01
    -4.34332466e+01 -3.83820953e+01]
   [-1.44352903e+01 -1.48081808e+01 -1.38777466e+01 ... -3.42301788e+01
    -3.04409199e+01 -3.42086945e+01]
   ...
   

The model finally begins

In [1]:
import caffe

caffe.set_mode_cpu()

import numpy as np
from pylab import *
%matplotlib inline
import tempfile

In [2]:
model_root = './model/'
import os
weights = os.path.join(model_root, 'VGG_CNN_S.caffemodel')
assert os.path.exists(weights)

In [None]:
solver = caffe.get_solver(model_root+'solver.prototxt')

In [None]:
solver.net.copy_from(weights)