Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Joseph cleanup #23

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# EditorConfig helps developers define and maintain consistent
# coding styles between different editors and IDEs
# editorconfig.org

root = true


[*]

# Change these settings to your own preference
indent_style = space
indent_size = 4

# We recommend keeping these unchanged
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true

[*.md]
trim_trailing_whitespace = false
18 changes: 18 additions & 0 deletions .pylintrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# My pylintrc, for use with atom.io's linter-pylint
[MESSAGES CONTROL]
disable=W0311,W1201,W0702,W0603,W0611,W0621,W0703,W0212,E1101,E0611,C0111,C0103,R0902,R0914,I0011

# checks for :
# * unauthorized constructions
# * strict indentation
# * line length
# * use of <> instead of !=
#
[FORMAT]
# Maximum number of characters on a single line.
max-line-length=128
# Maximum number of lines in a module
max-module-lines=1000
# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
# tab). In repo it is 2 spaces.
indent-string=' '
11 changes: 7 additions & 4 deletions bootstrap.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
#!/usr/bin/env python

from __future__ import print_function

import os
import glob
import sys
import tarfile
from shutil import copyfile, rmtree
import numpy as np
from scipy.io import loadmat
from shutil import copyfile, rmtree
import sys
import config

if sys.version_info[0] >= 3:
from urllib.request import urlretrieve
from urllib.request import urlretrieve # #pylint: disable=W,E
else:
# Not Python 3 - today, it is most likely to be Python 2
# But note that this might need an update when Python 4
Expand All @@ -31,7 +34,7 @@ def download_file(url, dest=None):

flowers_archive_path = os.path.join(data_path, '102flowers.tgz')
if not os.path.isfile(flowers_archive_path):
print ('Downloading images...')
print('Downloading images...')
download_file('http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz')
tarfile.open(flowers_archive_path).extractall(path=data_path)

Expand Down
7 changes: 4 additions & 3 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
data_dir = join_path(abspath, 'data/sorted')
trained_dir = join_path(abspath, 'trained')

train_dir, validation_dir = None, None
train_dir, validation_dir, test_dir = None, None, None

MODEL_VGG16 = 'vgg16'
MODEL_INCEPTION_V3 = 'inception_v3'
Expand Down Expand Up @@ -37,12 +37,13 @@

nb_train_samples = 0
nb_validation_samples = 0

nb_test_samples = 0

def set_paths():
global train_dir, validation_dir
global train_dir, validation_dir, test_dir
train_dir = join_path(data_dir, 'train/')
validation_dir = join_path(data_dir, 'valid/')
test_dir = join_path(data_dir, 'test/')


set_paths()
Expand Down
13 changes: 13 additions & 0 deletions download_pretrained_resnet50.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/env bash
# Download the pre-trained ResNet50 model, the fine-tuned weights and the
# class list into <repo>/trained/.
#
# NOTE: ${BASH_SOURCE[0]} is a bash-only variable; the original /bin/sh
# shebang left it undefined (so the script silently fell back to the
# current directory). The bash shebang makes the path resolution work.

set -e  # abort on the first failed command (e.g. a failed download)

# Resolve the absolute directory containing this script.
DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )

cd "$DIR"
mkdir -p trained   # -p: do not fail when the directory already exists
cd trained

HOST='https://drop.jackhftang.com/storage'
curl -LO "$HOST/model-resnet50.h5"
curl -LO "$HOST/fine-tuned-resnet50-weights.h5"
curl -LO "$HOST/classes-resnet50"
8 changes: 6 additions & 2 deletions models/base_model.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import print_function

from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.preprocessing import image
from keras.applications.imagenet_utils import preprocess_input
Expand Down Expand Up @@ -43,7 +45,8 @@ def _fine_tuning(self):
train_data,
steps_per_epoch=config.nb_train_samples / float(self.batch_size),
epochs=self.nb_epoch,
validation_data=self.get_validation_datagen(),
validation_data=self.get_validation_datagen(rotation_range=30., shear_range=0.2,
zoom_range=0.2, horizontal_flip=True),
validation_steps=config.nb_validation_samples / float(self.batch_size),
callbacks=callbacks,
class_weight=self.class_weight)
Expand All @@ -52,7 +55,8 @@ def _fine_tuning(self):
train_data,
samples_per_epoch=config.nb_train_samples,
nb_epoch=self.nb_epoch,
validation_data=self.get_validation_datagen(),
validation_data=self.get_validation_datagen(rotation_range=30., shear_range=0.2,
zoom_range=0.2, horizontal_flip=True),
nb_val_samples=config.nb_validation_samples,
callbacks=callbacks,
class_weight=self.class_weight)
Expand Down
4 changes: 3 additions & 1 deletion predict.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from __future__ import print_function

import time
import argparse
import os
import numpy as np
import glob
import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.externals import joblib

Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ scikit_learn
theano==0.9.0
h5py
Pillow
tensorflow-gpu
tensorflow-gpu
Augmentor
10 changes: 6 additions & 4 deletions server.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from __future__ import print_function

import os
import argparse
import socket
import traceback
from threading import Thread
import numpy as np
import os
import argparse
import config
import util
from sklearn.externals import joblib
import traceback

util.set_img_format()

Expand Down Expand Up @@ -60,7 +62,7 @@ def handle(clientsocket):
predicted_relativity = novelty_detection_clf.predict(acts)[0]
nd_class = novelty_detection_clf.__classes[predicted_relativity]
except Exception as e:
print(e.message)
print(e)
nd_class = 'related'

top10_json = "["
Expand Down
84 changes: 84 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
from __future__ import print_function

import os
import argparse
import traceback
import numpy as np

import util
import config
import keras
from keras.preprocessing.image import ImageDataGenerator

np.random.seed(1337)  # for reproducibility
# Images per generator batch; shared by the evaluate/predict calls in test().
batch_size=16

def parse_args():
    """Parse command-line options and mirror them onto the config module.

    Returns:
        argparse.Namespace with ``data_dir``, ``trained_file`` and ``model``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', help='Path to data dir')
    parser.add_argument('--trained_file', default='./trained/model-resnet50.h5',
                        help='Path to trained file')
    parser.add_argument('--model', type=str, default='resnet50',
                        help='Base model architecture',
                        choices=[config.MODEL_RESNET50,
                                 config.MODEL_RESNET152,
                                 config.MODEL_INCEPTION_V3,
                                 config.MODEL_VGG16])
    opts = parser.parse_args()
    # Keep the shared config module in sync with the CLI choices.
    config.model = opts.model
    config.trained_file = opts.trained_file
    return opts


def init():
    """Prepare global state for evaluation.

    Sets the Keras image data format, loads the class list from the train
    directory and counts the train/validation/test samples, then fails fast
    if the trained model file is missing.

    Raises:
        Exception: if ``config.trained_file`` is not an existing file.
    """
    util.set_img_format()
    util.set_classes_from_train_dir()
    util.set_samples_info()
    # isfile (not exists): a directory at this path would also break loading.
    if not os.path.isfile(config.trained_file):
        # Fail fast with a clear message instead of a cryptic Keras load error.
        raise Exception('Trained model file does not exist: %s'
                        % config.trained_file)

def test():
    """Evaluate the trained model on the held-out test set.

    Loads the model from ``config.trained_file``, streams images from
    ``config.test_dir`` and prints the overall [loss, accuracy] followed by
    the raw per-image predictions.
    """
    img_size = (224, 224)

    print("Creating model...")
    model = keras.models.load_model(config.trained_file)
    print("Model is created")

    idg = ImageDataGenerator()
    # Channel means used for mean subtraction; presumably the same values
    # applied at training time (BGR order) — TODO confirm against training.
    idg.mean = np.array([103.939, 116.779, 123.68],
                        dtype=np.float32).reshape((3, 1, 1))
    test_generator = idg.flow_from_directory(config.test_dir,
                                             batch_size=batch_size,
                                             target_size=img_size,
                                             classes=config.classes)

    # `steps` is a number of BATCHES, not samples; the original passed the
    # sample count, which made Keras loop over the (repeating) generator
    # batch_size times too often. Ceiling so a partial final batch is kept.
    steps = -(-config.nb_test_samples // batch_size)

    # -- Evaluate generator -- #
    result = model.evaluate_generator(generator=test_generator, steps=steps)
    print("Model [loss, accuracy]: {0}".format(result))

    # -- Predict generator -- #
    # NOTE(review): flow_from_directory shuffles by default, so these
    # predictions are not aligned with generator.filenames — confirm intent.
    predict = model.predict_generator(generator=test_generator, steps=steps)
    print("model predictions: {0}".format(predict))
    print('Testing is finished!')


if __name__ == '__main__':
    try:
        cli_args = parse_args()
        # Optionally re-point the data directory before deriving sub-paths.
        if cli_args.data_dir:
            config.data_dir = cli_args.data_dir
            config.set_paths()
        if cli_args.model:
            config.model = cli_args.model

        init()
        test()

    except Exception as e:
        # Surface the failure but always fall through to cleanup.
        print(e)
        traceback.print_exc()
    finally:
        util.unlock()
15 changes: 9 additions & 6 deletions train.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,26 @@
import numpy as np
from __future__ import print_function

import os
import argparse
import traceback
import os

np.random.seed(1337) # for reproducibility
import numpy as np

import util
import config

np.random.seed(1337) # for reproducibility

def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('--data_dir', help='Path to data dir')
parser.add_argument('--model', type=str, required=True, help='Base model architecture', choices=[
parser.add_argument('--model', type=str, help='Base model architecture', choices=[
config.MODEL_RESNET50,
config.MODEL_RESNET152,
config.MODEL_INCEPTION_V3,
config.MODEL_VGG16])
parser.add_argument('--nb_epoch', type=int, default=1000)
parser.add_argument('--freeze_layers_number', type=int, help='will freeze the first N layers and unfreeze the rest')
parser.add_argument('--freeze_layers_number',
type=int, help='will freeze the first N layers and unfreeze the rest')
return parser.parse_args()


Expand Down Expand Up @@ -52,6 +54,7 @@ def train(nb_epoch, freeze_layers_number):

init()
train(args.nb_epoch, args.freeze_layers_number)

except Exception as e:
print(e)
traceback.print_exc()
Expand Down
4 changes: 3 additions & 1 deletion train_novelty_detection.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from __future__ import print_function

import argparse
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
Expand All @@ -7,7 +10,6 @@
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint
import argparse
import pandas as pd
import numpy as np
import config
Expand Down
31 changes: 17 additions & 14 deletions util.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,26 @@
from __future__ import print_function

import os
import glob
import math
import itertools

import importlib
import matplotlib

matplotlib.use('Agg') # fixes issue if no GUI provided

import matplotlib.pyplot as plt

import seaborn as sns

sns.set(style='white')

import numpy as np
import os
import glob
import pandas as pd
import importlib
import keras
from keras import backend as K
from keras.preprocessing.image import DirectoryIterator
import config
import math
import itertools

sns.set(style='white')


def save_history(history, prefix):
Expand Down Expand Up @@ -103,16 +107,17 @@ def set_samples_info():
"""Walks through the train and valid directories
and returns number of images"""
white_list_formats = {'png', 'jpg', 'jpeg', 'bmp'}
dirs_info = {config.train_dir: 0, config.validation_dir: 0}
dirs_info = {config.train_dir: 0, config.validation_dir: 0, config.test_dir: 0}
for d in dirs_info:
iglob_iter = glob.iglob(d + '**/*.*')
for i in iglob_iter:
filename, file_extension = os.path.splitext(i)
_, file_extension = os.path.splitext(i)
if file_extension[1:] in white_list_formats:
dirs_info[d] += 1

config.nb_train_samples = dirs_info[config.train_dir]
config.nb_validation_samples = dirs_info[config.validation_dir]
config.nb_test_samples = dirs_info[config.test_dir]


def get_class_weight(d):
Expand Down Expand Up @@ -150,8 +155,6 @@ def set_classes_from_train_dir():
def override_keras_directory_iterator_next():
"""Overrides .next method of DirectoryIterator in Keras
to reorder color channels for images from RGB to BGR"""
from keras.preprocessing.image import DirectoryIterator

original_next = DirectoryIterator.next

# do not allow to override one more time
Expand Down Expand Up @@ -192,8 +195,8 @@ def save_activations(model, inputs, files, layer, batch_number):
all_activations = []
ids = []
af = get_activation_function(model, layer)
for i in range(len(inputs)):
acts = get_activations(af, [inputs[i]])
for i, inp in enumerate(inputs):
acts = get_activations(af, [inp])
all_activations.append(acts)
ids.append(files[i].split('/')[-2])

Expand Down