In [1]:
from keras.models import Model, Sequential
from keras.layers import Dense, Input, Activation, Dropout, BatchNormalization
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

  from ._conv import register_converters as _register_converters
Using Theano backend.


In [2]:
# Load the iris dataset and unpack its feature matrix and target labels.
iris = load_iris()
X, y = iris.data, iris.target

In [3]:
# Hold out 20% of the samples for testing; stratify so class proportions match
# in both splits. A fixed random_state makes the split (and every score
# computed from it) reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42)

In [4]:
### Functional API variant (kept commented out for reference; not executed)
# inputs = Input(shape=(4,))
# x = Dense(6, activation='relu')(inputs)
# x = Dense(6, activation='relu')(x)
# outputs = Dense(3, activation='softmax')(x)

In [None]:
# Sequential model: two 16-unit ReLU hidden layers, each followed by 50%
# dropout, then batch normalization and a 3-way softmax output.
model = Sequential([
    Dense(16, activation='relu', input_shape=(4,)),
    Dropout(0.5),
    Dense(16, activation='relu'),
    Dropout(0.5),
    BatchNormalization(momentum=0.8),
    Dense(3, activation='softmax'),
])

In [None]:
# Configure training: RMSprop optimizer, sparse categorical cross-entropy
# (the targets are integer class ids, not one-hot vectors), and accuracy
# reported as an extra metric.
model.compile(optimizer='rmsprop', 
              loss='sparse_categorical_crossentropy', 
              metrics=['accuracy'])

In [None]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

In [None]:
# Train on the training split for 1000 epochs with per-epoch logging silenced
# (verbose=0); batch size is left at the Keras default.
model.fit(X_train, y_train, epochs=1000, verbose=0)

In [None]:
# Training accuracy after the last epoch. Older Keras releases store the metric
# under 'acc' and newer ones under 'accuracy'; accept either key so the lookup
# does not fail with a TypeError from indexing None.
(model.history.history.get('acc') or model.history.history.get('accuracy'))[-1]

In [None]:
# Training accuracy per epoch (green). The metric key is 'acc' in older Keras
# releases and 'accuracy' in newer ones, so fall back from one to the other.
plt.plot(model.history.history.get('acc') or model.history.history.get('accuracy'), 'g')
plt.xlabel('epoch')
plt.ylabel('training accuracy');  # trailing ';' hides the matplotlib object repr

In [None]:
# Training loss per epoch (red), with labeled axes so the figure stands alone.
plt.plot(model.history.history.get('loss'), 'r')
plt.xlabel('epoch')
plt.ylabel('training loss');  # trailing ';' hides the matplotlib object repr

In [None]:
# Per-class probabilities for each held-out sample, then the index of the
# most probable class in every row.
pred = model.predict(X_test)
pred2 = pred.argmax(axis=1)

In [None]:
# Test-set accuracy. scikit-learn metrics take (y_true, y_pred) in that order;
# accuracy happens to be symmetric, but keeping the documented order avoids
# silent errors if this line is later adapted to an asymmetric metric.
accuracy_score(y_test, pred2)

# Running it with Grid Search

In [None]:
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

In [None]:
def build_classifier(optimizer='rmsprop'):
    """Build and compile the iris classifier network.

    The architecture is two 16-unit ReLU layers, each followed by 50%
    dropout, then batch normalization and a 3-way softmax output.

    Parameters
    ----------
    optimizer : str or keras optimizer, default 'rmsprop'
        Passed straight to ``model.compile``. The default lets the builder be
        called with no arguments (e.g. by ``KerasClassifier`` outside a grid
        search that supplies ``optimizer``).

    Returns
    -------
    A compiled ``Sequential`` model using sparse categorical cross-entropy
    loss and reporting accuracy.
    """
    model = Sequential()

    model.add(Dense(16, activation='relu', input_shape=(4,)))
    model.add(Dropout(0.5))
    model.add(Dense(16, activation='relu'))
    model.add(Dropout(0.5))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(3, activation='softmax'))

    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [None]:
# Wrap the model-building function in a scikit-learn compatible estimator so
# it can be passed to GridSearchCV.
classifier = KerasClassifier(build_fn = build_classifier)

In [None]:
# Hyperparameter grid to search: 2 batch sizes x 2 epoch counts x 2 optimizers.
parameters = dict(
    batch_size=[1, 5],
    epochs=[100, 200],
    optimizer=['adam', 'rmsprop'],
)
parameters

In [None]:
# Exhaustive search over `parameters`, scoring each combination by accuracy
# with 3-fold cross-validation.
grid_search = GridSearchCV(estimator=classifier, 
                           param_grid=parameters,
                           scoring='accuracy',
                           cv=3)

In [None]:
# Run the search on the training split. fit() returns the fitted search
# object, so the name is rebound to the same instance.
grid_search = grid_search.fit(X_train, y_train)

In [None]:
# Estimator refit on the whole training split with the best parameter set.
grid_search.best_estimator_

In [None]:
# Parameter combination with the highest mean cross-validated score.
grid_search.best_params_

In [None]:
# Mean cross-validated accuracy achieved by the best parameter combination.
grid_search.best_score_

In [None]:
# Predict with the refit best estimator. KerasClassifier.predict returns class
# labels directly (not per-class probabilities), so the argmax step used for
# the raw Keras model is not needed and is left commented out.
pred = grid_search.predict(X_test)
#pred2 = np.argmax(pred, axis=1)

In [None]:
# Test-set accuracy of the tuned classifier. scikit-learn metrics take
# (y_true, y_pred) in that order; accuracy is symmetric, but the documented
# order keeps this line safe to adapt to other metrics.
accuracy_score(y_test, pred)

# LSTM

In [5]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [6]:
# Load job_skills.csv (resolved relative to the current working directory)
# into a DataFrame.
df = pd.read_csv('job_skills.csv')

In [7]:
# (rows, columns) of the raw table.
df.shape

(1250, 7)

In [8]:
# Feature frame: a copy of the raw table without the 'Company' and 'Location'
# columns and without the 'Category' column that is used as the target.
df1 = df.drop(['Company', 'Location', 'Category'], axis='columns')

In [9]:
# Target labels: the 'Category' column of the raw table.
y = df['Category']

In [10]:
def concatenate(row):
    """Return the items of ``row`` as one space-separated string.

    Every item is converted with ``str()`` first, so non-string values
    (numbers, ``None``, NaN) appear in the result as their string form.
    """
    return ' '.join(map(str, row))

In [11]:
# Join all remaining columns of each row into a single document string.
# 'combined' itself is excluded from the inputs so that re-running this cell
# gives the same result instead of folding the previous output back in.
source_columns = [col for col in df1.columns if col != 'combined']
df1['combined'] = [concatenate(row) for row in df1[source_columns].values]

In [12]:
# Model input: one combined text string per row.
X = df1['combined']

In [13]:
# Hold out 20% of the rows for testing, stratified by category. A fixed
# random_state keeps the split, the fitted vocabulary and all downstream
# results reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42)

In [14]:
# Sanity check: number of rows in each of the four split pieces.
list(map(len, (X_train, X_test, y_train, y_test)))

[1000, 250, 1000, 250]

In [15]:
# Bag-of-words vectorizer that drops scikit-learn's built-in English stop words.
cnt = CountVectorizer(stop_words='english')

In [16]:
# Learn the vocabulary from the training text only and encode that text as a
# document-term count matrix.
features = cnt.fit_transform(X_train)

In [17]:
# (documents, vocabulary size) of the count matrix.
features.shape

(1000, 4974)

In [18]:
# Map the category names of the training rows to integer class ids; the
# fitted encoder is kept in `lbl`.
lbl = LabelEncoder()
encoded_labels = lbl.fit(y_train).transform(y_train)

In [19]:
# Number of distinct classes present in the training labels.
np.unique(encoded_labels).size

23

In [20]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import Embedding
from keras.layers import LSTM

In [21]:
# Number of columns in the count matrix (the vocabulary size); used below as
# the Embedding layer's input dimension.
max_features = features.shape[1]
max_features

4974

In [24]:
# Text classifier: embedding -> 64-unit LSTM -> 50% dropout -> 23-way output.
model = Sequential()
# NOTE(review): Embedding looks up integer token ids, but the `features`
# matrix passed to fit() holds CountVectorizer term counts — confirm this is
# intended; a tokenized, padded sequence of word ids is the usual input.
model.add(Embedding(max_features, output_dim=128))
model.add(LSTM(64))
model.add(Dropout(0.5))
# Each row has exactly one of 23 categories and the loss is
# sparse_categorical_crossentropy, which expects a probability distribution
# over the classes, so the output layer uses softmax instead of independent
# per-unit sigmoids.
model.add(Dense(23, activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [25]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, None, 128)         636672    
_________________________________________________________________
lstm_2 (LSTM)                (None, 64)                49408     
_________________________________________________________________
dropout_2 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 23)                1495      
Total params: 687,575
Trainable params: 687,575
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Train for a single epoch in mini-batches of 16.
# NOTE(review): `features` is the CountVectorizer count matrix (1000 x 4974 per
# the shape displayed above), so each document appears to be fed to the LSTM as
# a 4974-step sequence of counts — likely why one epoch takes minutes; confirm.
model.fit(features, 
          encoded_labels, 
          epochs=1, 
          batch_size=16)

Epoch 1/1
 192/1000 [====>.........................] - ETA: 6:05 - loss: 3.1169 - acc: 0.0781