In [146]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from sklearn.externals import joblib

In [147]:
# Read the job postings and keep only rows with no missing values.
# Chaining .dropna() returns the cleaned frame without mutating in place.
df = pd.read_csv('job_skills-Copy1.csv').dropna()
df.head()

Unnamed: 0,Company,Title,Category,Location,Responsibilities,Minimum Qualifications,Preferred Qualifications
0,Google,Google Cloud Program Manager,Program Management,Singapore,"Shape, shepherd, ship, and show technical prog...",BA/BS degree or equivalent practical experienc...,Experience in the business technology market a...
1,Google,"Supplier Development Engineer (SDE), Cable/Con...",Manufacturing & Supply Chain,"Shanghai, China",Drive cross-functional activities in the suppl...,BS degree in an Engineering discipline or equi...,"BSEE, BSME or BSIE degree.\nExperience of usin..."
2,Google,"Data Analyst, Product and Tools Operations, Go...",Technical Solutions,"New York, NY, United States",Collect and analyze data to draw insight and i...,"Bachelor’s degree in Business, Economics, Stat...",Experience partnering or consulting cross-func...
3,Google,"Developer Advocate, Partner Engineering",Developer Relations,"Mountain View, CA, United States","Work one-on-one with the top Android, iOS, and...",BA/BS degree in Computer Science or equivalent...,"Experience as a software developer, architect,..."
4,Google,"Program Manager, Audio Visual (AV) Deployments",Program Management,"Sunnyvale, CA, United States",Plan requirements with internal customers.\nPr...,BA/BS degree or equivalent practical experienc...,CTS Certification.\nExperience in the construc...


## Model training

Let's create a model that predicts the job `Category` from the text of the job `Responsibilities` — because why not.

In [148]:
# Input is the free-text 'Responsibilities' column; target is 'Category'.
X, y = df['Responsibilities'], df['Category']

In [149]:
# Hold out 20% of the rows for evaluation. Fixing random_state makes the
# split -- and therefore the accuracy reported further down -- reproducible
# on Restart & Run All instead of changing with every execution.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
print(len(X_train), len(X_test), len(y_train), len(y_test))

988 247 988 247


In [150]:
# Two-stage text classifier: TF-IDF vectorisation followed by a linear SVM.
# Both estimators use their library defaults.
tfidf = TfidfVectorizer()
svm = LinearSVC()
pipe = Pipeline(
    steps=[
        ('tfidf', tfidf),
        ('svm', svm),
    ]
)

In [151]:
# Fit every pipeline step on the training split; the bare expression makes
# the notebook display the fitted pipeline's repr.
pipe.fit(X_train, y_train)

Pipeline(memory=None,
     steps=[('tfidf', TfidfVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 1), norm='l2', preprocessor=None, smooth_idf=True,
 ...ax_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0))])

In [152]:
# Predict a category for each held-out responsibilities text.
y_predicted = pipe.predict(X_test)

In [153]:
# Share of test rows whose predicted category equals the true category.
accuracy_score(y_test, y_predicted)

0.8016194331983806

## Saving the model

In [154]:
# Persist the whole fitted pipeline (vectorizer + classifier) to a single
# file in the working directory so it can be reused without retraining.
joblib.dump(pipe, 'test_model.pkl')

['test_model.pkl']

In [155]:
# Reload the saved pipeline to confirm the artifact round-trips.
# NOTE: joblib files are pickles; only load files from a trusted source.
test_model = joblib.load('test_model.pkl')

In [156]:
# Smoke test: classify one hand-written responsibilities description with
# the reloaded pipeline. predict() expects an iterable of documents, hence
# the single-element list.
responsibility = [
    'Handles all financial transactions between client and company. '
    'Manages respective departments ensuring profitability'
]
test_model.predict(responsibility)

array(['Finance'], dtype=object)

## Serving the model

Refer to `deployed_model.py`.

In [None]:
# Reference copy of the Flask service, intentionally left commented out so
# that running the notebook does not start a web server.
# NOTE(review): presumably mirrors deployed_model.py -- confirm they match.
# NOTE(review): the handler calls joblib.load on every request; loading the
# model once at module level would avoid re-reading the file each time.
# NOTE(review): sklearn.externals.joblib is deprecated in newer scikit-learn
# releases in favour of the standalone joblib package -- verify the version
# installed on the serving host.
# from sklearn.externals import joblib
# from flask import Flask, jsonify

# app = Flask(__name__)

# @app.route('/<inp>')
# def predict_category(inp):
#     responsibility = [str(inp)]
#     model = joblib.load('test_model.pkl')
#     output = model.predict(responsibility)[0]
#     return jsonify(category = output)

# if __name__ == '__main__':
#     app.run(host='0.0.0.0',port=5000)

After this, we run the Python file:

    (for testing) python deployed_model.py
    (for background) nohup python deployed_model.py &
    
Finally, access it via `0.0.0.0:5000/<query>` (`0.0.0.0` is the bind address; from another machine use the server's hostname or IP instead):

    0.0.0.0:5000/Handles all financial transactions between client and company. Manages respective departments ensuring profitability
    
    
This query returns the following output:

    {
      "category": "Finance"
    }