In [16]:
# import database stuff

import os

from abelian import db
from abelian.models import Job, Company

# Connect to the Kalibrr Postgres database.  The connection URL embeds
# credentials, so it is read from the KALIBRR_DATABASE_URL environment variable
# to keep real passwords out of the committed notebook.  The fallback is the
# local-development URL this notebook has always used, so existing setups keep
# working without any extra configuration.
DATABASE_URL = os.environ.get(
    'KALIBRR_DATABASE_URL',
    'postgresql://kalibrr:password@localhost/kalibrr',
)
# NOTE(review): the meaning of the second positional argument (False) is not
# visible from this notebook -- confirm against abelian.db.configure.
db.configure(DATABASE_URL, False)
session = db.get_session()
# Presumably discards any transaction left open or failed by an earlier run of
# this cell, so re-executing it starts clean -- confirm this is still needed.
session.rollback()

In [2]:
# Fetch (industry, name, id) for every active, non-test job whose company is
# not flagged as a test company either.  Job and Company are tied together by
# the company_id equality inside filter() rather than by an explicit join().
jobs = (
    session.query(Job.industry, Job.name, Job.id)
    .filter(
        ~Job.is_test,
        Job.active,
        Job.company_id == Company.id,
        ~Company.is_test,
    )
    .all()
)

In [3]:
def _ascii_or_blank(value):
    """Return `value` encoded as an ASCII byte string, or '' if not unicode.

    Characters without an ASCII equivalent are dropped (errors='ignore').
    Any value that is not a `unicode` instance, None included, becomes ''.
    """
    if not isinstance(value, unicode):
        return ''
    return value.encode('ascii', 'ignore')


# Transpose the (industry, name, id) rows into one tuple per column; the ids
# are not used further.
raw_industries, raw_titles, _ = zip(*jobs)

job_industries = [_ascii_or_blank(text) for text in raw_industries]
job_titles = [_ascii_or_blank(text) for text in raw_titles]

In [4]:
%%time

from cleaning import clean

# Pass both text columns through the project-local `clean` helper, binding the
# results to new names so the uncleaned lists stay available for re-runs.
# NOTE(review): `style` presumably selects field-specific cleaning rules --
# confirm the accepted values in cleaning.py.
clean_job_industries = clean(job_industries, style='industry')
clean_job_titles = clean(job_titles, style='job title')

CPU times: user 9.53 s, sys: 463 ms, total: 9.99 s
Wall time: 9.31 s


In [5]:
%%time

from clustering import stem_cluster

# Cluster the cleaned strings with the project-local `stem_cluster` helper.
# NOTE(review): the meaning of mode=8 and of length_at_least=4 (passed for job
# titles only) is not visible from this notebook -- confirm in clustering.py
# and consider lifting them into named constants.
job_industry_clusters = stem_cluster(clean_job_industries, mode=8)
job_title_clusters = stem_cluster(clean_job_titles, mode=8, length_at_least=4)

CPU times: user 3.51 s, sys: 623 ms, total: 4.13 s
Wall time: 3.45 s


In [6]:
%%time

from analysis import LikelihoodMatrix

# Build the industry x job-title matrix from the two clusterings.  Judging by
# the displays further down, keywords from the first argument label the rows
# and keywords from the second label the columns.
IndustryJob = LikelihoodMatrix(job_industry_clusters, job_title_clusters)

CPU times: user 380 ms, sys: 66.7 ms, total: 447 ms
Wall time: 442 ms


In [7]:
# Peek at the first rows of the matrix's `dataframe` attribute as it looks
# right after construction, i.e. before any add_match() calls.
IndustryJob.dataframe.head()

Unnamed: 0,ENGINE,TESTER,MONTHLY,POSITIONS,SG16,SG11,AIRLINE,SG12,VOICE,WITH,...,MIGUEL,CALL,GRADS,TRAINEE,TEST,DEVELOPER,DAVAO,BUREAU,SPEAKING,OFFICER
OPERATIONS,0,0,0,0.238803,0,0,0.0,0,0,0,...,0,0,0,0.0,0,0.202155,0,0,0.0,0
ENGINE,1,0,0,0.0,0,0,0.088787,0,0,0,...,0,0,0,0.101319,0,0.0,0,0,0.0,0
CONSULTING,0,0,0,0.046848,0,0,0.0,0,0,0,...,0,0,0,0.0,0,0.0,0,0,0.032893,0
TRANSPORTATION,0,0,0,0.086663,0,0,0.0,0,0,0,...,0,0,0,0.046327,0,0.0,0,0,0.0,0
DEVELOPMENT,0,0,0,0.0,0,0,0.0,0,0,0,...,0,0,0,0.0,0,0.614702,0,0,0.0,0


In [8]:
%%time
%%timeit

for industry, job in zip(clean_job_industries, clean_job_titles):
    IndustryJob.add_match(industry, job)

The slowest run took 26.42 times longer than the fastest. This could mean that an intermediate result is being cached 
1 loops, best of 3: 5.05 s per loop
CPU times: user 2min 28s, sys: 86.7 ms, total: 2min 28s
Wall time: 2min 28s


In [9]:
# Display the matrix again now that the matches have been added, for
# comparison with the earlier dataframe.head() output.
# NOTE(review): this renders every column, which makes the saved output very
# wide -- consider IndustryJob.dataframe.head() if a preview is enough.
IndustryJob

Unnamed: 0,ENGINE,TESTER,MONTHLY,POSITIONS,SG16,SG11,AIRLINE,SG12,VOICE,WITH,SG18,PROVIDER,CUSTOMER,SYSTEM,OUTBOUND,SERVICE,POWER,ARTIST,INBOUND,STUDENTS,GENERAL,COORDINATOR,ENGINEERING,ELECTRICAL,HUMAN,TREASURY,MANDARIN,PASAY,ADMINISTRATIVE,AUDIT,FULL,SUPPLY,RECRUITER,FACULTY,COLLECTION,DIRECTOR,SURVEYOR,CHIEF,PLANNING,REPRESENTATIVE,PROFESSIONAL,ENTRY,ANDROID,EMAIL,JUNIOR,RESOURCES,ENGINEERS,EXPERIENCED,RESEARCH,FRONT,PROCESS,HOUSE,WELCOME,AGENTS,SHIFT,CREDIT,PART,TELUS,HEALTHCARE,ACCOUNTS,ATTENDANT,PROPERTY,CONSUMER,TRAVEL,SG10,ATTORNEY,GRAPHIC,BASED,BUSINESS,GOVERNMENT,RESERVATIONS,SUPPORT,DIRECT,SERVICES,HIRING,COLL,TAGUIG,ARCHITECT,EXPERIENCE,ENGLISH,RESTAURANT,MARKETING,NURSE,MARKET,SEARCH,CENTER,INVESTIGATOR,CIVIL,HOTEL,COMPANY,CASHIER,BOOKKEEPER,TELESALES,MANAGER,PRODUCTION,SPECIAL,INFORMATION,EXCISE,DESK,FOOD,MEDIA,INTERNET,DRIVER,TRAINING,RELATIONS,PROCUREMENT,CEBU,SYSTEMS,PROMODIZER,OPERATOR,ACCOUNTING,POST,TECHNOLOGY,INVESTMENT,FINANCIAL,EXPERT,LEAD,REVENUE,MOBILE,SUCAT,PURCHASING,CREW,ACCOUNTANT,TIME,JAPAN,SENIOR,EDUCATION,QUALITY,TEACHER,STAFF,REAL,SPECIALIST,ACCOUNT,WORK,OFFICE,TECHNICAL,CARAGA,REGIONAL,REPORTS,AIDE,APPLICATION,GAME,ASSOCIATES,CHAT,TECHNICIAN,HOME,CONTRACTUAL,GLOBAL,ENGINEER,ADMINISTRATOR,MANAGEMENT,OPTI,FINANCE,AREA,EXECUTIVE,AGENT,PLANT,CONCERNS,TECH,SPANISH,PERSONNEL,APPLY,STILL,BANKING,ANALYST,DAYSHIFT,QUANTITY,DEVELOPMENT,CENTRAL,ESTATE,CITY,ADMIN,TELEMARKETER,MECHANICAL,SITE,CONTENT,DEPARTMENT,CORP,INVENTORY,NEED,PASIG,PROGRAMMER,CARD,CARE,EARN,JOIN,INTERNATIONAL,PIONEER,CONSULTANT,CLERK,WRITER,COMMUNICATIONS,ASSISTANT,INTERNAL,SAFETY,VIII,DESIGN,AUDITOR,OPERATIONS,ORTIGAS,SUPERVISOR,INTERNSHIP,INTELLIGENCE,MEDICAL,INCENTIVES,REPRESENTATIVES,BUDGET,URGENT,INTERN,DESIGNER,COLLECTIONS,BILINGUAL,BASIC,DATA,ASSURANCE,SECRETARY,CONTROL,RESOURCE,BILLING,MAKATI,TELCO,CORPORATE,HELPDESK,LEVEL,ADMINISTRATION,ENCODER,RECRUITMENT,RECEPTIONIST,QUEZON,TEAM,PROGRAM,ASSISTANTS,RETAIL,ASSESSMENT,STATISTICIAN,SOFTWARE,DIVISION,HEAD,CABLE,ROCKWE
LL,SOCIAL,TELEPERFORMANCE,LEGAL,JAPANESE,PROJECT,COMPUTER,MUOZ,FEMALE,DIGITAL,LOCAL,STORE,TRAINER,LOOKING,PRODUCT,JAVA,ASSOCIATE,TELECOM,AYALA,SALES,FIELD,CREATIVE,HOLDINGS,MIGUEL,CALL,GRADS,TRAINEE,TEST,DEVELOPER,DAVAO,BUREAU,SPEAKING,OFFICER
OPERATIONS,5.202232,0.0,0.0,1.053966,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.761556,0.0,0.0,0.0,0.0,4.560227,11.098673,5.885916,0.0,1.231925,0.0,1.805474,0,10.622515,0.0,0.0,1.557455,0.0,0.0,0.118822,1.736621,0.0,2.146213,1.540415,1.941737,0.029552,0.0,0.0,1.004024,0.0,1.551556,5.853419,4.380438,0.0,1.106772,0.0,4.139484,0.0,0.0,1.562382,0.0,2.013829,0.0,0.0,3.208019,0.0,0.214661,0.054954,0.0,0.0,0.0,0.0,0.0,5.459296,0.0,0.286256,4.125015,1.35678,7.836484,0.966659,0.0,0.0,0.0,4.740067,0.0,7.678255,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.538954,0.0,0.044053,5.65435,66.886237,6.305224,4.688379,0.187131,0.0,0.0,0.892949,0.0,0.0,5.049597,2.189754,2.027497,0.0,0.934925,0.0,2.327342,25.25875,4.218168,0.0,0.0,0.0,0.0,0.071986,2.597962,2.73389,0.0,0.0,1.845144,4.486015,3.767469,0.0,1.177259,2.836104,3.335711,2.405846,0.0,4.33541,0.0,6.152299,4.563615,1.37547,27.640297,1.434937,0.0,0.038744,0.0,2.630849,1.912699,0.0,1.852264,0.0,2.207137,0.0,0.0,0.0,6.532621,9.987882,44.47708,0.0,0.794545,2.000757,13.664938,0.0,2.467304,0.0,0.918508,0.0,1.753459,0.0,0.0,0.0,6.709392,0.999022,0.0,3.72253,0.0,0.0,0.875047,5.414094,0.0,0.0,0.0,0.0,0.901957,0.0,1.517726,0.0,0.0,2.684725,0.0,0.0,0.0,0.0,1.004368,0.043544,5.782368,3.754977,0.0,3.424837,37.57466,0.0,0.0,0.0,0.0,0.0,36.8423,0.0,19.375091,0.0,0.0,0.0,0.0,1.856007,0.0,0.973482,0.807481,0.0,0.220776,0.0,0.0,4.329844,1.379041,4.151297,4.049612,1.702056,0.0,0.053484,0.0,0.061624,0.0,1.177143,10.131163,1.669694,0.0,9.268438,0.0,1.320598,3.548905,34.892443,1.194194,0.0,0.035566,0.0,1.048409,4.039202,0.0,0.0,1.031828,3.103942,0.0,1.792694,11.821317,0.0,0.0,0.0,0.0,0.0,7.452685,8.643516,0.0,5.097859,0.0,2.007826,0.0,0.0,11.401055,7.449513,0.04702,0.0,0.0,0.978089,1.352714,9.077719,0.0,2.490402,1.236155,0.0,0.0,27.374201
ENGINE,103.844916,0.894486,0.0,1.48069,0.0,0.0,0.088787,0.0,0.0,0.0,0.0,0.0,0.0,4.583196,0.0,5.774566,5.055854,0.0,0.0,0.0,0.0,9.183526,111.893654,13.81242,0.0,0.0,0.0,0,0.0,0.0,2.705369,0.0,0.0,6.340823,0.961834,0.0,17.217523,2.288148,11.928248,0.0,0.0,0.0,0.0,0.0,0.0,0.0,120.974158,0.0,2.174342,0.695968,0.0,0.0,0.0,0.0,0.0,0.750606,0.876185,0.0,0.0,1.036546,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.104712,2.77238,0.0,0.0,2.64679,0.0,5.475346,0.0,0.458985,0.0,4.183192,0.0,2.625322,0.0,0.886023,0.0,0.6507,1.763781,1.544514,0.0,6.12535,0.0,0.542136,0.0,0.0,0.474839,25.961798,4.406874,9.503112,2.608228,0.0,0.0,0.0,0.0,0.557497,0.0,0.577599,0.0,0.0,0.0,4.200434,0.783779,11.355237,1.573428,0.0,2.571201,0.0,0.0,0.0,0.719712,0.0,0.0,1.234735,1.371031,0.0,1.53819,0.0,2.894003,12.95637,0.0,6.702107,0.0,1.599904,0.0,12.470406,1.866201,2.622578,7.056803,7.616627,0.0,0.0,0.0,0.0,4.226336,0.0,0.0,0.0,7.12341,0.0,0.0,10.041538,132.856061,1.604406,17.692726,0.0,0.0,1.217976,0.942343,0.0,8.657011,0.0,1.838678,0.0,0.542763,0.0,0.0,0.0,3.43745,0.0,11.972598,18.592407,0.0,0.0,0.0,0.0,0.0,16.080641,3.274655,1.680965,1.956287,0.0,0.0,0.0,0.0,7.080638,0.0,0.0,0.890083,0.0,0.921799,0.0,0.0,0.0,3.058708,0.800463,8.085576,0.74197,5.843763,0.0,13.212918,0.567355,7.294491,0.0,5.009023,0.880924,0.0,0.0,0.0,0.0,0.0,0.0,0.776101,12.035557,0.914832,0.797579,0.0,0.904156,0.0,1.03528,7.427417,0.0,0.0,0.0,0.0,0.0,0.0,4.010422,0.0,0.0,0.0,1.090068,1.218263,0.0,5.81577,7.508397,0.0,0.0,0.0,5.995661,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.463312,32.889104,1.43399,0.0,0.0,0.0,0.972336,1.612107,0.098039,1.932395,4.19727,0.67329,0.0,0.0,0.0,0.691248,3.177357,0.0,13.474972,1.232474,0.0,0.769386,1.858255,2.105717,29.213794,0.0,0.0,0.0,7.364197
CONSULTING,0.0,0.0,0.0,0.046848,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.972784,0.0,0.0,1.805222,0.0,0.0,0.0,0.0,0.0,0.0,0.029404,0.0,1.393666,0.0,0.0,0,0.0,2.159016,0.0,0.0,1.880145,0.079812,0.0,0.0,0.0,1.36073,0.032893,1.120818,0.0,0.0,0.0,0.0,1.079382,1.870615,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.110283,0.0,0.0,1.240919,0.0,0.0,0.0,0.0,0.0,0.626102,0.0,0.040017,1.875074,0.0,1.614621,0.042772,0.0,0.0,0.0,0.0,0.0,0.0,0.789668,0.0,0.465157,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.871358,7.534264,0.0,0.0,1.436784,0.0,1.060076,0.0,0.0,0.0,0.0,0.035269,0.048425,0.0,0.0,0.0,0.0,1.988526,3.241518,0.0,1.447046,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028402,0.0,2.16151,0.0,0.0,2.643764,0.0,0.0,0.0,0.789647,0.0,0.0,2.370163,0.0,1.49434,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.934303,0.0,0.0,0.0,0.044011,0.0,0.0,0.0,5.82354,0.0,0.980421,0.0,1.047185,0.0,1.678207,0.054605,0.0,0.0,0.0,0.0,0.0,0.036412,5.77152,0.0,0.0,0.737766,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.064024,0.0,0.0,0.0,0.0,0.0,2.105812,0.0,0.936661,0.0,0.0,0.0,0.0,3.392941,0.0,0.0,0.03731,1.927711,0.0,0.0,0.0,0.0,1.596142,1.347389,0.0,2.252746,0.0,0.0,0.0,0.0,1.071332,0.0,0.0,0.0,0.0,0.043312,0.030287,0.0,0.0,0.0,0.0,0.061006,1.705211,0.037342,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.10834,0.0,0.0,0.0,1.593034,1.790104,0.0,0.0,0.0,0.0,0.0,1.410341,0.0,0.0,0.520466,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.116209,0.036714,0.0,0.0,1.012771,0.0,0.0,2.724236,0.0,0.0,0.032391,0.0,0.0,0.0,1.460585,0.0,0.453506,0.0,0.0,0.032893,1.721495
TRANSPORTATION,0.0,0.586717,0.0,1.419727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.294025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1.362282,0.0,0.0,1.724815,0.0,0.0,0.084237,0.0,0.0,0.0,0.0,0.101334,0.020951,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047053,0.0,0.0,0.0,0.0,0.0,0.0,0.132353,0.921532,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.050766,3.385471,0.0,2.486306,0.0,0.0,0.0,0.0,4.1463,0.0,0.0,0.0,1.463431,0.0,0.082612,0.702276,0.132664,0.0,0.0,0.0,0.0,0.0,9.8964,0.040386,0.160163,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.676972,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.161724,0.0,0.0,0.0,0.0,0.921557,0.0,0.0,1.713799,0.81231,0.0,0.027467,0.18267,0.0,0.138708,0.0,0.0,0.0,0.569499,0.0,0.030947,0.0,0.0,1.378885,0.0,0.0,0.0,0.0,2.378774,0.0,0.0,0.0,0.0,0.0,2.156809,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045564,0.068193,0.0,2.720893,1.476871,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.130277,0.03087,0.0,3.20777,0.0,0.123401,0.0,0.0,1.89638,0.0,0.0,0.0,0.161903,0.055896,3.111071,0.0,0.0,0.0,0.0,0.09686,0.0,0.0,0.0,0.0,0.080121,0.0,0.0,0.0,0.052014,0.0,1.677122,0.0,0.0,0.037917,0.0,0.050048,0.0,0.0,1.506509,0.0,0.0,0.068983,0.0,0.0,0.0,0.0,0.0,0.0,0.109199,0.0,0.026994,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.044828,0.0,0.0,0.0,0.0,0.0,0.0,2.130395,0.0,0.033334,0.0,0.0,0.0,0.0,0.046327,0.922083,0.0,0.0,0.0,0.0,1.974314
DEVELOPMENT,1.044986,1.919735,0.0,1.295321,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.194771,2.118916,1.338945,6.358748,0.0,0.0,0.0,0.031086,0.0,1.913689,1.086864,0.0,0.0,0.0,2.205296,0,3.778465,0.0,0.0,0.0,2.35346,0.92522,0.0,0.0,1.561361,0.0,0.0,4.311228,0.0,1.426482,0.0,0.0,0.0,0.0,1.464489,0.0,13.721125,2.036626,0.0,0.0,0.0,0.044403,0.0,0.0,1.132851,0.0,3.056795,12.833678,0.0,0.0,0.958944,1.863007,0.0,0.0,0.0,4.44488,24.385852,0.112172,0.0,2.519505,0.0,5.940637,1.107379,0.0,0.0,0.0,0.0,0.0,2.365377,16.062075,0.0,15.146369,11.130291,1.28809,0.0,0.0,0.0,0.0,3.049796,0.0,1.066797,22.066375,9.969983,4.049806,1.664098,0.0,0.0,0.0,0.0,4.61442,0.0,1.59983,0.0,0.106817,0.0,1.831865,0.0,2.040383,10.171346,0.0,1.675985,2.98617,4.19785,0.0,0.0,0.061647,0.0,0.0,0.0,1.602258,10.037056,0.0,0.0,4.513379,0.0,4.649937,0.0,5.084847,0.0,5.314336,14.414143,0.0,17.416375,1.298117,0.0,0.0,2.199026,0.0,2.892896,0.0,10.22637,0.0,0.0,0.0,0.0,0.0,1.312225,3.309124,12.631284,0.0,4.396462,1.481727,14.958003,0.053734,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.093268,18.431634,0.0,0.0,37.612401,0.038,0.0,0.961915,5.074341,2.983986,0.0,0.948712,0.037547,0.113157,0.989189,2.388582,0.0,0.0,4.939673,0.0,2.441628,0.0,0.0,1.231138,0.0,9.848175,1.581853,3.131778,0.0,22.002975,4.753372,2.112683,0.0,0.0,0.0,1.499752,0.0,2.042256,2.595134,1.799742,0.0,0.028898,4.120881,0.0,1.225431,6.423812,0.0,0.0,1.737588,0.0,2.324261,5.405136,13.713963,0.0,0.0,1.049709,0.0,0.0,4.001532,0.0,0.286294,3.496889,0.0,0.111795,0.0,1.569895,0.0,3.736833,20.643819,0.0,0.112021,0.0,0.0,0.0,0.0,0.0,0.0,3.029396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.130934,0.0,3.350308,0.0,11.173398,0.0,11.08523,0.0,0.0,6.091761,2.113153,0.0,1.50356,1.257973,1.068569,0.0,2.560375,0.0,27.903891,0.0,0.0,0.0,19.019124
BUSINESS,6.560683,0.0,0.0,1.099268,0.0,0.0,0.061903,0.0,7.484903,0.0,0.0,0.0,90.531774,3.019656,15.030793,71.613258,0.0,2.500342,3.769908,1.084548,3.721261,1.624043,3.278374,0.0,0.781414,0.0,3.627005,0,5.945086,0.0,0.0,0.0,4.895881,0.785183,1.421236,0.0,2.113586,0.0,0.796871,126.387289,0.056861,1.16812,0.0,2.488412,1.950335,1.184341,2.821632,4.07005,3.268196,0.925084,1.192518,0.0,0.0,19.612745,7.621427,0.0,1.400915,5.368588,4.283348,36.127361,0.0,0.0,0.0,5.803207,0.0,0.0,2.369131,5.477982,25.61233,0.0,1.507726,29.094237,0.0,60.121037,9.189541,0.0,9.371348,0.0,4.156069,0.0,2.007366,12.236446,0.0,12.781252,2.651093,12.871471,0.0,0.0,0.0,1.671674,2.588194,0.0,9.572154,27.438016,2.653012,18.830157,4.839211,0.0,0.706691,0.0,1.594543,3.938531,0.0,12.946793,2.222011,0.0,0.0,2.610582,0.0,0.0,27.163349,3.199857,4.815514,1.318286,7.951398,1.120234,2.382314,0.0,0.0,0.0,0.068923,1.359748,25.81834,0.0,0.0,0.971372,0.0,1.234472,0.0,4.359604,0.0,24.643418,41.392114,4.11853,17.159708,25.936115,0.0,0.890176,4.193708,0.0,0.704828,1.418734,20.915775,0.0,11.447251,0.0,0.707331,0.516649,4.582648,5.564858,16.592522,0.0,6.853515,1.257461,18.551939,19.550135,0.0,0.0,0.72815,3.28566,1.993573,0.703562,0.8606,0.958425,17.14965,8.680793,0.0,32.149348,0.0,0.0,0.816325,6.518593,4.744472,0.0,0.80512,2.561962,0.0,0.0,2.002166,0.843658,0.0,6.824134,0.0,1.478226,0.46158,0.430942,5.625844,8.382319,7.150633,1.342432,6.53706,2.377265,28.14425,4.903071,1.792918,0.0,1.94119,0.0,1.660897,0.0,0.0,1.142607,4.789335,3.249522,1.897901,122.487375,0.0,6.203517,6.078841,1.960543,1.494256,4.268116,0.0,2.928989,1.338878,7.999667,0.0,1.079619,0.89083,0.430694,5.210935,1.624234,2.676341,3.238933,5.456653,0.0,4.792143,4.328512,3.900266,0.997974,6.866124,26.401199,0.0,13.89783,1.054333,1.659244,0.0,0.0,1.374906,0.0,8.465474,0.0,0.0,3.654534,0.0,0.0,0.397131,0.0,0.0,4.354126,0.0,1.909387,2.156535,3.220702,1.085584,22.340843,1.111763,0.0,22.131709,1.476796,3.045464,0.0,0.0,5.322342,0.0,4.304218,0.0,28.431402,0.0,0.0,0.0
,18.100441
GOVERNMENT,43.933329,0.0,0.0,0.0,32.658257,2507.096907,0.0,69.117268,0.0,0.0,286.72124,0.0,0.0,12.170107,0.0,0.0,0.0,0.0,0.0,0.03227,0.0,0.0,43.707397,0.0,1.259345,0.0,0.0,0,2121.62753,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.195424,0.024761,0.0,0.051935,0.0,0.0,0.0,1.690327,49.432662,0.0,0.0,0.0,0.0,0.0,0.0,0.046093,0.0,0.0,0.0,0.0,0.0,31.04356,0.0,0.0,0.0,0.0,115.436945,562.642441,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,644.453554,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.049086,40.329614,0.0,0.0,0.0,0.0,0.0,0.0,1.536586,0.0,28.955708,18.522777,98.520309,0.0,0.0,0.0,0.058652,0.080026,0.0,0.0,0.110885,0.0,13.661407,0.0,0.0,26.742223,0.0,1.611889,16.311044,0.0,0.0,0.0,4308.943866,0.0,0.0,0.0,0.0,46.067325,0.0,0.0,0.0,2.24859,0.0,0.0,0.0,0.0,23.160575,34.866568,0.0,3781.007913,0.0,0.0,0.0,0.0,30.7631,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,55.16859,1671.237214,1.045879,0.0,0.0,0.0,0.0,0.05578,0.0,0.054293,0.0,0.0,0.0,0.0,0.0,0.0,13.884763,0.0,0.0,0.112172,0.039447,0.0,0.0,995.706003,0.0,0.0,0.0,0.038977,0.117466,0.0,0.030449,0.0,0.0,6.095495,0.0,0.0,0.0,0.0,0.044652,0.0,0.0,0.0,0.0,0.0,1409.334808,0.060418,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.048832,9.985599,0.0,0.029998,0.023668,0.0,0.042107,0.08165,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.540864,0.0,0.0,0.0,0.0,0.0,0.0,1835.654128,0.0,0.116052,0.0,0.0,0.0,5.479986,1308.731303,0.0,0.116287,17.673431,0.0,0.0,0.0,0.0,0.0,0.0,0.0,63.741642,0.0,2.016914,5.609897,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4355.759832
MARKETING,12.555977,2.54518,0.0,2.431645,0.0,0.0,0.0,0.0,1.471007,0.0,0.0,2.338125,5.089671,0.0,5.859378,8.986817,0.0,1.316214,3.11894,4.780159,1.566991,9.78935,6.139956,0.0,0.760194,0.0,0.0,0,1.984428,0.0,1.710966,0.0,0.0,0.0,0.0,3.480839,0.0,0.0,0.034862,25.942161,0.0,0.0,0.916188,0.0,6.028637,1.152179,6.814174,0.0,5.176505,1.350195,1.618157,2.293761,0.0,0.0,0.0,0.0,0.0,0.0,0.0,36.738046,0.0,0.0,0.0,1.388289,0.0,0.0,1.644525,1.348622,18.660607,0.0,0.0,0.0,4.455327,8.336728,9.988553,0.0,0.0,0.0,0.0,0.0,0.0,191.128644,0.0,143.81305,6.708851,2.708866,0.0,0.0,0.0,10.733566,7.65149,0.0,21.948433,91.682073,4.219658,43.516427,4.644347,0.0,0.993926,0.0,15.680481,7.359644,0.915447,1.381887,0.0,0.0,0.0,0.0,3.0751,4.062126,32.275439,0.0,4.544786,0.0,0.814299,1.835247,2.614737,0.0,0.97133,0.0,3.061326,2.101463,32.135267,1.710966,0.0,4.848699,0.0,0.0,0.0,28.478658,0.0,56.249419,41.057148,0.0,70.03147,4.376599,0.0,0.0,2.419944,0.0,1.301025,0.0,28.427006,0.0,2.956377,1.01957,0.0,0.0,8.916387,1.551359,56.454936,4.475594,1.228653,4.487487,43.84375,0.962382,0.0,0.0,0.0,0.0,24.32143,0.988768,0.0,0.038592,7.818675,0.0,0.0,20.812096,0.0,0.0,0.0,0.954126,53.3193,0.0,0.838885,9.903346,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.389684,1.386817,15.917868,7.678715,16.896938,6.650452,29.126872,8.809472,0.0,0.0,2.57472,0.85124,3.772184,0.0,9.216931,10.151452,0.0,2.039231,0.0,24.946684,0.0,7.391657,13.331033,3.303602,0.0,0.032101,0.0,4.672654,0.0,5.4282,0.0,1.050301,0.039578,0.0,0.0,2.491161,0.0,1.094531,1.70887,22.612386,0.0,31.097053,0.0,1.353527,0.0,27.047689,1.232244,0.0,0.0,1.221027,5.895569,7.541965,1.850752,0.0,27.42249,0.0,0.0,0.0,6.966404,0.0,0.0,0.0,15.74931,1.143461,2.896249,5.918406,1.496522,5.122577,0.0,30.559855,0.0,0.0,51.165279,3.715157,5.832404,0.03433,0.0,0.0,0.0,9.124183,4.0,18.14832,0.0,0.0,0.034863,67.833841
MEDICAL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.19471,0.0,0.0,0.0,0,2.01886,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.33686,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.115501,0.0,0.0,0.0,1.92013,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.884724,13.276079,1.38415,0.0,0.0,0.0,0.0,0.0,1.944363,0.0,0.0,3.662327,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.407098,0.0,0.0,1.727451,0.0,0.0,0.0,0.0,0.0,0.0,2.811771,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.632242,0.0,0.0,0.0,0.0,0.0,0.0,10.056612,0.0,0.0,2.156594,0.0,0.0,0.216827,0.0,0.0,0.0,0.0,0.05973,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.593748,0.0,0.0,0.0,2.938599,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.940786,0.0,0.188072,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.053138,5.491521,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.453332,0.0,0.0,3.208346,0.0,7.968777,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.749108,0.0,0.0,0.0,0.0,0.0,0.0,5.099516,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.779005,0.0,0.097538,0.0,0.824888,0.0,0.0,0.0,0.0,0.0,0.0,6.517457,0.0,0.0,0.0,0.0,0.137272,0.0,0.852482,0.0,0.0,0.0,0.0,0.0,0.0
COMMERCIAL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.818991,0.0,0.0,2.075199,0.0,0.0,0.0,0.0,1.879793,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.251015,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.733323,0.0,0.0,0.0,0.0,0.060203,0.0,0.0,0.0,0.0,0.0,0.0,1.491151,0.0,1.3889,0.067955,0.0,0.0,0.0,0.0,0.0,1.584253,0.0,0.0,1.077748,0.0,2.088131,0.0,0.0,0.0,0.0,1.875627,0.0,0.0,3.677046,0.0,2.70044,0.0,0.0,0.0,0.0,0.0,0.059467,0.0,0.0,0.0,4.2484,0.0,1.421799,1.692559,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.149478,1.28454,0.0,1.704649,0.0,0.163533,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.212805,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.754886,1.674786,0.0,5.021393,3.651618,0.0,0.0,0.0,0.0,0.0,0.0,0.044202,0.0,4.800296,0.0,0.0,0.0,0.0,0.0,1.609709,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.152522,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.179232,0.0,1.006794,0.0,0.0,0.958535,2.180524,0.0,0.0,0.0,0.996957,0.0,0.0,0.0,0.195779,0.0,0.0,0.0,0.0,0.0,0.0,1.577515,0.0,0.0,0.156678,6.196786,0.0,0.0,0.0,1.614296,0.0,1.919198,0.0,0.0,0.0,0.0,0.0,0.0,2.15163,0.0,0.0,0.942572,2.071291,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.104012,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.754437,0.0,0.0,0.04019,0.0,0.0,0.0,0.0,0.0,0.238349,0.0,0.0,0.0,0.0,0.053881,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047914,0.066959,0.0,1.238221,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.282592,0.0,0.0,0.0,5.784697


In [10]:
# Top 10 industry keywords (matrix rows) for the job-title keyword ACCOUNTANT,
# i.e. the industries an accountant posting is most strongly associated with.
IndustryJob.find_row_matches('ACCOUNTANT').nlargest(10)

ACCOUNTING     16.278819
SERVICE        12.501763
FINANCE        12.400726
SERVICES       11.333154
CUSTOMER       11.148818
FINANCIAL       8.439801
SALES           2.839264
OUTSOURCING     2.347849
GOVERNMENT      2.256427
MARKETING       2.210900
dtype: float64

In [11]:
# Top 15 job-title keywords (matrix columns) for the industry keyword
# TECHNOLOGY, i.e. the jobs most prominent in the technology industry.
IndustryJob.find_column_matches('TECHNOLOGY').nlargest(15)

DEVELOPER      13.287393
DEVELOPMENT     8.421630
ENGINEER        3.256493
ENGINEERS       2.928555
TECHNOLOGY      2.892026
ENGINE          2.638106
ENGINEERING     2.514598
SUPPORT         2.444317
INFORMATION     2.437383
SOFTWARE        2.235924
PROGRAMMER      2.189050
TECHNICAL       1.869096
LEVEL           1.865723
PROGRAM         1.656003
ANALYST         1.588862
dtype: float64

In [12]:
# Top 15 job-title keywords (matrix columns) for the industry keyword
# CONSTRUCTION, i.e. the jobs most prominent in the construction industry.
IndustryJob.find_column_matches('CONSTRUCTION').nlargest(15)

ENGINEER       13.065265
ENGINEERS      12.376092
ENGINEERING    10.504959
ENGINE         10.404482
PROJECT         5.877905
WORK            3.597410
ELECTRICAL      3.113595
CIVIL           2.079308
SURVEYOR        1.907392
URGENT          1.856423
QUANTITY        1.820149
FACULTY         1.660779
SUPERVISOR      1.621363
MECHANICAL      1.399067
SALES           1.392364
dtype: float64

In [13]:
# Recommendation score between the industry "BUSINESS PROCESS OUTSOURCING"
# (BPO, first argument) and the job title "CUSTOMER SERVICE" (second argument).
# NOTE(review): the score's scale is not documented here -- it is presumably
# only meaningful relative to other scores from the same matrix.
IndustryJob.recommendation_score('BUSINESS PROCESS OUTSOURCING', 'CUSTOMER SERVICE')

765271.31069551338

In [14]:
# Recommendation score between the industry "BUSINESS PROCESS OUTSOURCING"
# (BPO, first argument) and the job title "PROGRAMMER" (second argument).
IndustryJob.recommendation_score('BUSINESS PROCESS OUTSOURCING', 'PROGRAMMER')

47623.118867802827