In [9]:
# import database stuff

from abelian import db
from abelian.models import Job, Company

db.configure('postgresql://kalibrr:password@localhost/kalibrr', False)
session = db.get_session()
session.rollback()

In [10]:
jobs = session.query(Job.industry, \
                     Job.name, \
                     Job.id). \
    filter(~Job.is_test, \
           Job.active, \
           Job.company_id==Company.id, 
           ~Company.is_test). \
    all()

In [80]:
job_industries, job_titles, _ = zip(*jobs)

job_industries = [s.encode('ascii', 'ignore') if isinstance(s, unicode) else '' for s in job_industries]
job_titles = [s.encode('ascii', 'ignore') if isinstance(s, unicode) else '' for s in job_titles]

In [81]:
%%time

from cleaning import clean

clean_job_industries = clean(job_industries, style='industry')
clean_job_titles = clean(job_titles, style='job title')

CPU times: user 9.76 s, sys: 10 ms, total: 9.77 s
Wall time: 9.76 s


In [82]:
%%time

from clustering import stem_cluster

job_industry_clusters = stem_cluster(clean_job_industries, mode=10)
job_title_clusters = stem_cluster(clean_job_titles, mode=10, length_at_least=4)

CPU times: user 107 ms, sys: 0 ns, total: 107 ms
Wall time: 106 ms


In [106]:
%%time

from analysis import LikelihoodMatrix

IndustryJob = LikelihoodMatrix(job_industry_clusters, job_title_clusters)

CPU times: user 80 ms, sys: 0 ns, total: 80 ms
Wall time: 81 ms


In [107]:
IndustryJob.dataframe.head()

Unnamed: 0,ENGINE,TESTER,MONTHLY,MARKETING,SG16,SG11,SG10,SG12,VOICE,WITH,...,CIVIL,MIGUEL,CALL,TRAINEE,GLOBAL,DEVELOPER,DAVAO,BUREAU,SPEAKING,OFFICER
OPERATIONS,0.0,0,0,0.0,0,0,0,0,0,0,...,0,0,0,0.0,0,0.195143,0,0,0.0,0
ENGINE,1.0,0,0,0.0,0,0,0,0,0,0,...,0,0,0,0.106272,0,0.0,0,0,0.0,0
CONSULTING,0.0,0,0,0.143736,0,0,0,0,0,0,...,0,0,0,0.0,0,0.0,0,0,0.034131,0
DEVELOPMENT,0.0,0,0,0.0,0,0,0,0,0,0,...,0,0,0,0.0,0,0.614401,0,0,0.0,0
BUSINESS,0.088721,0,0,0.0,0,0,0,0,0,0,...,0,0,0,0.073773,0,0.0,0,0,0.0,0


In [108]:
%%time
%%timeit

for industry, job in zip(clean_job_industries, clean_job_titles):
    IndustryJob.add_match(industry, job)

The slowest run took 26.83 times longer than the fastest. This could mean that an intermediate result is being cached 
1 loops, best of 3: 3.7 s per loop
CPU times: user 1min 50s, sys: 96.7 ms, total: 1min 50s
Wall time: 1min 50s


In [109]:
IndustryJob

Unnamed: 0,ENGINE,TESTER,MONTHLY,MARKETING,SG16,SG11,SG10,SG12,VOICE,WITH,SG18,PROVIDER,CUSTOMER,SYSTEM,OUTBOUND,SERVICE,POWER,ARTIST,INBOUND,GENERAL,COORDINATOR,ENGINEERING,ELECTRICAL,HUMAN,TREASURY,MANDARIN,ENGINEERS,ADMINISTRATIVE,AUDIT,FULL,LEVEL,RECRUITER,FACULTY,COLLECTION,DIRECTOR,SURVEYOR,CHIEF,PLANNING,REPRESENTATIVE,ASSISTANT,JUNIOR,RESOURCES,EXPERIENCED,RESEARCH,FRONT,PROCESS,AGENTS,CREDIT,PART,HEALTHCARE,ACCOUNTS,PROPERTY,TRAVEL,ATTORNEY,GRAPHIC,BASED,BUSINESS,GOVERNMENT,RESERVATIONS,SUPPORT,DIRECT,SERVICES,HIRING,COLL,TAGUIG,ARCHITECT,EXPERIENCE,ENGLISH,NURSE,QUEZON,SEARCH,CENTER,INVESTIGATOR,HOLDINGS,HOTEL,COMPANY,CASHIER,BOOKKEEPER,TELESALES,MANAGER,PRODUCTION,SPECIAL,INFORMATION,EXCISE,DESK,FOOD,MEDIA,INTERNET,DRIVER,TRAINING,RELATIONS,CEBU,SYSTEMS,OPERATOR,ACCOUNTING,POST,TECHNOLOGY,INVESTMENT,FINANCIAL,EXPERT,LEAD,REVENUE,MOBILE,SUCAT,PURCHASING,CREW,ACCOUNTANT,TIME,JAPAN,SENIOR,EDUCATION,QUALITY,TEACHER,STAFF,SPECIALIST,ACCOUNT,WORK,OFFICE,TECHNICAL,CARAGA,REGIONAL,REPORTS,AIDE,APPLICATION,ASSOCIATES,CHAT,TECHNICIAN,HOME,ENGINEER,ADMINISTRATOR,MANAGEMENT,FINANCE,AREA,EXECUTIVE,AGENT,TECH,SPANISH,PERSONNEL,APPLY,ANALYST,DAYSHIFT,QUANTITY,CENTRAL,BILLING,CITY,ADMIN,MECHANICAL,SITE,CONTENT,DEPARTMENT,CORP,INVENTORY,NEED,PROGRAMMER,CARD,CARE,EARN,JOIN,PIONEER,CONSULTANT,CLERK,WRITER,DESIGN,AUDITOR,OPERATIONS,ORTIGAS,SUPERVISOR,INTERNSHIP,SHIFT,MEDICAL,REPRESENTATIVES,BUDGET,URGENT,INTERN,DESIGNER,BASIC,DATA,ASSURANCE,SECRETARY,CONTROL,RESOURCE,MAKATI,TELCO,CORPORATE,ROCKWELL,ADMINISTRATION,ENCODER,RECRUITMENT,RECEPTIONIST,TEAM,PROGRAM,ASSISTANTS,ASSESSMENT,STATISTICIAN,SOFTWARE,DIVISION,HEAD,CABLE,SOCIAL,TELEPERFORMANCE,LEGAL,JAPANESE,PROJECT,COMPUTER,MUOZ,DIGITAL,LOCAL,STORE,DEVELOPMENT,LOOKING,PRODUCT,JAVA,ASSOCIATE,TELECOM,AYALA,SALES,FIELD,CIVIL,MIGUEL,CALL,TRAINEE,GLOBAL,DEVELOPER,DAVAO,BUREAU,SPEAKING,OFFICER
OPERATIONS,29.581501,6.114256,1.133185,8.062127,0.0,0.0,0.0,0.0,6.218107,0.220028,0.0,4.544258,6.521367,4.887123,2.019784,23.686573,0.161847,6.747945,1.944357,12.190801,16.158935,30.571476,5.826271,11.353056,0.961422,11.315271,33.072445,33.340542,1.078421,1.105675,21.322319,3.028243,2.081199,16.7064,9.324553,1.230318,3.33707,4.868844,20.356293,51.082962,5.578412,4.625559,12.335106,3.417604,5.832436,5.224154,13.527987,1.628151,3.396188,2.171277,13.516475,23.923036,7.592701,2.566123,3.28544,4.331505,12.739954,4.677126,28.611995,19.341768,5.680882,21.550358,3.899266,0.826467,0.376735,4.523543,13.249284,9.769548,0.619773,0.718632,1.03037,5.751266,5.870758,2.042177,1.583073,4.820163,1.959274,5.240206,14.68246,81.31039,22.395285,11.166116,28.177558,0.784134,2.894268,1.337087,0.269204,5.033484,12.330374,11.260102,27.079224,1.769135,4.390062,46.970984,15.847641,0.0,14.833999,5.18124,6.144037,11.023942,5.492037,7.850635,4.038608,2.583049,5.518626,5.616695,22.85897,0.664424,2.20376,8.753511,25.836494,5.848263,9.497763,13.447003,18.136598,14.72768,5.351338,34.78799,15.2543,0.347602,6.586301,6.006318,3.699542,25.358618,9.034636,2.32834,12.735682,3.488079,35.276317,26.970464,58.170515,7.001872,5.018886,20.831583,16.047299,9.506891,3.603151,9.997845,4.07955,22.578596,1.703276,6.695735,6.065004,1.93726,2.760883,11.123512,7.786376,2.034809,7.293888,5.155817,0.0,6.807414,5.611664,16.354827,0.553414,3.209871,0.439246,0.229787,14.12404,20.767859,6.735634,1.992652,3.530211,3.663753,66.139157,1.737133,34.371666,4.556927,2.259704,3.124541,19.593045,0.021267,6.798903,5.446748,5.745666,1.338247,7.27307,12.908314,7.533621,7.414017,4.756341,5.363943,1.12096,5.655058,0.212972,40.659275,3.896909,4.947874,24.23347,3.606679,15.38111,47.634508,8.875262,13.568507,10.967697,7.47848,5.73867,1.335156,6.011147,17.975011,0.394066,3.802154,19.012901,3.549883,0.160711,0.242504,1.596356,11.300784,42.437792,1.453349,10.00627,4.921581,8.80261,2.811346,0.348955,19.81199,8.115232,1.534476,0.404798,3.600972,21.396294,0.436518,67.773207,2.795889,0.661599,2.665328,35.870438
ENGINE,109.707151,5.127969,1.287357,5.727244,0.0,0.0,0.0,0.0,3.405859,0.327099,0.0,3.022853,9.218079,5.547316,1.804001,14.493522,7.958983,2.2578,1.220159,3.667893,13.640014,116.200744,21.458714,3.750323,1.586074,4.668425,124.571718,8.353742,0.778914,2.699404,10.918907,3.340003,6.760791,13.035665,6.570499,17.577298,3.204107,19.479811,16.497997,14.4515,5.58595,1.594889,4.073204,6.223284,2.19376,3.917478,8.364528,1.361879,1.883375,1.493927,6.325039,11.996032,3.707732,1.728533,1.205016,1.894227,15.299872,2.977713,10.734976,6.415596,5.452004,13.311209,8.124872,1.264496,0.826524,11.243752,4.382756,15.12569,0.0,1.781226,3.749151,7.230381,6.462719,16.43159,0.50609,2.00953,0.840712,2.358285,3.317887,33.294393,11.846213,12.232793,10.532974,0.129043,3.101849,0.070887,0.625709,4.475838,2.257023,9.93938,6.523115,0.130902,5.044016,22.404816,10.297317,1.278782,7.812466,4.110331,5.2915,3.597683,1.247336,2.662445,0.225351,1.527845,6.491332,0.535482,10.0458,1.242837,3.575356,14.232686,7.287313,11.756997,0.577133,4.943659,17.10186,6.696521,3.354609,10.321435,19.189686,0.602154,2.499705,3.006623,0.609876,10.58143,4.837234,3.512722,13.665477,1.415287,135.768211,9.072557,26.050932,2.296085,3.543583,5.531735,9.231063,9.862907,2.731738,5.335308,2.394269,9.142312,1.912335,15.104454,4.114066,4.281849,3.838188,0.79836,22.125037,4.99794,7.164733,4.988505,6.959961,4.02169,18.614769,10.990975,0.252816,2.819839,1.384121,0.294726,38.289089,5.961064,0.964165,5.939522,13.905683,2.928396,20.664301,0.590472,10.945086,3.650521,2.322196,7.29579,15.869227,0.007818,4.2872,4.85255,13.629468,0.562055,1.158735,3.113937,2.149559,8.677713,1.575008,1.497902,0.861012,6.017675,0.090261,8.717292,0.292868,3.493749,5.915234,0.082502,9.570656,13.874031,5.17466,8.537635,7.063153,3.606544,0.335957,0.187014,5.957039,7.067256,0.044327,3.780362,35.457291,3.287526,0.05908,0.137491,4.838443,5.089233,24.023269,5.266636,8.133001,0.763359,4.625493,5.373854,0.353096,3.089523,3.292911,6.605164,1.249014,4.584269,39.77287,10.125642,34.723987,0.177095,0.296791,4.234432,11.104612
CONSULTING,17.667127,4.570502,1.121693,39.227262,0.0,0.0,0.0,0.0,4.279305,0.218986,0.0,2.167261,9.780178,1.699967,6.199509,13.719714,0.606466,4.441163,5.881637,4.108822,6.160295,20.383792,4.812489,5.379787,3.088075,3.768243,20.46291,14.336033,7.984625,0.375839,2.929858,8.447789,1.130338,6.777854,6.001069,2.204906,2.659086,7.359359,21.393731,30.959244,5.958763,3.80734,3.181415,5.090386,2.055953,3.283218,8.783721,3.882986,1.457935,2.213664,47.322604,4.197858,2.091499,1.451867,1.034024,2.219962,6.968476,3.665306,11.610206,5.629589,4.971161,12.775149,6.752102,1.160119,0.408606,3.890532,3.431246,3.261274,0.711429,0.798107,2.236341,8.701389,3.123204,4.749649,0.766059,4.964329,5.42706,4.76093,10.778809,38.319878,6.157931,15.200877,8.115714,0.244149,1.71011,0.188718,3.964214,6.762579,3.999425,7.442682,7.347777,0.2995,1.48097,9.389176,64.630447,0.841528,3.854488,4.639551,14.258952,2.739745,1.676943,0.915128,0.400566,0.483523,6.53871,1.250467,52.719495,0.947453,1.193299,9.19688,6.653184,3.239459,1.036363,20.734509,20.068444,52.747486,2.536465,22.301415,5.188904,0.495714,3.073811,3.043446,0.917614,6.443784,17.576158,1.172545,5.487678,2.525317,20.542347,10.071715,28.673516,11.47261,2.512109,13.806061,9.877087,2.738068,1.934153,7.512955,0.749316,17.050169,1.054733,8.415494,4.050632,4.52434,1.381597,3.528998,4.425922,3.719946,12.847903,4.366577,1.310906,4.700358,2.802345,5.378104,0.536028,2.312614,0.451199,0.329825,6.739513,17.238649,3.829176,15.11859,1.899669,10.177375,13.251469,0.984806,12.168554,6.409129,1.36931,3.912988,20.627124,0.707141,5.349439,8.602366,2.443412,0.695453,3.185262,5.99376,2.641572,6.722779,3.584057,2.367282,0.902276,4.149225,0.257369,12.550717,4.520885,7.568283,12.043638,0.586856,3.755227,29.144646,6.204743,10.022871,1.399629,5.265208,3.819122,1.35165,15.540716,7.499557,0.345648,2.29769,7.666902,3.451878,0.04509,3.788717,1.643689,4.524357,8.385967,4.58524,2.588897,0.328583,18.050665,2.606757,0.165522,14.485665,2.187064,1.838526,0.796327,1.662249,9.750869,0.708762,7.986096,0.17075,0.648389,4.829828,23.11576
DEVELOPMENT,11.081202,5.531019,0.415426,20.058782,85.384806,290.604362,92.171611,89.00087,57.751998,0.026418,106.461367,2.494609,7.273875,6.47666,3.084768,56.835917,0.456884,28.335974,2.50551,2.652963,9.409622,11.654192,1.748864,5.529388,1.459318,8.656847,12.51683,257.167681,0.449594,0.0,65.630143,5.420164,1.866103,39.231743,5.906962,1.990296,0.828136,2.047167,65.079643,216.413081,2.673353,2.856954,3.311883,16.271763,2.665239,4.630749,8.194014,1.972112,1.565703,4.234361,19.702795,4.89621,20.481537,63.60988,0.977019,5.117906,28.863954,5.681425,19.569475,5.433076,1.078244,51.128225,3.130745,74.730599,0.048564,5.08126,3.558537,1.657756,0.149229,1.561561,11.542361,8.409449,11.19091,3.026785,0.259423,1.133272,3.220658,1.262749,9.177756,38.211585,13.821229,11.052843,16.291135,11.320177,0.375069,1.016926,0.193327,13.289108,18.50478,31.705605,15.268415,0.121756,5.942884,27.531825,20.363669,0.167522,3.023665,11.178625,8.948199,2.227519,0.897701,478.893565,0.786751,0.709447,6.117007,4.152472,56.77476,0.044104,0.286379,5.798433,15.948104,6.973709,0.760839,25.758621,30.722988,21.827876,0.767539,444.820216,3.897877,0.657537,3.606523,4.553247,4.480267,14.519637,40.582935,0.935817,5.114737,1.963191,12.715518,203.987201,29.658471,8.025339,3.00961,56.24204,9.536244,2.214827,14.350162,3.130595,0.834897,27.402365,0.472316,13.912543,17.273637,3.529402,2.296313,117.570307,2.847165,2.542517,6.094605,6.532081,2.679003,52.30137,1.437317,8.168026,2.228663,4.07968,2.186002,0.015124,4.132688,47.193979,2.435134,6.40672,0.108441,5.332618,49.021784,1.731167,10.33528,6.862324,0.656646,0.977442,62.325088,0.0,4.808885,9.765103,0.842018,1.21972,3.754613,37.358648,15.548864,2.642084,2.92502,11.779581,0.28765,17.514045,0.571783,219.585449,2.417945,7.301856,41.332984,0.226281,6.009306,201.405225,34.515547,39.42215,1.014857,1.845517,1.1849,0.506834,10.576963,7.99771,7.393384,4.279556,3.632707,2.684562,0.0,0.436095,1.522826,9.411769,69.415214,2.194729,12.431109,0.037756,43.933556,0.877483,0.056477,13.140965,3.066882,0.21501,1.449049,1.617954,23.662791,0.29571,64.516931,0.162746,0.634464,2.458735,514.064454
BUSINESS,20.736221,5.211232,0.954623,19.981266,0.0,0.0,0.0,0.0,20.665619,0.029021,0.0,2.123941,94.960676,5.208701,19.337756,80.019325,1.025349,9.001482,11.958722,9.29556,5.737025,25.641776,9.784808,5.53228,1.962304,10.356961,23.880319,45.277427,0.324929,0.239495,14.346529,9.437454,3.033214,6.997133,2.383643,3.791403,0.971143,7.516122,140.610359,37.127068,3.672937,9.545925,7.592896,25.959064,1.768607,7.188041,39.238813,2.224533,2.463512,6.583242,43.283924,6.702597,11.256041,0.951414,3.512574,6.700026,31.89227,11.480806,48.04568,35.102727,2.091289,72.12692,15.40107,0.856933,10.037443,5.481781,7.962709,4.471158,0.0,4.042251,3.341817,29.358251,4.586351,4.593291,3.240332,3.832471,3.96342,2.862114,21.399191,38.6321,9.951851,23.470819,18.88559,0.011449,2.465793,0.812069,2.416072,11.949288,10.631384,18.773327,15.115174,1.589698,4.522432,9.736495,41.642249,3.970621,14.57757,15.224338,14.768229,4.724519,3.144352,2.98034,1.09912,0.69095,7.025007,3.092446,49.436835,0.110266,2.521988,13.199154,13.851966,7.180805,0.84349,10.11015,32.157142,46.503073,4.661858,30.921183,33.255548,0.963676,6.828869,23.062687,0.591861,15.264996,26.826024,0.727036,24.205109,17.348202,25.101305,15.218761,36.378575,9.292075,5.93125,40.490299,41.931861,15.572795,4.668326,5.601957,4.559843,22.274172,11.213373,4.989325,16.621418,8.324718,4.53721,7.433731,15.064903,3.963099,15.36315,13.143973,1.424659,10.480116,5.591794,16.230276,2.972135,7.034513,1.752272,1.368603,15.323992,14.232457,1.881816,10.227315,4.186146,1.03613,20.954138,4.371818,22.733096,10.114784,9.386339,11.858944,135.852991,0.130224,25.367986,14.562894,4.932383,2.67657,3.882279,8.732703,9.479177,2.888028,9.974534,7.688451,10.433382,7.889374,1.063848,23.256185,2.620434,16.446012,14.302387,1.001922,9.273436,36.260146,28.794782,27.710311,4.625739,2.867656,1.479424,2.250707,20.250261,12.371851,0.553037,14.010424,5.059556,4.04832,0.717476,0.135838,8.758188,14.378897,42.941382,8.189401,6.060469,1.154166,28.646227,6.372243,0.223679,25.530239,1.762643,0.801575,0.389447,13.311678,17.942731,1.411257,33.598859,0.267761,1.115565,6.985964,30.60226
GOVERNMENT,46.765235,5.879872,0.237765,3.980585,770.784536,2623.339665,832.05029,803.427416,484.135109,0.027261,961.046571,6.555758,3.237775,13.500884,3.456831,396.935338,0.116673,212.607304,3.634089,2.474797,31.201218,46.756949,1.03859,3.546517,0.305894,2.665692,52.679653,2165.096668,0.497898,0.0,478.922643,1.980721,0.412815,324.152487,36.351203,0.261077,0.491921,3.196532,430.704527,1632.548977,0.620922,2.875398,3.921292,2.773565,0.824822,1.884961,4.341315,0.720548,0.555986,0.708748,35.609337,2.666173,123.720446,567.032418,0.726714,0.790852,8.752403,2.623562,73.073219,2.367282,1.232802,355.100832,1.389945,673.116794,0.050114,1.431594,4.216591,5.551235,0.15399,0.172984,1.478704,3.659427,65.939558,0.891159,0.904055,2.84116,0.664711,0.827501,4.249565,19.148694,8.825229,33.157636,87.196286,98.624644,0.462299,0.376015,0.36482,55.869764,139.462657,231.770604,85.169654,0.12564,14.799227,162.103196,32.175057,0.172866,2.821749,18.840188,9.88511,1.553761,0.830865,4293.981188,0.391786,1.173121,1.933659,1.102652,319.144197,0.045511,0.362473,1.547769,87.76385,1.799918,2.424787,131.898207,169.552572,40.000854,0.237143,3796.742314,2.639568,0.129511,3.353812,1.340634,31.540213,74.188931,231.613169,0.375637,10.035059,0.694698,58.297838,1722.953038,14.548573,3.675536,0.772724,324.869881,5.12703,1.862741,123.240587,2.326721,0.386825,21.785646,0.298434,88.768927,117.364515,2.006611,0.462818,993.562673,1.27642,0.703321,4.35574,1.913151,0.324192,420.662303,8.130482,7.789374,0.302201,0.803867,0.281511,0.036786,17.453517,268.310343,1.188064,3.014149,0.213605,36.151575,329.677085,9.689819,5.461625,3.943678,0.381595,0.496201,411.957872,0.028497,2.471973,5.915183,0.503382,0.281764,2.396473,237.572216,3.179693,1.93521,2.756541,95.054562,0.933182,96.511142,1.065604,1848.64272,3.704376,2.268046,312.717042,0.347811,6.976591,1515.368678,220.307538,279.247841,0.370477,2.208667,0.809225,0.137963,13.300665,8.780175,63.969324,1.135278,4.600545,6.446736,0.0,0.527865,0.278385,48.606176,268.539461,1.313569,2.992832,0.038961,251.725804,2.128531,0.058279,5.340134,1.25901,0.266691,0.33907,0.556359,134.212343,0.305143,310.113802,0.167937,0.261934,3.743053,4404.457186
MEDICAL,2.404789,0.234653,0.153697,3.698376,0.0,0.0,0.0,0.0,0.366517,0.054283,0.0,0.568588,1.172863,0.034219,0.480539,2.82587,0.412701,7.539329,0.431658,1.724165,0.756069,3.815593,1.982331,0.204526,0.392085,0.711693,3.342047,7.028367,0.121173,0.0,1.486896,0.75954,0.110106,1.993424,5.572613,0.0,0.348684,1.599819,14.271569,13.095045,0.940116,0.957467,0.953887,3.545334,0.235249,1.557537,1.492326,1.234985,2.784729,0.413892,3.843923,1.447278,0.743881,0.505425,12.553313,0.0,0.777124,0.851414,4.107574,0.845376,4.256247,2.655555,1.089796,0.142607,0.0,3.353058,1.016181,0.582499,14.068096,0.0,1.594239,1.978596,0.772578,0.552248,0.218219,2.128709,1.01708,0.386246,6.419381,1.890555,2.206457,1.118038,1.770871,0.392186,3.000924,0.0,4.177321,1.245095,1.04777,2.79057,2.977774,0.0,0.029564,3.281802,5.943296,0.01484,1.257402,0.925448,0.911914,1.122862,0.292528,0.302042,0.342113,0.075412,2.678958,0.041267,6.213309,0.442858,0.911665,2.81414,1.895413,0.750646,0.253371,11.812163,2.878327,4.245173,0.747372,1.556327,2.726827,0.0,0.463268,1.132779,0.0,1.895806,1.928319,0.679407,1.916312,0.122778,3.571811,4.626922,2.45164,0.6207,3.03428,3.193878,1.542041,1.286236,0.47456,0.536733,0.0,0.644847,0.365515,1.363632,1.351626,1.709526,0.055669,1.408952,2.344527,0.397455,2.276153,2.058847,0.0,1.502318,0.681434,1.684336,0.0,0.983593,0.121543,0.114475,2.727584,2.541962,1.381305,0.865392,14.056102,0.907994,3.989149,0.816806,5.99947,1.779642,0.421159,5.158882,13.596723,0.0,0.955467,1.669201,17.0329,0.073116,0.0,0.866387,0.066206,0.647663,0.920428,0.839102,0.035234,0.395821,0.116985,5.113552,0.378297,1.09022,2.118575,0.705792,1.793504,12.219944,1.856748,5.95291,0.982714,1.07422,0.09207,0.0,1.7768,0.865781,0.0,1.299167,1.874222,0.756035,0.0,1.450037,0.645109,0.939139,3.127398,0.645562,1.438082,0.0,2.097888,0.587817,0.0,9.204222,0.0,0.392504,0.0,0.717576,2.241358,0.015593,3.610605,0.0,0.495833,0.719867,1.767672
COMMERCIAL,3.46862,1.975634,2.143061,6.41643,0.0,0.0,0.0,0.0,6.615946,0.487004,0.0,1.524437,44.262387,0.686616,5.882162,37.399864,0.107551,2.394388,4.980643,4.869608,1.229686,4.828839,4.803464,1.635172,0.538672,1.937528,3.932003,13.285988,0.603421,0.62564,3.532441,2.432826,0.887914,3.613306,1.458127,0.218072,0.076862,1.256773,50.879076,16.428138,0.83792,3.711,6.136483,9.507701,1.114124,1.372422,17.638793,1.937098,0.898492,2.693917,27.126206,4.77835,2.677151,0.73055,0.911573,1.193438,2.878352,3.606379,21.723241,17.279078,1.262921,33.830757,3.956409,0.776853,1.773383,2.740625,6.523216,1.713101,0.767924,5.018705,1.208458,12.676336,1.221503,1.168156,2.557612,1.129442,0.878363,2.424427,10.671818,11.416026,2.937312,8.314016,10.323798,0.23217,1.286029,0.340833,1.125311,6.285839,3.447878,2.811644,6.538567,0.864459,0.593212,5.01951,24.479663,0.729309,9.464988,4.466233,7.136353,4.775903,0.765849,0.861197,0.369581,0.569359,2.431843,0.712199,30.370382,0.08463,0.773952,4.675217,5.435912,1.606993,0.384604,3.071724,10.818038,29.418535,0.692583,15.488815,20.292024,0.72179,1.627849,8.546848,0.331237,6.37266,5.266911,1.579508,16.47545,7.632473,3.997646,2.788732,10.62272,5.717239,1.67821,9.379748,18.199431,11.299723,1.122333,2.585863,1.600331,3.008195,5.762812,2.32892,6.174102,4.221297,3.360613,0.915642,9.073074,4.706202,6.102909,3.927515,0.998648,3.042103,1.245688,5.281839,1.800578,3.460644,1.126385,1.921317,2.709375,13.215249,0.410079,2.676434,2.288924,0.678064,10.920256,2.316941,14.205929,5.0208,4.170885,5.468633,49.27734,0.034497,9.608799,6.930556,3.084753,1.873265,0.543708,4.074585,1.04679,1.364517,3.856887,7.402716,6.487437,4.042809,0.931551,6.670424,0.707552,4.070914,4.744114,0.483818,2.323749,16.16558,5.141489,11.036359,1.553204,1.163871,0.27439,3.006565,5.701989,14.785367,0.086755,3.872847,0.944873,1.580782,0.739782,0.185488,3.043904,6.245757,9.834528,2.505486,0.897263,0.420876,5.605104,5.269038,0.607669,11.201088,0.315962,0.017582,0.053252,7.101756,2.801516,0.501369,10.49965,0.467591,0.307056,2.921464,16.131779
OUTSOURCING,15.442211,6.769545,1.511546,16.251428,0.0,0.0,0.0,0.0,24.099449,0.078952,0.0,2.093007,122.633014,3.543712,26.443338,99.743254,0.756903,11.242334,16.556128,15.845008,6.578285,22.569555,11.27904,9.520895,2.047568,9.826175,19.252783,57.244933,1.712363,0.1918,10.974879,27.784431,1.676468,9.348388,6.806838,3.32199,1.073094,9.062351,177.644925,39.533596,4.68929,19.914529,8.563693,31.013351,1.082088,6.080335,48.135263,2.200454,2.809096,9.80854,63.186268,8.639261,9.724679,3.59479,3.514874,7.651494,9.430555,13.505651,60.348371,43.847504,6.42695,90.313183,18.810166,1.407717,11.861146,6.459567,8.97988,7.759541,5.524079,6.172797,2.506533,36.699623,4.475254,5.406732,4.084037,5.864322,3.653339,4.445397,23.411954,33.795467,9.832216,40.936866,24.316246,0.042215,3.552339,0.231775,4.397114,16.535479,13.293787,23.152565,19.177537,2.690753,3.072551,12.414938,62.207719,5.451396,19.511866,17.964808,19.19629,6.347688,3.94626,1.990394,0.709087,0.707926,6.474119,0.82064,68.825444,0.556365,2.966451,17.904265,17.67091,8.770351,2.439805,19.599716,56.297358,68.185697,5.562988,34.316629,43.775153,1.633351,9.74823,29.331495,0.566075,20.141344,33.985422,0.613714,33.579647,23.603866,19.851771,20.109916,35.913399,8.956262,6.611341,41.601581,50.482801,20.844442,6.140907,7.358196,6.128868,17.589303,15.236187,5.425816,20.580494,8.1223,5.600323,11.565158,18.049986,5.787427,19.087354,16.682993,0.86382,10.489316,6.190763,15.871062,3.017292,10.747634,2.512935,2.382973,17.563929,13.091048,1.940895,13.437973,4.510368,3.069822,27.313721,5.492885,35.280361,14.300073,12.823649,15.271987,171.905393,0.789169,30.340747,19.360548,5.713381,4.051681,2.436698,7.91114,3.037272,3.722559,20.300302,10.38672,15.232812,8.153303,0.799279,31.275501,2.596525,38.154204,23.854346,2.262643,7.809491,38.768008,30.535504,35.962988,6.058197,4.552956,2.373122,4.272564,29.504957,14.19558,0.734147,14.627625,5.051723,5.15044,1.623744,2.215403,9.537878,18.027464,27.949915,9.94159,3.534379,1.816812,35.988666,7.196277,0.259302,27.459639,2.351702,0.560742,0.523544,15.727307,16.744068,1.642808,26.216121,0.427102,0.950933,10.737745,34.273254
BPO,3.193135,2.844297,0.233539,0.486451,0.0,0.0,0.0,0.0,1.46896,0.265444,0.0,0.407186,17.620881,0.0,3.101205,12.828977,0.0,0.606348,3.166441,0.0,0.044791,3.885245,0.376164,0.454089,0.697983,4.197181,3.842343,3.949415,0.0,0.0,0.0,0.162172,0.0,0.272044,0.0,0.428097,0.0,0.230938,17.363128,7.856383,0.0,0.907535,0.987337,2.998664,0.317554,2.868236,1.152774,0.0,0.0,0.702673,2.04471,0.628059,0.0,0.0,0.0,0.0,1.243134,0.51667,6.064296,1.915587,0.0,11.66289,0.681659,0.0,0.0,0.17654,1.067258,0.863148,0.0,0.0,0.0,2.774723,0.555137,0.229426,0.0,0.643407,0.0,4.019694,1.015974,1.313764,0.672355,1.716787,1.418847,0.0,0.0,0.0,0.07216,0.592035,1.062825,0.246554,0.933206,0.0,0.0,0.210706,2.109504,0.406174,1.284345,1.035713,1.188998,0.465224,0.0,0.0,0.425423,0.0,0.202945,0.0,4.109655,0.0,3.077765,1.25093,0.947823,5.884987,0.0,0.639775,3.012386,2.105993,0.0,1.320139,2.451175,0.195265,2.343023,1.659507,0.0,0.932001,1.440942,0.0,1.733799,2.799481,4.288721,0.594536,1.370645,2.098025,1.47143,2.463126,1.14361,1.271187,3.966109,0.167309,0.0,0.621348,0.0,1.329705,1.268887,1.594276,0.781355,0.0,1.163838,0.0,1.719447,0.523996,0.0,0.557091,2.636559,1.548541,0.304122,1.669866,0.0,0.0,1.665425,1.475607,0.0,0.215091,0.0,0.0,1.137637,0.167213,3.282555,0.494174,0.0,0.490644,16.532338,0.0,0.766641,0.793695,0.0,0.0,0.0,6.619913,0.0,0.765638,0.801354,0.74126,0.0,0.151812,0.174879,1.336112,0.040531,0.518594,0.806819,0.0,0.480439,7.620774,1.746348,2.996647,0.432859,1.348251,0.0,0.0,0.573369,1.986614,0.0,5.16314,0.456998,0.161829,0.0,0.0,0.191286,2.875241,0.500697,0.587221,0.483365,0.0,0.883805,0.0,0.0,1.488786,0.0,0.0,0.0,0.269965,1.370094,1.861322,0.1844,0.0,0.332562,0.39094,1.137628


In [110]:
# get which industry keywords an ACCOUNTANT is most likely to be in
IndustryJob.find_row_matches('ACCOUNTANT').nlargest(10)

# note: interestingly enough,
# being an ACCOUNTANT doesn't mean you're in the ACCOUNTING industry

SERVICE        14.441707
SERVICES       13.091308
GOVERNMENT      9.006344
CUSTOMER        8.476698
ACCOUNTING      7.289796
FINANCE         5.350620
FINANCIAL       3.651275
MARKETING       2.488622
OUTSOURCING     1.970706
WRITING         1.834793
dtype: float64

In [111]:
# find which jobs are prominent in the technology industry
IndustryJob.find_column_matches('TECHNOLOGY').nlargest(15)

DEVELOPER      8.157250
DEVELOPMENT    5.229969
LEVEL          2.727529
OPERATIONS     2.510587
OPERATOR       2.070660
TECHNOLOGY     2.062974
ENGINEER       2.058706
ENGINEERS      1.905689
INFORMATION    1.816720
PROPERTY       1.738549
TECHNICAL      1.688701
ENGINEERING    1.679832
ENGINE         1.656153
SUPPORT        1.541888
PROGRAMMER     1.408185
dtype: float64

In [113]:
# find which jobs are prominent in the construction industry
IndustryJob.find_column_matches('CONSTRUCTION').nlargest(15)

ENGINEER          2.914038
DEVELOPER         2.795045
ENGINEERS         2.776029
ENGINEERING       2.433698
ENGINE            2.419496
MANAGER           1.877857
DEVELOPMENT       1.858445
OPERATIONS        1.811116
ASSISTANT         1.432472
MANAGEMENT        1.405912
ASSISTANTS        1.339402
OFFICER           1.303451
ADMINISTRATION    1.297649
OPERATOR          1.273512
OFFICE            1.215681
dtype: float64

In [112]:
# find the recommendation score of BPO and customer service
IndustryJob.recommendation_score('BUSINESS PROCESS OUTSOURCING', 'CUSTOMER SERVICE')

687624.57952877693

In [114]:
# find the recommendation score of BPO and programmer
IndustryJob.recommendation_score('BUSINESS PROCESS OUTSOURCING', 'PROGRAMMER')

114763.14103969297