In [3]:
import pandas as pd
import numpy as np
import os.path

data_folder = "../data"

In [4]:
data_file = os.path.join(data_folder, 'joes-network.csv')

profiles = pd.read_csv(data_file, index_col=0)
profiles.count()

name                     1230
occupation               1230
Job title                  23
linkedin_id              1228
email                    1122
phone                     282
Personal website          392
location                 1230
company_link             1121
company_linkedin_id       953
company_linkedin_name     945
company_description       923
company_size              891
company_specialties       675
company_established       740
company_industry          900
company_website           886
company_headquarter       888
company_logo_url            0
Relationship               15
dtype: int64

In [18]:
df = profiles.copy()
df['occupation'].value_counts().head(60).iloc[::-1]

VP People - interim at Commonplace                                                                                          1
Digital Director at Methods                                                                                                 1
Senior Risk Analyst at Alpha FX Group plc                                                                                   1
Digital Strategist                                                                                                          1
Partner Account Manager, EMEA at Duo Security                                                                               1
Developer at Ellen MacArthur Foundation                                                                                     1
Delivery Driver at Homebase Bunnings                                                                                        1
MS in Sports Administration Graduate | Looking For a New Career Opportunity in Gaming                                 

In [105]:
def generalize_occupation(linkedin_occupation):
    
    mappings = [
        {
            'words': ['ceo ','coo ', 'cto ', 'cfo ', 'chief', 'executive'],
            'role': 'C-executive'
        },
        {
            'words': ['founder'],
            'role': 'Founder/Co-Founder'
        },
        {
            'words': ['director'],
            'role': 'Director'
        },
        {
            'words': ['adviser'],
            'role': 'Non-executive'
        },
        {
            'words': ['vice president', 'vp ', 'vp,'],
            'role': 'VP'
        },
        {
            'words': ['head of', 'head at', 'lead ', 'lead,', 'manager'],
            'role': 'Top-level-influencer'
        },
        {
            'words': ['consultant', 'designer', ' engineer', 'engineer ', ' developer'],
            'role': 'Mid-level-influencer'
        },
        ]

    for mapping in mappings:
        if any(word in linkedin_occupation.lower() for word in mapping['words']):
            return mapping['role']

    return ''

In [104]:
df['occupation'] = df['occupation'].fillna('')

df['probable_role'] = np.vectorize(generalize_occupation)(df['occupation'])

df['probable_role'] = df['probable_role'].replace(' ', np.nan).replace('', np.nan)
df['probable_role'].value_counts().head(60) #.head(150).iloc[::-1]

Mid-level-influencer    298
Top-level-influencer    214
Director                102
C-executive              94
Founder/Co-Founder       83
VP                       11
Non-executive             2
Name: probable_role, dtype: int64

In [109]:
phrase='Talent'
df['occupation'] = df['occupation'].fillna('')
df[df['occupation'].str.contains(phrase)]['occupation'].value_counts().head(60)

Head of Growth at Talent Point                                                                                                                                                          1
Resourcer - Identifying Exceptional Talent On LinkedIn at Vertical Advantage                                                                                                            1
Talent Acquisition Specialist                                                                                                                                                           1
Talent Acquisition Specialist at YouView TV Limited - We're Hiring!                                                                                                                     1
Director & Co-Founder at Sporting Talent                                                                                                                                                1
EMEA Engineering CoE Talent Acquisition Lead + EMEA Talent Acquisition

# Save results

In [112]:
df_to_save=df.reset_index(drop=True)
df_to_save=df_to_save.fillna('')
target_file = os.path.join(data_folder, 'joes-network-with-probable-role.csv')
df_to_save.to_csv(target_file, index=True)

df_to_save

Unnamed: 0,name,occupation,Job title,linkedin_id,email,phone,Personal website,location,company_link,company_linkedin_id,...,company_description,company_size,company_specialties,company_established,company_industry,company_website,company_headquarter,company_logo_url,Relationship,probable_role
0,Daniel Dix,Sales Account Manager at Bytestock,Sales Account Manager,linkedin.com/in/daniel-dix-105970195,dandix9843@icloud.com,,,"Slough, United Kingdom",https://www.linkedin.com/company/bytestock/,bytestock,...,We’re one of Europe’s largest stockists of new...,51-200,"IT,Servers,Workstations,Refurbished IT,Dell,HP...",2004,Information Technology & Services,https://www.bytestock.com/,"Windsor, Berkshire",,Influencer,Top-level-influencer
1,Philip Meier,Software Engineer at EDITED,Software Engineer,linkedin.com/in/philip-meier-99865187,philip.meier06@googlemail.com,,github.com/eatoncns/ (Portfolio)\ntheawokende...,"London, Greater London, United Kingdom",https://www.linkedin.com/company/edited/,edited,...,EDITED is the leader in Retail Market Intellig...,51-200,"Retail Analytics,Pricing & Product Assortment,...",2009,Apparel & Fashion,http://www.edited.com,"London, England",,Influencer,Mid-level-influencer
2,Giles Morgan,Non-Exec Director | Advisor to Fortune 500 | S...,Non-Executive Director,linkedin.com/in/gilesbmorgan,gilesmorgan@me.com,447365759559.0,hubbleventures.com (Company Website)\nplanetu...,"London, England Metropolitan Area",https://www.linkedin.com/company/etch-uk/,etch-uk,...,For business change-makers who want to empower...,51-200,"User Experience,Digital Marketing,Umbraco CMS,...",2012,Design,https://www.etchuk.com,London,,Decision Maker,Director
3,Daven Sanassy,CTO | Vochlea Music,Chief Technology Officer,linkedin.com/in/davens,davens@gmail.com,,,"London, Greater London, United Kingdom",https://www.linkedin.com/company/vochlea-music/,vochlea-music,...,Vochlea Music is a creative technology company...,2-10,"Music,Technology,AI,Musical Instruments,Engine...",2017,Computer Software,http://www.vochlea.co.uk,"London, England",,Decision Maker,C-executive
4,Cecil Adjalo,Yahoo Finance EMPower Top 100 Ethnic Minority ...,Chief Operations Officer,linkedin.com/in/adjalo,cadjalo@hiberus.com,,hiberus.com (Company Website),"London, Greater London, United Kingdom",https://www.linkedin.com/company/hiberus-tecno...,hiberus-tecnologia,...,Hiberus Tecnología is an IT consulting company...,"501-1,000",,2011,Management Consulting,https://www.hiberus.com,"Zaragoza, Aragón",,Buyer,C-executive
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1239,Steve Tanner,Consultant,,linkedin.com/in/steve-tanner-1a91a049,steven.tanner60@gmail.com,,,"London, Greater London, United Kingdom",https://www.linkedin.com/company/quad-security...,quad-security-services,...,,,,,,,,,,Mid-level-influencer
1240,Jason Raju,Principal Consultant | Cyber Security | Archit...,,linkedin.com/in/jasonraju,jraju@aurec.com,'+61 (0) 3 8625 0402,aurec.com.au (Company Website),"Melbourne, Australia",https://www.linkedin.com/company/aurec/,aurec,...,Aurec was founded in 2002 in Australia with on...,51-200,"Information Technology,Sales & Marketing,Banki...",2002,Staffing & Recruiting,http://www.aurec.com,"Sydney, NSW",,,Mid-level-influencer
1241,Jason Laanda,Senior Residential Sales at Parkheath,,linkedin.com/in/jason-laanda-69b77722,jasonlaanda@gmail.com,07891887280,,"London, United Kingdom",https://www.linkedin.com/company/parkheath/,parkheath,...,"Parkheath is a London estate agent, establishe...",11-50,"Residential Sales,Residential Lettings,Propert...",1983,Real Estate,http://www.parkheath.com,"London, London",,,
1242,Sophie Cobden,Life Coach - Seasons Of Life,,linkedin.com/in/sophie-cobden-a24ab12b,sinclair_sophie@hotmail.com,,,United Kingdom,https://www.linkedin.com/search/results/all/?k...,,...,,,,,,,,,,
