## Python script to extract information from LinkedIn

## Install / Import required libraries

In [1]:
import os
from datetime import date

import pandas as pd
from linkedin_api import Linkedin

## Connect to the LinkedIn API

In [6]:
# Credentials are read from the LINKEDIN_EMAIL / LINKEDIN_PASSWORD environment
# variables so real secrets never have to be saved in the notebook. The
# placeholders are kept as fallbacks for anyone who prefers to edit them here.
api = Linkedin(
    os.environ.get('LINKEDIN_EMAIL', 'enter-your-email'),
    os.environ.get('LINKEDIN_PASSWORD', 'enter-your-password'),
)

## Fetch required elements / parameters

### Profile info

In [171]:
# Identifier of the profile to fetch; it is passed to get_profile() and
# interpolated into the /identity/profiles/... request paths.
profile_id = 'fahimyousufzai'
profile = api.get_profile(profile_id)

# Join only the name parts that are actually present, so a missing first name
# no longer shows up as the literal text "None" and a missing last name no
# longer leaves a trailing space. The name is None when both parts are absent.
name_parts = [profile.get(key) for key in ('firstName', 'lastName')]

profile_info = {
    'name': ' '.join(part for part in name_parts if part) or None,
    'headline': profile.get('headline'),
    'summary': profile.get('summary'),
    'industry_name': profile.get('industryName'),
}

profile_info

{'name': 'Fahim Ahmad',
 'headline': 'Quantitative Data Expert',
 'summary': 'An aspiring Data Scientist with a Master of Arts (M.A.) focused in Economics and demonstrated history of working in the multi-cultural international organizations. Skilled in Data Analysis and Social Survey & Research. Currently studying IT / Web Technology at Hannover University of Applied Sciences and Arts, and Computer & Data Science at MIT Emerging Talent / MIT ReACT.',
 'industry_name': 'Research'}

In [89]:
# profile_info = {
#     'name': f"{profile['firstName']} {profile['lastName']}",
#     'headline': profile['headline'],
#     'summary' : profile['summary'],
#     'industry_name': profile['industryName']
# }

# profile_info

### Skills

In [172]:
# Raw skill entries for the profile; a response without an 'elements' key
# yields an empty list.
skills = (
    api._fetch(f"/identity/profiles/{profile_id}/skills")
    .json()
    .get('elements', [])
)

# One name per skill entry; entries without a 'name' contribute ''.
skill_names = [skill.get('name', '') for skill in skills]
skill_names

['GitHub',
 'Git',
 'Natural Language Processing (NLP)',
 'CSS',
 'JavaScript',
 'HTML5',
 'Statistics',
 'Machine Learning',
 'Python (Programming Language)',
 'SQL',
 'Data Analysis',
 'Quantitative Research',
 'Research',
 'R']

In [96]:
# skill_names = [i['name'] for i in skills]
# skill_names

### Education

In [173]:
# Raw education entries for the profile; a response without an 'elements' key
# yields an empty list.
educations = api._fetch(f"/identity/profiles/{profile_id}/educations").json().get('elements', [])

# Build one summary dict per entry in a single pass (the previous version
# rebuilt a full list for every field of every entry). Every lookup falls back
# to None, including when an entry has no 'timePeriod' at all, which used to
# raise KeyError.
education = [
    {
        'degreeName': entry.get('degreeName'),
        'schoolName': entry.get('schoolName'),
        'startYear': entry.get('timePeriod', {}).get('startDate', {}).get('year'),
        'endYear': entry.get('timePeriod', {}).get('endDate', {}).get('year'),
    }
    for entry in educations
]

education

[{'degreeName': 'Computer and Data Science',
  'schoolName': 'Massachusetts Institute of Technology',
  'startYear': 2023,
  'endYear': 2024},
 {'degreeName': 'IT expert data science / web technology',
  'schoolName': 'Hochschule Hannover',
  'startYear': 2023,
  'endYear': 2024},
 {'degreeName': 'Master of Arts (M.A.)',
  'schoolName': 'Kabul University',
  'startYear': 2015,
  'endYear': 2016},
 {'degreeName': 'Bachelor of Arts (B.A.)',
  'schoolName': 'Kabul University',
  'startYear': 2010,
  'endYear': 2013}]

In [152]:
# education = [{
#     'degreeName': [i['degreeName'] for i in educations][n],
#     'schoolName': [i['schoolName'] for i in educations][n],
#     'startYear': [i['timePeriod']['startDate']['year'] for i in educations][n],
#     'endYear': [i['timePeriod']['endDate']['year'] for i in educations][n],
# } for n in range(len(educations))]

# education

In [153]:
# Certifications
# certifications = api._fetch(f"/identity/profiles/{profile_id}/certifications").json().get('elements', [])

### Experience

In [174]:
# Raw position entries for the profile; a response without an 'elements' key
# yields an empty list.
experiences = api._fetch(f"/identity/profiles/{profile_id}/positions").json().get('elements', [])

# Build one summary dict per position in a single pass (the previous version
# rebuilt a full list for every field of every entry). Missing fields fall
# back to None, except the end year: a position without an end date is
# reported as 'Present'. Entries without a 'timePeriod' no longer raise
# KeyError.
experience = [
    {
        'title': entry.get('title'),
        'companyName': entry.get('companyName'),
        'startYear': entry.get('timePeriod', {}).get('startDate', {}).get('year'),
        'endYear': entry.get('timePeriod', {}).get('endDate', {}).get('year', 'Present'),
    }
    for entry in experiences
]

experience

[{'title': 'Quantitative Data Expert',
  'companyName': 'Assess Transform Reach (ATR) Consulting',
  'startYear': 2020,
  'endYear': 2022},
 {'title': 'Research Consultant',
  'companyName': "Afghan Women's Network",
  'startYear': 2020,
  'endYear': 2020},
 {'title': 'Policy and Research Senior Data Analyst',
  'companyName': 'The Asia Foundation',
  'startYear': 2018,
  'endYear': 2020},
 {'title': 'Data Analyst',
  'companyName': 'Deutsche Gesellschaft für Internationale Zusammenarbeit (GIZ) GmbH',
  'startYear': 2018,
  'endYear': 2019},
 {'title': 'Policy and Research Data Analyst ',
  'companyName': 'The Asia Foundation',
  'startYear': 2017,
  'endYear': 2018},
 {'title': 'Data Analyst Consultant ',
  'companyName': 'The Asia Foundation',
  'startYear': 2015,
  'endYear': 2017}]

In [156]:
# experience = [{
#     'title': [i['title'] for i in experiences][n],
#     'companyName': [i['companyName'] for i in experiences][n],
#     'startYear': [i['timePeriod']['startDate']['year'] for i in experiences][n],
#     'endYear': [i['timePeriod']['endDate']['year'] for i in experiences][n],
# } for n in range(len(experiences))]

# experience

### Export the data

In [175]:
# Gather every extracted section under one dict.
linkedin_info = {
    'profile_info': profile_info,
    'skills': skill_names,
    'educations': education,
    'experiences': experience
}

# Wrapping the dict in a list gives a one-row frame with one column per section.
df = pd.DataFrame([linkedin_info])

# to_csv does not create missing parent directories, so make sure the target
# folder exists before writing the dated file.
os.makedirs("output", exist_ok=True)
df.to_csv(f"output/linkedin_info-{date.today()}.csv")

## Custom function to extract information from one or more LinkedIn profiles

In [7]:
def _period_year(entry, boundary, default=None):
    """Return the year of entry['timePeriod'][boundary], or default if it is absent."""
    when = (entry.get('timePeriod') or {}).get(boundary)
    if not when:
        return default
    return when.get('year', default)


def fetch_linkedin_info(profile_ids, client=None):
    """Collect profile, skill, education and experience data for LinkedIn profiles.

    Args:
        profile_ids: A single profile identifier (str) or an iterable of
            identifiers. A bare string is treated as one identifier rather
            than being iterated character by character.
        client: Object providing ``get_profile(profile_id)`` and
            ``_fetch(path)`` (whose result has ``.json()``). Defaults to the
            module-level ``api`` client, looked up at call time.

    Returns:
        A dict keyed by profile identifier. Each value is a dict with the keys
        'profile_info', 'skills', 'educations' and 'experiences'. Fields that
        are missing from the API payload are reported as None, except that a
        skill without a name becomes '' and a position without an end date
        gets 'Present' as its end year.
    """
    if client is None:
        client = api
    if isinstance(profile_ids, str):
        profile_ids = [profile_ids]

    def fetch_elements(profile_id, section):
        # A response without an 'elements' key yields an empty list.
        response = client._fetch(f"/identity/profiles/{profile_id}/{section}")
        return response.json().get('elements', [])

    linkedin_info = {}

    for profile_id in profile_ids:
        profile = client.get_profile(profile_id)

        # Join only the name parts that are present, so a missing part does
        # not appear as the literal text "None" or leave stray whitespace.
        name_parts = [profile.get(key) for key in ('firstName', 'lastName')]
        profile_info = {
            'name': ' '.join(part for part in name_parts if part) or None,
            'headline': profile.get('headline'),
            'summary': profile.get('summary'),
            'industry_name': profile.get('industryName'),
        }

        skill_names = [
            skill.get('name', '') for skill in fetch_elements(profile_id, 'skills')
        ]

        education = [
            {
                'degreeName': entry.get('degreeName'),
                'schoolName': entry.get('schoolName'),
                'startYear': _period_year(entry, 'startDate'),
                'endYear': _period_year(entry, 'endDate'),
            }
            for entry in fetch_elements(profile_id, 'educations')
        ]

        experience = [
            {
                'title': entry.get('title'),
                'companyName': entry.get('companyName'),
                'startYear': _period_year(entry, 'startDate'),
                'endYear': _period_year(entry, 'endDate', default='Present'),
            }
            for entry in fetch_elements(profile_id, 'positions')
        ]

        linkedin_info[profile_id] = {
            'profile_info': profile_info,
            'skills': skill_names,
            'educations': education,
            'experiences': experience,
        }

    return linkedin_info

In [212]:
# Fetch all listed profiles in one call; the result is keyed by profile id.
linkedin_info = fetch_linkedin_info([
    'layan-bekai',
    'muhaddesa-noori-534579155',
    'adla-abousteiti',
    'mohammedalasli',
    'fahimyousufzai',
])

In [213]:
# orient='index' makes each profile id a row and each section a column.
df = pd.DataFrame.from_dict(linkedin_info, orient='index')

# to_csv does not create missing parent directories, so make sure the target
# folder exists before writing the dated file.
os.makedirs("output", exist_ok=True)
df.to_csv(f"output/linkedin_info_all-{date.today()}.csv", index_label='profile_id')

In [219]:
# linkedin_info['layan-bekai']
# linkedin_info['muhaddesa-noori-534579155']
# linkedin_info['adla-abousteiti']
# linkedin_info['mohammedalasli']
# linkedin_info['fahimyousufzai']

In [220]:
# Remove the `api` name from the namespace once extraction is finished.
# NOTE(review): presumably done to drop the authenticated client; any later
# call that relies on the global `api` will raise NameError until it is recreated.
del api