In [3]:
import pandas as pd
import numpy as np
import requests
import logging
import warnings
import re
import os


# Set up logging and ignore warnings
warnings.filterwarnings("ignore")
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def search_employees_one_row_per_employee_dedup(
    query,
    country_filter=None,
    location_filter=None,
    company_filter=None,
    university_filter=None,
    industry_filter=None,
    skills_filter=None,
    certifications_filter=None,
    languages_filter=None,
    max_to_fetch=None
):
    """
    Search employees by:
      - 'query' (e.g. 'CEO', 'CEO OR CFO', etc.)
      - Optional filters:
            country_filter (e.g. 'South Africa'),
            location_filter (e.g. 'Johannesburg, Gauteng, South Africa'),
            company_filter (search in company names),
            university_filter (search in university names),
            industry_filter (search in the top-level industry field),
            skills_filter (search in skills),
            certifications_filter (search in certifications),
            languages_filter (search in languages),
            projects_filter (provided for consistency but not used in the search query).

    In the final DataFrame (one row per employee):
      - Keeps: ID, Name, Headline/Title, Location, Country, URL, Canonical_URL, Industry,
               Experience Count, Summary.
      - Includes: deduplicated Experiences (with duration), Educations, Skills, Certifications,
                  Languages, and Projects.
    """
    # Build the list of must clauses.
    must_clauses = []

    # Base clause: search in experience title
    must_clauses.append({
        "nested": {
            "path": "experience",
            "query": {
                "query_string": {
                    "query": query,
                    "default_field": "experience.position_title",
                    "default_operator": "and"
                }
            }
        }
    })

    # Additional filter: Company Name (in experience)
    if company_filter:
        must_clauses.append({
            "nested": {
                "path": "experience",
                "query": {
                    "query_string": {
                        "query": company_filter,
                        "default_field": "experience.company_name",
                        "default_operator": "or"
                    }
                }
            }
        })

    # Additional filter: University Name (in education)
    if university_filter:
        must_clauses.append({
            "nested": {
                "path": "education",
                "query": {
                    "query_string": {
                        "query": university_filter,
                        "default_field": "education.institution_name",
                        "default_operator": "or"
                    }
                }
            }
        })

    # Additional filter: Industry (in experience)
    if industry_filter:
        must_clauses.append({
            "nested": {
                "path": "experience",
                "query": {
                    "query_string": {
                        "query": industry_filter,
                        "default_field": "experience.company_industry",
                        "default_operator": "or"
                    }
                }
            }
        })

    # Additional filter: Skills (in inferred_skills)
    if skills_filter:
        must_clauses.append({
            "query_string": {
                "query": skills_filter,
                "default_field": "inferred_skills",
                "default_operator": "or"
            }
        })

    # Additional filter: Certifications
    if certifications_filter:
        must_clauses.append({
            "nested": {
                "path": "certifications",
                "query": {
                    "query_string": {
                        "query": certifications_filter,
                        "default_field": "certifications.title",
                        "default_operator": "or"
                    }
                }
            }
        })

    # Additional filter: Languages
    if languages_filter:
        must_clauses.append({
            "nested": {
                "path": "languages",
                "query": {
                    "query_string": {
                        "query": languages_filter.lower(),
                        "default_field": "languages.language",
                        "default_operator": "or"
                    }
                }
            }
        })

    # Exclude patterns in titles
    exclude_patterns = ["PA to", "Assistant to", "Personal Assistant", "EA to", "Executive Assistant to","Head of the Office of the CFO","Head of the Office of the CEO"]
    must_not_clauses = [
        {
            "nested": {
                "path": "experience",
                "query": {
                    "query_string": {
                        "query": f"experience.position_title:({pattern})",
                        "default_operator": "or"
                    }
                }
            }
        }
        for pattern in exclude_patterns
    ]

    # Build the complete payload with country and location filters added.
    payload = {
        "query": {
            "bool": {
                "must": must_clauses,
                "must_not": must_not_clauses
            }
        }
    }

    if country_filter:
        payload["query"]["bool"]["must"].append({
            "query_string": {
                "query": country_filter,
                "default_field": "location_country",
                "default_operator": "and"
            }
        })

    if location_filter:
        payload["query"]["bool"]["must"].append({
            "query_string": {
                "query": location_filter,
                "default_field": "location_full",
                "default_operator": "and"
            }
        })

    # Uncomment for debugging:
    # print(json.dumps(payload, indent=2))

    # Send the search request.
    search_url = "https://api.coresignal.com/cdapi/v1/multi_source/employee/search/es_dsl"
    headers = {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer eyJhbGciOiJFZERTQSIsImtpZCI6IjMzNjEyYzA1LWQ2MDYtYzllYy0zNGVjLWRiYmJiNGI0ZjgyMCJ9.eyJhdWQiOiJtdWx0aWNob2ljZS5jby56YSIsImV4cCI6MTc3MzQwNjg1OCwiaWF0IjoxNzQxODQ5OTA2LCJpc3MiOiJodHRwczovL29wcy5jb3Jlc2lnbmFsLmNvbTo4MzAwL3YxL2lkZW50aXR5L29pZGMiLCJuYW1lc3BhY2UiOiJyb290IiwicHJlZmVycmVkX3VzZXJuYW1lIjoibXVsdGljaG9pY2UuY28uemEiLCJzdWIiOiI5Nzg4ZDg5Ni0yNzBjLTU4NjgtMTY0Mi05MWFiZDk0MGEwODYiLCJ1c2VyaW5mbyI6eyJzY29wZXMiOiJjZGFwaSJ9fQ.GFaoIY_j8e3TKs9-iQ0H6O7NVz87T3Z7ZWIWPRHo17IrWqmehNvvJ8sD3BMaDVatHs9rr9C3hpUykkwS53HrAw' 
    }

    resp = requests.post(search_url, headers=headers, json=payload)
    resp.raise_for_status()
    employee_ids = resp.json()

    if not isinstance(employee_ids, list):
        print("Unexpected structure in search response.")
        return pd.DataFrame()

    # Collect data for each employee ID.
    rows = []
    for emp_id in employee_ids[:max_to_fetch]:
        
        collect_url = f"https://api.coresignal.com/cdapi/v1/multi_source/employee/collect/{emp_id}"
        r = requests.get(collect_url, headers=headers)
        r.raise_for_status()
        employee = r.json()

        # Basic fields
        id_val = employee.get("id")
        name_val = employee.get("full_name")
        headline_val = employee.get("headline")
        location_val = employee.get("location_full")
        country_val = employee.get("location_country")
        url_val = employee.get("linkedin_url")
        canonical_url = employee.get("linkedin_url")  # Using LinkedIn URL as canonical
        industry_val = None  # Not available in top level, will need to be extracted from experience
        experience_count_val = len(employee.get("experience", []))
        summary_val = employee.get("summary")
        
        # Get email information
        primary_email = employee.get("primary_professional_email")
        
        # Get all email addresses from collection
        email_collection = employee.get("professional_emails_collection", [])
        all_emails = [email_info.get("professional_email") for email_info in email_collection if email_info.get("professional_email")]
        all_emails_str = ", ".join(all_emails) if all_emails else ""

        # ----- EXPERIENCE (deduplicate) -----
        raw_exps = employee.get("experience", [])
        unique_exps = []
        seen_exps = set()
        company_industries = set()  # Set to collect unique industries
        for exp in raw_exps:
            key = (
                exp.get("position_title", "N/A"),
                exp.get("company_name", "N/A"),
                exp.get("date_from", "N/A"),
                exp.get("date_to", "N/A")
            )
            if key not in seen_exps:
                seen_exps.add(key)
                unique_exps.append(exp)
                # Add industry to the set if it exists
                if exp.get("company_industry"):
                    company_industries.add(exp.get("company_industry"))

        experiences_str = "\n".join(
            f"Role: {exp.get('position_title','N/A')} | Company: {exp.get('company_name','N/A')} | From: {exp.get('date_from','N/A')} | To: {exp.get('date_to','N/A')} | Duration: {exp.get('duration_months','N/A')} months"
            for exp in unique_exps
        )

        # Create a formatted string of industries
        company_industry_str = " | ".join(sorted(company_industries)) if company_industries else "N/A"

        # ----- EDUCATION (deduplicate) -----
        raw_edu = employee.get("education", [])
        unique_edu = []
        seen_edu = set()
        for edu in raw_edu:
            key = (
                edu.get("institution_name", "N/A"),
                edu.get("degree", "N/A"),
                str(edu.get("date_from_year", "N/A")),
                str(edu.get("date_to_year", "N/A"))
            )
            if key not in seen_edu:
                seen_edu.add(key)
                unique_edu.append(edu)
        educations_str = "\n".join(
            f"Institution: {edu.get('institution_name','N/A')} | Degree: {edu.get('degree','N/A')} | From: {edu.get('date_from_year','N/A')} | To: {edu.get('date_to_year','N/A')}"
            for edu in unique_edu
        )

        # ----- SKILLS (deduplicate) -----
        skills = employee.get("inferred_skills", [])
        skills_str = ", ".join(skills) if skills else ""

        # ----- CERTIFICATIONS (deduplicate) -----
        raw_certifications = employee.get("certifications", [])
        seen_certs = set()
        for cert in raw_certifications:
            cert_name = cert.get("title", "N/A")
            seen_certs.add(cert_name)
        certifications_str = ", ".join(seen_certs) if seen_certs else ""

        # ----- LANGUAGES (deduplicate) -----
        raw_languages = employee.get("languages", [])
        seen_langs = set()
        for lang in raw_languages:
            language_name = lang.get("language", "N/A")
            seen_langs.add(language_name)
        languages_str = ", ".join(seen_langs) if seen_langs else ""

        # ----- PROJECTS (deduplicate) -----
        raw_projects = employee.get("projects", [])
        seen_projects = set()
        for proj in raw_projects:
            proj_name = proj.get("name", "N/A")
            seen_projects.add(proj_name)
        projects_str = ", ".join([str(x) for x in seen_projects if x is not None]) if seen_projects else ""

        # ----- AWARDS (deduplicate) -----
        raw_awards = employee.get("awards", [])
        seen_awards = set()
        for award in raw_awards:
            award_name = award.get("title", "N/A")
            seen_awards.add(award_name)
        awards_str = ", ".join(seen_awards) if seen_awards else ""

        # ----- PATENTS (deduplicate) -----
        raw_patents = employee.get("patents", [])
        seen_patents = set()
        for patent in raw_patents:
            patent_name = patent.get("title", "N/A")
            seen_patents.add(patent_name)
        patents_str = ", ".join(seen_patents) if seen_patents else ""

        # ----- PUBLICATIONS (deduplicate) -----
        raw_publications = employee.get("publications", [])
        seen_publications = set()
        for pub in raw_publications:
            pub_name = pub.get("title", "N/A")
            seen_publications.add(pub_name)
        publications_str = ", ".join(seen_publications) if seen_publications else ""

        # ----- SALARY INFORMATION -----
        projected_base_salary_median = employee.get("projected_base_salary_median")
        projected_base_salary_currency = employee.get("projected_base_salary_currency")
        projected_base_salary_period = employee.get("projected_base_salary_period")
        
        salary_str = ""
        if projected_base_salary_median:
            salary_str = f"{projected_base_salary_currency}{projected_base_salary_median:,.2f} {projected_base_salary_period}"

        # Build the final row dictionary.
        row = {
            "ID": id_val,
            "Name": name_val,
            "Headline/Title": headline_val,
            "Location": location_val,
            "Country": country_val,
            "URL": url_val,
            "Primary Email": primary_email,
            "All Emails": all_emails_str,
            "Industry": company_industry_str, 
            "Experience Count": experience_count_val,
            "Summary": summary_val,
            "Experiences": experiences_str,
            "Educations": educations_str,
            "Skills": skills_str,
            "Certifications": certifications_str,
            "Languages": languages_str,
        }
        rows.append(row)

    # After the search API call
    df = pd.DataFrame(rows)

    return df

if __name__ == "__main__":
    user_query = '("Chief Financial Officer") OR ("CFO")'
    #country = "South Africa"
    #location = '("Johannesburg")  OR ("Cape Town")'
    # company = '("Pizza Hut") OR ("PWC")'
    #university = '("University of Cape Town") OR ("University of Johannesburg")'
    industry = '("Farming")'
    #skills = "research"
    #certifications = "Assessor"
    #languages = "English"

    df_employees = search_employees_one_row_per_employee_dedup(
        query=user_query,
        # country_filter=country,
        # location_filter=location,
        # company_filter=company,
        # university_filter=university,
        industry_filter=industry,
        # skills_filter=skills,
        # certifications_filter=certifications,
        # languages_filter=languages,
        max_to_fetch=3
    )

In [4]:
df_employees

Unnamed: 0,ID,Name,Headline/Title,Location,Country,URL,Canonical_URL,Primary Email,All Emails,Industry,Experience Count,Summary,Experiences,Educations,Skills,Certifications,Languages
0,1340072,Bart Wyrick,Portfolio Manager at Country Club Trust Company,"Overland Park, Kansas, United States",United States,https://www.linkedin.com/in/bart-wyrick-222b9368,https://www.linkedin.com/in/bart-wyrick-222b9368,,,Accounting | Farming | Financial Services,8,,Role: Portfolio Manager | Company: Country Clu...,Institution: University of Chicago Graduate Sc...,,,
1,1631055,Brett Jones,Chief Financial Officer at TriCal Group,"Pinehurst, North Carolina, United States",United States,https://www.linkedin.com/in/brett-jones-cpa-15...,https://www.linkedin.com/in/brett-jones-cpa-15...,bjones@tricalgroup.com,"bjones@tricalgroup.com, b_jones@tricalgroup.co...",Accounting | Farming | Professional Services,4,I am excited to serve as CFO of the TriCal Gro...,Role: Chief Financial Officer | Company: TriCa...,Institution: North Carolina State University |...,"accounting, acquisitions, analysis, assurance,...",Certified Public Accountant,
2,1710616,Blake Barksdale,CFO at Southern Ag Services and Alliance Ag Ri...,"West Point, Mississippi, United States",United States,https://www.linkedin.com/in/blake-barksdale-cp...,https://www.linkedin.com/in/blake-barksdale-cp...,blake.barksdale@southernagllc.com,blake.barksdale@southernagllc.com,Farming,1,Certified Public Accountant Certified Manageme...,Role: Chief Financial Officer | Company: South...,Institution: Mississippi State University | De...,"accounting, accounts payable, cost accounting,...","Certified Management Accountant, Certified Pub...",


In [5]:
df_employees.to_csv('df_employees.csv')

In [4]:
df_employees[df_employees['Experiences'].str.contains('PWC', na=False)]

Unnamed: 0,ID,Name,Headline/Title,Location,Country,URL,Canonical_URL,Primary Email,All Emails,Industry,Experience Count,Summary,Experiences,Educations,Skills,Certifications,Languages


In [5]:
import pandas as pd
import numpy as np

def generate_dummy_data(num_records=100):
    """Generate realistic dummy data for candidates from various countries."""
    np.random.seed(42)

    # Define data for different countries
    countries_data = {
        "ZA": {
            "cities": ["Johannesburg", "Cape Town", "Durban", "Pretoria", "Port Elizabeth", "Bloemfontein", "East London"],
            "provinces": ["Gauteng", "Western Cape", "KwaZulu-Natal", "Eastern Cape", "Free State", "North West", "Mpumalanga", "Limpopo", "Northern Cape"],
            "languages": ["English", "Zulu", "Xhosa", "Afrikaans", "Pedi", "Tswana", "Sotho", "Tsonga", "Swati", "Venda", "Ndebele"],
            "first_names": ["Stefan", "Thabo", "Anele", "Nomsa", "Sipho", "Zanele", "Lee", "Puleng", "Kgomotso", "Tshepo"],
            "last_names": ["Ross", "Nkosi", "Mthembu", "Khumalo", "Mokoena", "Mabaso", "Davids", "Xaba", "Ntuli", "Msimang"],
            "universities": ["University of Cape Town", "University of the Witwatersrand", "Stellenbosch University", "University of Pretoria", "University of Johannesburg"],
            "companies": ["MTN", "Vodacom", "Standard Bank", "FirstRand", "Sasol"]
        }
    }

    # Job titles and relevant skills and certifications
    job_titles = {
        "Data Scientist": {
            "skills": ["Python", "Machine Learning", "Data Analysis", "SQL", "R", "TensorFlow"],
            "certifications": ["Microsoft Certified Azure: Data Science Associate", "AWS Cloud Practitioner"]
        },
        "Software Engineer": {
            "skills": ["Java", "Python", "JavaScript", "C++", "Software Development", "Agile"],
            "certifications": ["Certified Software Engineer", "AWS Certification"]
        },
        "Product Manager": {
            "skills": ["Product Development", "Market Analysis", "Agile", "Scrum", "Leadership"],
            "certifications": ["Certified Product Manager", "Scrum Master Certification"]
        },
        "Financial Analyst": {
            "skills": ["Financial Modeling", "Data Analysis", "Excel", "Risk Management", "Accounting"],
            "certifications": ["Certified Financial Analyst", "Chartered Financial Analyst (CFA)"]
        },
        "Marketing Specialist": {
            "skills": ["Digital Marketing", "SEO", "Content Creation", "Social Media", "Market Research"],
            "certifications": ["Google Analytics Certification", "HubSpot Content Marketing Certification"]
        }
    }

    education_details = {
        "ZA": [
            {"institution": "University of Cape Town", "degree": "Bachelor of Science in Computer Science", "from": "2010", "to": "2014"},
            {"institution": "University of the Witwatersrand", "degree": "Master of Business Administration", "from": "2015", "to": "2017"},
            {"institution": "Stellenbosch University", "degree": "Bachelor of Commerce in Finance", "from": "2011", "to": "2014"},
            {"institution": "University of Pretoria", "degree": "Bachelor of Engineering", "from": "2012", "to": "2016"},
            {"institution": "University of Johannesburg", "degree": "Bachelor of Arts in Psychology", "from": "2010", "to": "2013"}
        ]
    }

    # Sample experience details without company names
    experience_details = {
        "Data Scientist": [
            {"role": "Data Scientist", "from": "April 2020", "to": "Present", "duration": "36 months"},
            {"role": "Senior Data Analyst", "from": "June 2017", "to": "March 2020", "duration": "34 months"},
        ],
        "Software Engineer": [
            {"role": "Software Engineer", "from": "January 2019", "to": "Present", "duration": "48 months"},
            {"role": "Junior Developer", "from": "May 2016", "to": "December 2018", "duration": "32 months"},
        ],
        "Product Manager": [
            {"role": "Product Manager", "from": "March 2018", "to": "Present", "duration": "40 months"},
            {"role": "Associate Product Manager", "from": "July 2015", "to": "February 2018", "duration": "30 months"},
        ],
        "Financial Analyst": [
            {"role": "Financial Analyst", "from": "September 2019", "to": "Present", "duration": "24 months"},
            {"role": "Junior Financial Analyst", "from": "January 2017", "to": "August 2019", "duration": "32 months"},
        ],
        "Marketing Specialist": [
            {"role": "Marketing Specialist", "from": "November 2018", "to": "Present", "duration": "38 months"},
            {"role": "Marketing Coordinator", "from": "February 2016", "to": "October 2018", "duration": "32 months"},
        ]
    }

    # Sample summary details
    summary_details = {
        "Data Scientist": [
            "Experienced Data Scientist with a strong background in machine learning and statistical analysis. Skilled in using Python, R, and SQL to extract insights from complex datasets.",
            "Passionate about leveraging data to drive business decisions and improve operational efficiency. Proven track record of delivering high-impact data solutions."
        ],
        "Software Engineer": [
            "Highly skilled Software Engineer with experience in developing scalable and robust software applications. Proficient in multiple programming languages, including Java, Python, and JavaScript.",
            "Strong problem-solving abilities and a keen eye for detail, ensuring high-quality code and efficient solutions. Experienced in working with agile methodologies and collaborating with cross-functional teams."
        ],
        "Product Manager": [
            "Results-driven Product Manager with a proven track record of successfully launching products in competitive markets. Skilled in market analysis, product roadmapping, and stakeholder management.",
            "Strong leadership and communication skills, with experience in leading cross-functional teams. Passionate about understanding customer needs and translating them into successful products."
        ],
        "Financial Analyst": [
            "Detail-oriented Financial Analyst with experience in financial modeling, forecasting, and reporting. Skilled in using advanced Excel functions and financial software to analyze data and generate insights.",
            "Strong analytical and problem-solving skills, with a focus on driving business growth and efficiency. Experienced in collaborating with cross-functional teams to develop strategic financial plans."
        ],
        "Marketing Specialist": [
            "Creative Marketing Specialist with experience in developing and executing successful marketing campaigns. Skilled in digital marketing, social media management, and content creation.",
            "Strong analytical skills, with experience in using data to optimize marketing strategies and improve ROI. Experienced in collaborating with design and sales teams to create cohesive marketing plans."
        ]
    }

    # Initialize data dictionary
    data = {
        "full_name": [],
        "country": [],
        "country_full_name": [],
        "province": [],
        "city": [],
        "personal_emails": [],
        "personal_numbers": [],
        "URL": [],
        "gender": [],
        "headline": [],
        "summary": [],
        "industry": [],
        "experiences": [],
        "education": [],
        "skills": [],
        "certifications": [],
        "languages": [],
        "university": [],
        "company": []
    }

    # Generate data for each record
    for i in range(num_records):
        country = np.random.choice(list(countries_data.keys()))
        country_info = countries_data[country]

        full_name = f"{np.random.choice(country_info['first_names'])} {np.random.choice(country_info['last_names'])}"
        data["full_name"].append(full_name)
        data["country"].append(country)
        data["country_full_name"].append(country)
        data["province"].append(np.random.choice(country_info['provinces']))
        data["city"].append(np.random.choice(country_info['cities']))
        data["personal_emails"].append(f"{full_name.lower().replace(' ', '.')}@example.com")
        data["personal_numbers"].append(f"+{np.random.randint(1, 99)}{np.random.randint(100000000, 999999999)}")
        data["URL"].append(f"https://www.linkedin.com/in/{full_name.lower().replace(' ', '-')}")
        data["gender"].append(np.random.choice(["Male", "Female", "Other"]))

        # Assign job title and related details
        job_title = np.random.choice(list(job_titles.keys()))
        data["headline"].append(job_title)
        data["skills"].append(", ".join(job_titles[job_title]["skills"]))
        data["certifications"].append(", ".join(job_titles[job_title]["certifications"]))

        # Assign industry and other details
        data["industry"].append(np.random.choice(["Technology", "Finance", "Healthcare", "Education", "Retail"]))

        # Assign education details
        education = np.random.choice(education_details[country])
        data["education"].append(education)
        data["university"].append(education["institution"])  # Make university column match institution

        # Assign experience details with dynamic company names
        experience = np.random.choice(experience_details[job_title])
        experience["company"] = np.random.choice(country_info["companies"])
        data["experiences"].append(experience)

        data["summary"].append(np.random.choice(summary_details[job_title]))
        data["languages"].append(np.random.choice(country_info['languages']))

        # Assign company
        data["company"].append(np.random.choice(country_info['companies']))

    return pd.DataFrame(data)

df = generate_dummy_data(num_records=100)
df

Unnamed: 0,full_name,country,country_full_name,province,city,personal_emails,personal_numbers,URL,gender,headline,summary,industry,experiences,education,skills,certifications,languages,university,company
0,Lee Khumalo,ZA,ZA,Limpopo,Port Elizabeth,lee.khumalo@example.com,+21770094950,https://www.linkedin.com/in/lee-khumalo,Female,Product Manager,"Strong leadership and communication skills, wi...",Healthcare,"{'role': 'Associate Product Manager', 'from': ...","{'institution': 'Stellenbosch University', 'de...","Product Development, Market Analysis, Agile, S...","Certified Product Manager, Scrum Master Certif...",Tsonga,Stellenbosch University,Standard Bank
1,Zanele Mokoena,ZA,ZA,Western Cape,Pretoria,zanele.mokoena@example.com,+30103344769,https://www.linkedin.com/in/zanele-mokoena,Male,Data Scientist,Passionate about leveraging data to drive busi...,Education,"{'role': 'Senior Data Analyst', 'from': 'June ...",{'institution': 'University of the Witwatersra...,"Python, Machine Learning, Data Analysis, SQL, ...",Microsoft Certified Azure: Data Science Associ...,Swati,University of the Witwatersrand,MTN
2,Tshepo Mthembu,ZA,ZA,Mpumalanga,Pretoria,tshepo.mthembu@example.com,+64299502978,https://www.linkedin.com/in/tshepo-mthembu,Male,Product Manager,"Strong leadership and communication skills, wi...",Retail,"{'role': 'Product Manager', 'from': 'March 201...","{'institution': 'University of Cape Town', 'de...","Product Development, Market Analysis, Agile, S...","Certified Product Manager, Scrum Master Certif...",Swati,University of Cape Town,FirstRand
3,Thabo Msimang,ZA,ZA,Northern Cape,Cape Town,thabo.msimang@example.com,+53891274835,https://www.linkedin.com/in/thabo-msimang,Other,Financial Analyst,"Strong analytical and problem-solving skills, ...",Education,"{'role': 'Junior Financial Analyst', 'from': '...","{'institution': 'University of Johannesburg', ...","Financial Modeling, Data Analysis, Excel, Risk...","Certified Financial Analyst, Chartered Financi...",English,University of Johannesburg,FirstRand
4,Thabo Xaba,ZA,ZA,Eastern Cape,Cape Town,thabo.xaba@example.com,+6993102645,https://www.linkedin.com/in/thabo-xaba,Female,Financial Analyst,"Strong analytical and problem-solving skills, ...",Retail,"{'role': 'Junior Financial Analyst', 'from': '...",{'institution': 'University of the Witwatersra...,"Financial Modeling, Data Analysis, Excel, Risk...","Certified Financial Analyst, Chartered Financi...",Venda,University of the Witwatersrand,FirstRand
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Thabo Ntuli,ZA,ZA,Northern Cape,Port Elizabeth,thabo.ntuli@example.com,+26269336878,https://www.linkedin.com/in/thabo-ntuli,Female,Data Scientist,Experienced Data Scientist with a strong backg...,Finance,"{'role': 'Senior Data Analyst', 'from': 'June ...","{'institution': 'University of Johannesburg', ...","Python, Machine Learning, Data Analysis, SQL, ...",Microsoft Certified Azure: Data Science Associ...,Pedi,University of Johannesburg,MTN
96,Anele Xaba,ZA,ZA,Western Cape,Durban,anele.xaba@example.com,+87294741724,https://www.linkedin.com/in/anele-xaba,Female,Financial Analyst,Detail-oriented Financial Analyst with experie...,Healthcare,"{'role': 'Financial Analyst', 'from': 'Septemb...",{'institution': 'University of the Witwatersra...,"Financial Modeling, Data Analysis, Excel, Risk...","Certified Financial Analyst, Chartered Financi...",English,University of the Witwatersrand,Standard Bank
97,Thabo Mthembu,ZA,ZA,Free State,Durban,thabo.mthembu@example.com,+40920476889,https://www.linkedin.com/in/thabo-mthembu,Other,Financial Analyst,Detail-oriented Financial Analyst with experie...,Technology,"{'role': 'Financial Analyst', 'from': 'Septemb...","{'institution': 'University of Pretoria', 'deg...","Financial Modeling, Data Analysis, Excel, Risk...","Certified Financial Analyst, Chartered Financi...",Zulu,University of Pretoria,MTN
98,Tshepo Nkosi,ZA,ZA,Mpumalanga,Durban,tshepo.nkosi@example.com,+42191351453,https://www.linkedin.com/in/tshepo-nkosi,Other,Marketing Specialist,Creative Marketing Specialist with experience ...,Retail,"{'role': 'Marketing Coordinator', 'from': 'Feb...",{'institution': 'University of the Witwatersra...,"Digital Marketing, SEO, Content Creation, Soci...","Google Analytics Certification, HubSpot Conten...",Sotho,University of the Witwatersrand,FirstRand


In [3]:
df[df['full_name']=='Stefan Ross']

Unnamed: 0,full_name,country,country_full_name,province,city,personal_emails,personal_numbers,URL,gender,headline,summary,industry,experiences,education,skills,certifications,languages,university,company
40,Stefan Ross,ZA,ZA,Free State,Bloemfontein,stefan.ross@example.com,3749218703,https://www.linkedin.com/in/stefan-ross,Other,Data Scientist,Passionate about leveraging data to drive busi...,Healthcare,"{'role': 'Senior Data Analyst', 'from': 'June ...",Bachelor of Engineering from University of Pre...,"Python, Machine Learning, Data Analysis, SQL, ...",Microsoft Certified Azure: Data Science Associ...,Pedi,University of the Witwatersrand,FirstRand
