In [1]:
import pandas as pd
from tabulate import tabulate
from IPython.display import display, HTML

# ✅ Step 1: Skip the first two rows — disclaimer + header preview
df = pd.read_csv("Connections.csv", skiprows=2)

# ✅ Step 2: Strip spaces from column names
df.columns = df.columns.str.strip()

# ✅ Step 3: Create Full Name
df["Full Name"] = df["First Name"].fillna('') + " " + df["Last Name"].fillna('')

# ✅ Step 4: Reorder for display
columns_to_show = ["Full Name", "Position", "Company", "Connected On", "Email Address", "URL"]
df = df[columns_to_show]

# ✅ Step 5: Display preview
print("Column Names:", df.columns.tolist())
print("\n🔍 Preview of Cleaned LinkedIn Connections:\n")
print(tabulate(df.head(10), headers="keys", tablefmt="pretty"))

Column Names: ['Full Name', 'Position', 'Company', 'Connected On', 'Email Address', 'URL']

🔍 Preview of Cleaned LinkedIn Connections:

+---+--------------------------------+--------------------------------+---------------------------+--------------+---------------+----------------------------------------------------------------------+
|   |           Full Name            |            Position            |          Company          | Connected On | Email Address |                                 URL                                  |
+---+--------------------------------+--------------------------------+---------------------------+--------------+---------------+----------------------------------------------------------------------+
| 0 |          Sangeetha M           |       AWS Data Engineer        |      Elevance Health      | 24 Mar 2025  |      nan      |          https://www.linkedin.com/in/sangeetha-m-6215891a0           |
| 1 |         Vaibhav  Patil         |   Assistant Syste

In [2]:
job_categories = {
    "Data Science": [
        "data scientist", "machine learning", "ml engineer", "deep learning", "ai", "artificial intelligence"
    ],
    "Data Analyst": [
        "data analyst", "associate analyst", "reporting analyst", "data insights", "data analytics",
        "analytics", "data manager", "analyst", "data analysis"
    ],
    "Business Intelligence": [
        "business analyst", "bi analyst", "business intelligence", "power bi", "tableau", "looker", "qlik", "microstrategy"
    ],
    "Data Engineer": [
        "data engineer", "etl developer", "etl engineer", "big data", "data pipeline", "data integration"
    ],
    "Web Development": [
        "web developer", "frontend developer", "front end", "backend developer", "back end",
        "full stack developer", "full-stack", "web engineer", "web development intern", "web programming intern",
        "full stack engineer", "sr java full stack engineer"
    ],
    "Software Engineering": [
        "software engineer", "developer", "programmer", "system engineer", "systems engineer",
        "senior system executive", "software development engineer", "software development engineer 2",
        "associate engineer", "sde", "system development engineer"
    ],
    "Cybersecurity": [
        "cyber security", "cybersecurity", "security analyst", "security engineer",
        "infosec", "information security", "application security",
        "penetration tester", "vulnerability engineer", "security consultant", "red team", "blue team",
        "cyberops associate", "vulnerability management co-op"
    ],
    "DevOps / Cloud": [
        "devops", "cloud engineer", "site reliability", "sre", "infrastructure", "aws", "azure", "gcp"
    ],
    "Product / Project": [
        "product manager", "project manager", "product owner", "scrum master", "agile"
    ],
    "Quality Assurance / Testing": [
        "qa", "quality assurance", "test engineer", "testing", "manual tester",
        "automation tester", "qa manager"
    ],
    "Talent / HR": [
        "talent acquisition", "recruiter", "human resources", "hr manager", "hrbp",
        "technical recruiter", "recruitment manager"
    ],
    "Consulting": [
        "consultant", "strategy consultant", "business consultant", "implementation consultant"
    ],
    "Marketing": [
        "marketing", "digital marketing", "content marketing", "growth marketing", "seo specialist"
    ],
    "Student": [
        "student", "graduate", "undergraduate", "bachelor", "msc", "ms", "phd", "fresher", "research assistant"
    ],
    "Other": []
}


In [3]:
# Step 5: Define function to classify
def classify_position(position):
    if pd.isnull(position):
        return "Other"
    
    position = position.lower()
    for category, keywords in job_categories.items():
        if any(keyword in position for keyword in keywords):
            return category
    return "Other"

In [4]:
# Step 6: Apply classification
df["Job Category"] = df["Position"].apply(classify_position)

In [5]:
# Step 7: Reorder columns for display
columns_to_show = ["Full Name", "Position", "Job Category", "Company", "Connected On", "Email Address", "URL"]
subset = df[columns_to_show]

In [6]:
# Step 8: Display as scrollable table in notebook
display(HTML(
    subset.to_html(classes='output_html', index=False)
    + """
    <style>
        .output_html {
            display: block;
            overflow-x: auto;
            overflow-y: auto;
            max-height: 400px;
            max-width: 100%;
            border: 1px solid #aaa;
            font-family: Arial, sans-serif;
        }
    </style>
    """
))

Full Name,Position,Job Category,Company,Connected On,Email Address,URL
Sangeetha M,AWS Data Engineer,Data Engineer,Elevance Health,24 Mar 2025,,https://www.linkedin.com/in/sangeetha-m-6215891a0
Vaibhav Patil,Assistant System Engineer,Software Engineering,Tata Consultancy Services,24 Mar 2025,,https://www.linkedin.com/in/vaibhav-patil-8b908720b
Yingdan Shi,Business Analyst,Data Analyst,Meituan,23 Mar 2025,,https://www.linkedin.com/in/yingdan-shi-591229265
Shubham Srivastava,Senior Data Engineer,Data Engineer,Amazon,22 Mar 2025,,https://www.linkedin.com/in/shubham-srivstv
Nazid Shaik,Software Engineer,Software Engineering,Ford Motor Company,22 Mar 2025,,https://www.linkedin.com/in/nazid-shaik
Anish Viswanathan VR,Associate Salesforce Developer,Software Engineering,zeb,21 Mar 2025,,https://www.linkedin.com/in/anish-viswanathan-vr-50001a235
Baba M,Data Engineer,Data Engineer,Destin IT Solutions,20 Mar 2025,,https://www.linkedin.com/in/babamalik
Aravind Reddy Yalam,Personal Project,Other,Freelance,20 Mar 2025,,https://www.linkedin.com/in/aravind-reddy-y-306ba415b
Vishnu Vardhan Reddy Muthumala,Web Development Intern,Web Development,Excelerate,20 Mar 2025,,https://www.linkedin.com/in/vishnu-vardhan-reddy-muthumala-9b237817a
Fazululla Shaik,Oracle EBS SCM Architect,Other,"Mason Companies, Inc",17 Mar 2025,,https://www.linkedin.com/in/fazululla-shaik-818037b2


In [8]:
# Save the updated DataFrame to a new CSV file
df.to_csv("linkedin_connections_categorized.csv", index=False)

In [10]:

# Save only the selected subset of columns
subset.to_csv("linkedin_connections_subset_categorized.csv", index=False)

NameError: name 'df_subset' is not defined