# LinkedIn Connections

You can download your LinkedIn profile data and read the connections data from the exported CSV file.
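Download your LinkedIn data here: https://www.linkedin.com/mypreferences/d/download-my-data and then add the connections file to the `data` folder as `connections.csv` (the notebook looks for `data/connections.csv` one level above its working directory).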


In [None]:
from pathlib import Path

import pandas as pd
import plotly.express as px


# Location of the connections export: <parent of the working directory>/data/connections.csv
CSV_FILE = Path.cwd().parent.joinpath("data", "connections.csv")

In [None]:
# Load the connections export into a DataFrame.
# The first three rows are skipped because valid data starts at row 4.
connections = pd.read_csv(
    filepath_or_buffer=CSV_FILE,
    skiprows=3,
)

In [None]:
# Number of connections per company, largest first, plus each company's
# share (in percent) of all rows that were counted.
_by_company = (
    connections.groupby("Company")
    .size()
    .sort_values(ascending=False)
    .reset_index(name="Count")
    .assign(Percentage=lambda tally: tally["Count"].div(tally["Count"].sum()).mul(100))
)

# Show the ten most common companies as the cell output.
_by_company.head(10)

In [None]:
# Number of connections per position, largest first, plus each position's
# share (in percent) of all rows that were counted.
_by_pos = (
    connections.groupby("Position")
    .size()
    .sort_values(ascending=False)
    .reset_index(name="Count")
    .assign(Percentage=lambda tally: tally["Count"].div(tally["Count"].sum()).mul(100))
)

# Show the ten most common positions as the cell output.
_by_pos.head(10)

In [None]:
# Add up the counts of every position whose title contains "ceo"
# (case-insensitive substring match, so any title mentioning it is included).
_mentions_ceo = _by_pos["Position"].str.contains("CEO", case=False)
n_ceos = _by_pos.loc[_mentions_ceo, "Count"].sum()
print(f"There are {n_ceos} people who are CEOs.")

In [None]:
# Keep only rows that have a position, then replace every remaining missing
# value with "Unknown" so each row has a label at both treemap levels.
_has_position = connections["Position"].notna()
_no_nan = connections.loc[_has_position].fillna("Unknown")

# Treemap nested by company, then by position within each company.
fig = px.treemap(
    _no_nan,
    path=["Company", "Position"],
    width=1200,
    height=1200,
)
fig.show()