<img width="8%" alt="LinkedIn.png" src="https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/.github/assets/logos/LinkedIn.png" style="border-radius: 15%">

# LinkedIn - Get post interactions
<a href="https://bit.ly/3JyWIk6">Give Feedback</a> | <a href="https://github.com/jupyter-naas/awesome-notebooks/issues/new?assignees=&labels=bug&template=bug_report.md&title=LinkedIn+-+Get+post+interactions:+Error+short+description">Bug report</a>

**Tags:** #linkedin #post #likes #comments #interactions #profiles #naas_drivers #growth #notification #automation

**Author:** [Florent Ravenel](https://www.linkedin.com/in/florent-ravenel/)

**Last update:** 2023-12-12 (Created: 2023-12-12)

**Description:** This notebook retrieves the profiles of people who liked or commented on a specific LinkedIn post, ranks them with an interaction score, tries to extract each person's company name from their occupation, and sends an email recap with all the extracted data attached as an Excel file.


<div class="alert alert-info" role="info" style="margin: 10px">
<b>Disclaimer:</b><br>
This code is in no way affiliated with, authorized, maintained, sponsored or endorsed by LinkedIn or any of its affiliates or subsidiaries. It uses an independent and unofficial API. Use at your own risk.

This project violates LinkedIn's User Agreement Section 8.2, and because of this, LinkedIn may (and will) temporarily or permanently ban your account. We are not responsible for your account being banned.
<br>
</div>

## Input

### Import libraries

In [None]:
import naas
from naas_drivers import linkedin
import pandas as pd
import openai
import markdown2

### Setup variables
**Pre-requisite**
- [Get your cookies on LinkedIn](https://www.notion.so/LinkedIn-driver-Get-your-cookies-d20a8e7e508e42af8a5b52e33f3dba75)

**Mandatory**
- `openai_api_key`: Your OpenAI API key.
- `li_at`: Cookie used to authenticate Members and API clients.
- `JSESSIONID`: Cookie used for Cross Site Request Forgery (CSRF) protection and URL signature validation.
- `linkedin_url`: This variable represents the LinkedIn post URL.
- `email_to`: List of email addresses that will receive the notification.

In [None]:
# Mandatory
openai_api_key = naas.secret.get("OPENAI_API_KEY") or "YOUR_OPENAI_API_KEY"
li_at = naas.secret.get("LINKEDIN_LI_AT") or "YOUR_LINKEDIN_LI_AT" #example: AQFAzQN_PLPR4wAAAXc-FCKmgiMit5FLdY1af3-2
JSESSIONID = naas.secret.get("LINKEDIN_JSESSIONID") or "YOUR_LINKEDIN_JSESSIONID" #example: ajax:8379907400220387585
linkedin_url = ""
email_to = []

# Optional
# Every output file is named after the post slug: the part of the URL between
# "posts/" and the query string. A trailing "/" is dropped so the slug can never
# turn the file names below into directory paths.
name = (
    linkedin_url.split("posts/")[1].split("?")[0].rstrip("/")
    if "posts/" in linkedin_url
    else ""
)
# Fail early with an explicit message (instead of an opaque IndexError) when
# the URL has not been filled in or does not point to a ".../posts/<slug>" page.
if not name:
    raise ValueError(
        "linkedin_url must be a LinkedIn post URL containing 'posts/<slug>' "
        f"(got {linkedin_url!r})"
    )
csv_stats = f"{name}_stats.csv"
csv_reactions = f"{name}_reactions.csv"
csv_comments = f"{name}_comments.csv"
csv_interactions = f"{name}_interactions.csv"
csv_profiles = f"{name}_profiles.csv"
excel_output = f"{name}.xlsx"
email_content_md = f"{name}.md"

## Model

### Get post stats

In [None]:
# Reuse the cached stats CSV when it can be read; otherwise fetch the stats
# from LinkedIn and write them to the cache file.
try:
    df_stats = pd.read_csv(csv_stats)
except Exception as error:
    print(error)
    df_stats = linkedin.connect(li_at, JSESSIONID).post.get_stats(linkedin_url)
    df_stats.to_csv(csv_stats, index=False)

# The values below are read from the row labelled 0 and reused by later cells.
if df_stats.shape[0] > 0:
    title = df_stats.loc[0, "TITLE"]
    author = df_stats.loc[0, "AUTHOR_NAME"]
    published_at = df_stats.loc[0, "SUBDESCRIPTION"].replace(" • ", " ").strip() + " ago"
    views = df_stats.loc[0, "VIEWS"]
    likes = df_stats.loc[0, "LIKES"]
    comments = df_stats.loc[0, "COMMENTS"]
    shares = df_stats.loc[0, "SHARES"]
    print(f"Post published by {author}, {published_at}: '{title}'")
    # Joining with a single space reproduces print()'s default separator.
    print(
        " ".join(
            str(part)
            for part in (
                "\n\t👀 Impressions\t",
                views,
                "\n\t👍 Likes\t",
                likes,
                "\n\t💬 Comments\t",
                comments,
                "\n\t⏩ Shares\t",
                shares,
            )
        )
    )

### Get reactions

In [None]:
# Reuse the cached reactions CSV when it can be read; otherwise fetch the
# reactions from LinkedIn and write them to the cache file.
try:
    df_reactions = pd.read_csv(csv_reactions)
except Exception as error:
    print(error)
    df_reactions = linkedin.connect(li_at, JSESSIONID).post.get_likes(linkedin_url)
    df_reactions.to_csv(csv_reactions, index=False)

print("Reactions:", df_reactions.shape[0])
df_reactions.head(1)

### Get comments

In [None]:
# Reuse the cached comments CSV when it can be read; otherwise fetch the
# comments from LinkedIn and write them to the cache file.
try:
    df_comments = pd.read_csv(csv_comments)
except Exception as error:
    print(error)
    df_comments = linkedin.connect(li_at, JSESSIONID).post.get_comments(linkedin_url)
    df_comments.to_csv(csv_comments, index=False)

print("Comments:", df_comments.shape[0])
df_comments.head(1)

### Create interactions dataset

In [None]:
def create_interactions_dataset(
    entity,
    content_title,
    published_at,
    df_reactions,
    df_comments,
):
    """Merge reactions and comments into one interactions dataframe.

    Each reaction becomes a "POST_REACTION" row scored 1 and each comment a
    "POST_COMMENT" row scored 3. Reaction rows come first, then comment rows.
    Rows whose FULLNAME equals `entity` are removed from the result.

    Args:
        entity: Value stored in the ENTITY column; also the full name excluded
            from the result.
        content_title: Value stored in the CONTENT_TITLE column.
        published_at: Value stored in the SCENARIO column.
        df_reactions: Dataframe with the columns FIRSTNAME, LASTNAME, FULLNAME,
            OCCUPATION, REACTION_TYPE, PROFILE_URL, PUBLIC_ID, POST_URL and
            DATE_EXTRACT.
        df_comments: Dataframe with the same columns, except TEXT in place of
            REACTION_TYPE.

    Returns:
        A dataframe with a fresh 0..n-1 index.
    """

    def _as_interactions(source, kind, content_column, score):
        # Scalar values are broadcast to every row of `source` by pd.DataFrame.
        columns = {"ENTITY": entity, "SCENARIO": published_at}
        for person_column in ("FIRSTNAME", "LASTNAME", "FULLNAME", "OCCUPATION"):
            columns[person_column] = source[person_column]
        columns["INTERACTION"] = kind
        columns["INTERACTION_CONTENT"] = source[content_column]
        columns["INTERACTION_SCORE"] = score
        columns["PROFILE_URL"] = source["PROFILE_URL"]
        columns["PUBLIC_ID"] = source["PUBLIC_ID"]
        columns["CONTENT_TITLE"] = content_title
        columns["CONTENT_URL"] = source["POST_URL"]
        columns["DATE_EXTRACT"] = source["DATE_EXTRACT"]
        return pd.DataFrame(columns)

    reaction_rows = _as_interactions(df_reactions, "POST_REACTION", "REACTION_TYPE", 1)
    comment_rows = _as_interactions(df_comments, "POST_COMMENT", "TEXT", 3)

    # Stack both kinds of interaction and renumber the rows.
    combined = pd.concat([reaction_rows, comment_rows]).reset_index(drop=True)

    # Leave out interactions made by the entity itself.
    not_entity = combined["FULLNAME"] != entity
    return combined[not_entity].reset_index(drop=True)

# Build the interactions dataset for this post, save it as CSV, then preview it.
df_interactions = create_interactions_dataset(
    entity=author,
    content_title=title,
    published_at=published_at,
    df_reactions=df_reactions,
    df_comments=df_comments,
)
df_interactions.to_csv(csv_interactions, index=False)
print('🗂️ Interactions:', df_interactions.shape[0])
df_interactions.head(5)

### Create profiles dataset

In [None]:
# Instructions given to the chat model when extracting a company from an
# occupation: return the word following 'at' or '@', or "NA" when none is found.
prompt_company = """
I will give you the occupation from a profile I get from LinkedIn, you will return the company you can extract from by checking the word after 'at' or '@'.
If you don't find it return "NA"
Don't put the results into quotes.
"""

def create_chat_completion(
    openai_api_key,
    prompt,
    message
):
    """Send `prompt` and `message` to the "gpt-4" chat model and return its answer.

    Args:
        openai_api_key: API key assigned to `openai.api_key` before the request.
        prompt: Text sent as the "system" message.
        message: Text sent as the "user" message.

    Returns:
        The message content of the first choice in the response.
    """
    # The key is set on the openai module itself, so it remains set after this call.
    openai.api_key = openai_api_key

    conversation = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": message},
    ]

    # NOTE(review): openai.ChatCompletion looks like the pre-1.0 openai
    # interface — confirm the installed openai version still provides it.
    completion = openai.ChatCompletion.create(
        model="gpt-4",
        temperature=0,
        messages=conversation,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']

def create_profiles_dataset(
    df,
):
    """Aggregate interactions into one row per profile, ranked by total score.

    Rows are grouped on ENTITY, SCENARIO, FIRSTNAME, LASTNAME, FULLNAME,
    OCCUPATION and PROFILE_URL, and INTERACTION_SCORE is summed per group.
    A COMPANY column is inserted before OCCUPATION and filled by calling
    `create_chat_completion` with `prompt_company` on the occupation.

    Args:
        df: Interactions dataframe containing the grouping columns above and
            INTERACTION_SCORE.

    Returns:
        A dataframe sorted by INTERACTION_SCORE in descending order, with a
        fresh 0..n-1 index. Profiles with a missing occupation get "NA" as
        COMPANY.
    """
    to_group = [
        "ENTITY",
        "SCENARIO",
        "FIRSTNAME",
        "LASTNAME",
        "FULLNAME",
        "OCCUPATION",
        "PROFILE_URL",
    ]
    to_agg = {
        "INTERACTION_SCORE": "sum"
    }
    # dropna=False keeps profiles that have a missing value in one of the
    # grouping columns (e.g. no occupation); groupby drops them by default.
    df = df.groupby(to_group, as_index=False, dropna=False).agg(to_agg)

    # Ask the model once per distinct occupation rather than once per row, and
    # never for a missing occupation. Building the column from a list also
    # works when the dataframe is empty.
    occupations = df["OCCUPATION"]
    company_by_occupation = {
        occupation: create_chat_completion(openai_api_key, prompt_company, occupation)
        for occupation in occupations.dropna().unique()
    }
    companies = [
        company_by_occupation.get(occupation, "NA") for occupation in occupations
    ]
    df.insert(loc=5, column="COMPANY", value=companies)

    df = df.sort_values(by="INTERACTION_SCORE", ascending=False)
    return df.reset_index(drop=True)

# Build the ranked profiles dataset, save it as CSV, then preview the top rows.
df_profiles = create_profiles_dataset(df=df_interactions)
df_profiles.to_csv(csv_profiles, index=False)
print('Profiles:', df_profiles.shape[0])
df_profiles.head(10)

## Output

### Save data to Excel

In [None]:
# Map each sheet name to the dataframe written on that sheet
sheet_data = {
    'Profiles': df_profiles,
    'Stats': df_stats,
    'Interactions': df_interactions,
    'Reactions': df_reactions,
    'Comments': df_comments,
}

# Write every dataframe to its own sheet with the openpyxl engine.
# The context manager saves and closes the workbook on exit, even when a sheet
# fails to write; it replaces the explicit writer.save() call, which was
# removed in pandas 2.0.
with pd.ExcelWriter(excel_output, engine='openpyxl') as writer:
    for sheet_name, df in sheet_data.items():
        df.to_excel(writer, sheet_name=sheet_name, index=False)

### Create Email template

In [None]:
%%writefile $email_content_md
Hello,

Post published by AUTHOR, PUBLISHED: <a target='_blank' href='POST'>'TITLE'</a>.<br>

- 👍 Likes: LIKES
- 💬 Comments: COMMENTS
- ⏩ Shares: SHARES

Please find attached the data in Excel.<br>

Have a nice day.
<br>

### Create Email content

In [None]:
import html
import re

# Read the Markdown template and close the file right away. UTF-8 is given
# explicitly because the template contains emoji.
with open(email_content_md, "r", encoding="utf-8") as template_file:
    content = template_file.read()

email_content = markdown2.markdown(content)

# Placeholder -> value inserted in the rendered HTML.
placeholders = {
    "AUTHOR": author,
    "PUBLISHED": published_at,
    "TITLE": title,
    "POST": linkedin_url,
    "LIKES": likes,
    "COMMENTS": comments,
    "SHARES": shares,
}

# Substitute every placeholder in a single pass, so text that was just inserted
# (e.g. a title containing "POST" or "LIKES") is never rewritten by a later
# replacement. Values are HTML-escaped because they are placed into HTML markup.
placeholder_pattern = re.compile("|".join(re.escape(key) for key in placeholders))
email_content = placeholder_pattern.sub(
    lambda match: html.escape(str(placeholders[match.group(0)])),
    email_content,
)
email_content

## Output

### Send Email with Excel file attached

In [None]:
# Subject line built from the post author and title
email_subject = f"LinkedIn Post Analysis from {author}: '{title}'"

# Send the HTML recap to the `email_to` recipients with the Excel file attached
naas.notification.send(
    email_to=email_to,
    subject=email_subject,
    html=email_content,
    files=[excel_output],
)