# LinkedIn Analytics With Python

# Quick Overview:
## ----------------------

## LinkedIn doesn't offer content analytics other than for each individual post separately. 
#### What if we want to see performance of our posts (likes, views, comments) over the past month? 
#### I wrote a program in Python that would visualize whether the number of views on my post went up or down over the past days, and here is what I got:
### -------------------------------------------------------------------------------------------------

## First I Install Selenium...
# What is Selenium?
### Selenium is a portable framework for testing web applications that can be driven from multiple programming languages, including Python.
### Its Python library contains methods that allow you to navigate through Google Chrome and other browsers (here I used Chrome).

In [1]:
!pip install selenium



# Then I import Packages for Managing Web Scraping

In [2]:
# importing packages for managing web scraping
from selenium import webdriver

In [3]:
!pip install bs4



In [4]:
from bs4 import BeautifulSoup
import re
import time

In [None]:
# Request the LinkedIn credentials, the profile slug and the number of posts to analyse.
# The password is read with getpass so it is not echoed to the screen or stored in the
# notebook output; the username and profile slug are stripped of stray whitespace because
# they are later sent to the login form / concatenated into a URL and file names.
from getpass import getpass

print("Please enter the exact LinkedIn username you use to login (email/phone?):")
username_string = input().strip()
print()
print("Please enter the exact LinkedIn password:")
# Empty prompt: the question was already printed on the line above.
password_string = getpass(prompt="")
print()
print("Please enter your usernmae exactly how it appears in your profile link (after '/in') :")
link_username = input().strip()
print()
print("Please enter the number of the last posts you want to analyse:")
# int() raises ValueError if the answer is not a whole number.
number_of_posts = int(input())

In [None]:
# Start a Chrome session driven by the local chromedriver binary.
# Current Selenium 4 releases no longer accept the driver path as a positional
# argument of webdriver.Chrome; it has to be wrapped in a Service object.
from selenium.webdriver.chrome.service import Service

browser = webdriver.Chrome(service=Service("chromedrivers.exe"))

In [None]:
# Open the LinkedIn login page and log in with the credentials entered above.
# The find_element_by_* helpers were removed in Selenium 4.3, so the fields are
# located with find_element(By.ID, ...) instead.
from selenium.webdriver.common.by import By

browser.get('https://www.linkedin.com/login')
# Fill in the username field
elementID = browser.find_element(By.ID, 'username')
elementID.send_keys(username_string)
# Fill in the password field, then submit the form it belongs to
elementID = browser.find_element(By.ID, 'password')
elementID.send_keys(password_string)
elementID.submit()

In [None]:
# Open the recent post activity page of the LinkedIn user specified above.
# link_username is requested "exactly how it appears in your profile link", so it is used
# as-is; appending a fixed profile-id suffix would only work for one particular account.
recent_activity_link = "https://www.linkedin.com/in/" + link_username + "/recent-activity/shares/"
browser.get(recent_activity_link)

# __________________STEP_2:_SCRAPE_POST_STATS_______________

In [None]:
# Work out how many scrolls are needed for the requested number of posts.
# 5 is LinkedIn's number of posts per scroll; any leftover posts need one extra scroll
# (i.e. the division is rounded up).
full_scrolls, leftover_posts = divmod(number_of_posts, 5)
number_of_scrolls = full_scrolls + (1 if leftover_posts else 0)

In [None]:
# Seconds to wait after each scroll so the page can load more posts.
SCROLL_PAUSE_TIME = 5

# Collected view counts; filled in later, once the page has been scrolled and parsed.
views = list()

In [None]:
# Page height before the first scroll; used to detect when scrolling stops adding content.
last_height = browser.execute_script("return document.body.scrollHeight")

for _ in range(number_of_scrolls):
    # Jump to the bottom of the page, then pause so new content can load.
    browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(SCROLL_PAUSE_TIME)

    # Re-measure the page: if it grew, remember the new height and keep scrolling.
    new_height = browser.execute_script("return document.body.scrollHeight")
    if new_height != last_height:
        last_height = new_height
        continue

    # Height did not change after the scroll, so stop early.
    break

In [None]:
# Take the source of the page currently loaded in the browser...
src = browser.page_source
# ...and parse it into a BeautifulSoup tree using the lxml parser.
soup = BeautifulSoup(markup=src, features="lxml")

#### find LIKES on LinkedIn
#### look for "span" tags that have the specific following attribute (click 'inspect' on the L-in page)
#### need to turn each bs4 tag into text and then extract the number from it
#### find these specific tags ("<stuff>") in the soup contents:

In [None]:
# Collect the number of likes (reactions) of every post found in the parsed page.
# The list is created here because nothing else in the notebook defines it; without
# this the append below raises NameError.
likes = []
likes_bs4tags = soup.find_all("span", attrs = {"class" : "v-align-middle social-details-social-counts__reactions-count"})
for tag in likes_bs4tags:
    # Search only the visible text of the tag, so digits inside attribute values
    # can never be mistaken for the count. The pattern accepts thousands separators
    # but always starts with a digit, so a lone comma is never matched.
    list_of_matches = re.findall(r'[0-9][,0-9]*', tag.get_text())
    if not list_of_matches:
        # No number in this tag: skip it instead of crashing on an empty list.
        continue
    # The last number in the text is the count; drop the commas before converting.
    likes_int = int(list_of_matches[-1].replace(',', ''))
    likes.append(likes_int)

In [None]:
# Collect the number of views of every post found in the parsed page.
views_bs4tags = soup.find_all("span", attrs = {"class" : "icon-and-text-container t-14 t-black--light t-normal"})
for tag in views_bs4tags:
    # Search only the visible text of the tag: the markup itself contains digits
    # (the "t-14" class), which would otherwise be picked up as the view count
    # whenever the tag text holds no number. The pattern accepts thousands
    # separators but always starts with a digit.
    list_of_matches = re.findall(r'[0-9][,0-9]*', tag.get_text())
    if not list_of_matches:
        # No number in this tag: skip it instead of crashing on an empty list.
        continue
    # The last number in the text is the count; drop the commas before converting.
    views_int = int(list_of_matches[-1].replace(',', ''))
    views.append(views_int)

print(views)

# ______________STEP_3:_DATA_VISUALISATION______________

In [5]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [None]:
# Flip the order of the scraped view counts in place
views[:] = views[::-1]

# Wrap the list in a single-column pandas DataFrame named 'Views'
views_df = pd.DataFrame(data=views, columns=['Views'])

In [None]:
# Get rid of the outliers:
#   values further than 3 standard deviations from the median are masked out (become NaN)...
within_three_std = np.abs(views_df - views_df.median()) <= (3 * views_df.std())
views_df_no_outliers = views_df[within_three_std]

#   ...and the masked values are then replaced with the median of the remaining values.
#   The filled column is assigned back explicitly: calling fillna(inplace=True) on
#   views_df_no_outliers['Views'] is chained assignment, which pandas warns about and
#   which does not modify the DataFrame under Copy-on-Write.
views_df_no_outliers['Views'] = views_df_no_outliers['Views'].fillna(views_df_no_outliers['Views'].median())

In [None]:
# Fit a straight trend line through the view counts, report its slope and the
# normalised RMSE of the fit, then plot the data with the trend line and save the figure.
print('**************************')
print('********* VIEWS **********')
print('**************************')

# Fit against the 1-D 'Views' column rather than the whole DataFrame, so that the
# slope, intercept and error come out as plain scalars instead of 1-element arrays/Series.
views_values = views_df_no_outliers['Views'].to_numpy(dtype=float)
post_positions = np.arange(len(views_values))
coefficients_views, residuals_views, _, _, _ = np.polyfit(post_positions, views_values, 1, full=True)
slope_views = coefficients_views[0]
intercept_views = coefficients_views[1]

# polyfit returns an empty residuals array when there are too few points for the fit
# to leave any residual (two points or fewer for a line); treat that as zero error.
squared_error_views = residuals_views[0] if len(residuals_views) else 0.0
mse_views = squared_error_views / len(views_values)

# Normalise by the value range; when every value is identical the range is 0 and the
# normalised error is undefined, so report NaN instead of dividing by zero.
views_range = views_values.max() - views_values.min()
nrmse_views = np.sqrt(mse_views) / views_range if views_range else float('nan')

print('Slope: ' + str(slope_views))
print('NRMSE Error: ' + str(nrmse_views))
plt.plot(views_df_no_outliers)
plt.plot(slope_views * post_positions + intercept_views)
plt.title('LinkedIn Post Views for ' + link_username)
plt.xlabel('Posts')
plt.ylabel('Views')
# Save before show(): the figure is written to disk while it still holds the plot.
plt.savefig(link_username + '-linkedin-views-last-' + str(number_of_posts) + '-posts-GRAPH.png', dpi=600)
plt.show()
plt.clf()

In [None]:
# Write the outlier-free view counts to a CSV file named after the profile and post count
csv_name_parts = [link_username, '-linkedin-views-last-', str(number_of_posts), '-posts.csv']
views_df_no_outliers.to_csv(''.join(csv_name_parts))

# THANK-YOU!
# HAPPY CODING:)