In [1]:
import pandas as pd
import numpy as np
import os
import random
import plotly.offline as offline
import plotly.plotly as py
import plotly.graph_objs as go
from scipy import stats
from sklearn.linear_model import LinearRegression
from scipy.stats import chi2_contingency

%matplotlib inline
offline.init_notebook_mode(connected=True)

### Loading the truncated dataset
dataset was pre-processed by mySQL to only select the job title contains **Data Scientist and Machine Learning**

In [7]:
jobs = pd.read_csv("./../job_tmp/Jobs_ML_DS.csv")
print(jobs.shape)
display(jobs.head())

(705800, 11)


Unnamed: 0,dataset_id,domain,as_of_date,title,brand,category,locality,region,country,number_of_openings,location_string
0,90558,www.mydiscovercareer.com,2017-01-01,Data Scientist-2,,Any,Riverwoods,"Riverwoods, IL",USA,,
1,873253,esrx.jibeapply.com,2017-01-01,"Intern, Data Scientist",,General,St. Louis,Missouri,USA,,
2,85971,www.pandora.com,2017-01-01,Senior Machine Learning Engineer,Pandora,Engineering,Oakland,CA,USA,,
3,85972,www.spotify.com,2017-01-01,Machine Learning Engineer,,Data & Machine Learning,Boston,MA,USA,,"Boston, MA, USA"
4,85972,www.spotify.com,2017-01-01,Senior Machine Learning Engineer,,Data & Machine Learning,New York,NY,USA,,


### Process the data
1. change the date to pd datetime format
2. count the number of openings, all NAs are set to 1 as default

In [8]:
jobs.as_of_date = pd.to_datetime(jobs.as_of_date)

In [9]:
jobs.number_of_openings.value_counts()

1.0      1935
2.0       268
3.0        29
100.0      16
Name: number_of_openings, dtype: int64

In [10]:
### Change # of openings Nan to 1 and outliers to 1
jobs.loc[jobs.number_of_openings.isna(),'number_of_openings'] = 1
jobs.loc[jobs.number_of_openings == 100,'number_of_openings'] = 1

In [11]:
jobs.number_of_openings.value_counts()

1.0    705503
2.0       268
3.0        29
Name: number_of_openings, dtype: int64

In [13]:
### sort by time
jobs = jobs.sort_values(by='as_of_date', ascending=True).reset_index(drop=True)
jobs.head()

Unnamed: 0,dataset_id,domain,as_of_date,title,brand,category,locality,region,country,number_of_openings,location_string
0,90558,www.mydiscovercareer.com,2017-01-01,Data Scientist-2,,Any,Riverwoods,"Riverwoods, IL",USA,1.0,
1,873253,esrx.jibeapply.com,2017-01-01,"Intern, Data Scientist",,General,St. Louis,Missouri,USA,1.0,
2,864501,workingatbooking.com,2017-01-01,Sr. Data Scientist Online Advertising,Data Science - Analytics & Research,,,,,1.0,
3,85972,www.spotify.com,2017-01-01,Machine Learning Engineer,,Data & Machine Learning,Boston,MA,USA,1.0,"Boston, MA, USA"
4,85972,www.spotify.com,2017-01-01,Senior Machine Learning Engineer,,Data & Machine Learning,New York,NY,USA,1.0,


In [15]:
### jobs for DS and ML positions
DS = jobs[jobs.title.str.contains('Data Scientist')==True].reset_index(drop=True)
ML = jobs[jobs.title.str.contains('Machine Learning')==True].reset_index(drop=True)
print(DS.shape)
print(ML.shape)
display(DS.head())

(456215, 11)
(276733, 11)


Unnamed: 0,dataset_id,domain,as_of_date,title,brand,category,locality,region,country,number_of_openings,location_string
0,90558,www.mydiscovercareer.com,2017-01-01,Data Scientist-2,,Any,Riverwoods,"Riverwoods, IL",USA,1.0,
1,873253,esrx.jibeapply.com,2017-01-01,"Intern, Data Scientist",,General,St. Louis,Missouri,USA,1.0,
2,864501,workingatbooking.com,2017-01-01,Sr. Data Scientist Online Advertising,Data Science - Analytics & Research,,,,,1.0,
3,903073,careers-exlservice.icims.com,2017-01-01,Manager/Senior Manager – Data Scientist – Mach...,,,Wilmington,DE,USA,1.0,
4,85679,jobs.apple.com,2017-01-01,Data Scientist (Machine Learning & Natural Lan...,Apple,Software Engineering,Santa Clara Valley,,USA,1.0,


### Plot the time series job posting information

In [16]:
DS_total = DS.groupby('as_of_date').sum()
ML_total = ML.groupby('as_of_date').sum()
trace1 = go.Scatter(
    x = DS_total.index,
    y = DS_total.number_of_openings,
    name = 'Data Scientist'
)
trace2 = go.Scatter(
    x = ML_total.index,
    y = ML_total.number_of_openings,
    name = 'Machine Learning Engineer'
)
layout = go.Layout(
        title = "Number of Openings",
        xaxis = dict(title='Day'),
        yaxis = dict(title="Num"),
#        yaxis2 = dict(title='# of Employees', overlaying='y', side='right')
)
data = [trace1, trace2]
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)

### Geographic information

In [17]:
### Count the State information
jobs['State'] = jobs.region
jobs['Type'] = 'ML'
jobs.loc[jobs.region.isin(['CA', 'Menlo Park', 'California', 'CA,California']), 'State'] = 'CA'
jobs.loc[jobs.region.isin(['WA', 'Seattle']), 'State'] = 'WA'
jobs.loc[jobs.region.isin(['VA', 'Virginia']), 'State'] = 'VA'
jobs.loc[jobs.region.isin(['MA', 'MA,Mass']), 'State'] = 'MA'
jobs.loc[jobs.title.str.contains('Data Scientist'), 'Type'] = 'DS'

In [19]:
### Top 10 states
jobs.State.value_counts()[0:10]

CA    118571
WA     83309
MA     34308
NY     28186
VA     26174
TX     14771
IL     10046
NJ      6608
MD      6580
PA      6490
Name: State, dtype: int64

In [20]:
job_state = jobs.groupby(['as_of_date', 'State']).sum()
job_state.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,dataset_id,number_of_openings
as_of_date,State,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-01-01,01,7030963,8.0
2017-01-01,AR,5239560,6.0
2017-01-01,AZ,863769,1.0
2017-01-01,Alto-Deer,1729290,2.0
2017-01-01,Arizona,868784,1.0


Plotting number of openings by top states

In [21]:
trace1 = go.Scatter(
    x = job_state.loc[pd.IndexSlice[:, 'CA'], :].index.get_level_values('as_of_date'),
    y = job_state.loc[pd.IndexSlice[:, 'CA'], :].number_of_openings,
    name = 'CA'
)
trace2 = go.Scatter(
    x = job_state.loc[pd.IndexSlice[:, 'WA'], :].index.get_level_values('as_of_date'),
    y = job_state.loc[pd.IndexSlice[:, 'WA'], :].number_of_openings,
    name = 'WA'
)
trace3 = go.Scatter(
    x = job_state.loc[pd.IndexSlice[:, 'MA'], :].index.get_level_values('as_of_date'),
    y = job_state.loc[pd.IndexSlice[:, 'MA'], :].number_of_openings,
    name = 'MA'
)
trace4 = go.Scatter(
    x = job_state.loc[pd.IndexSlice[:, 'VA'], :].index.get_level_values('as_of_date'),
    y = job_state.loc[pd.IndexSlice[:, 'VA'], :].number_of_openings,
    name = 'VA'
)
trace5 = go.Scatter(
    x = job_state.loc[pd.IndexSlice[:, 'TX'], :].index.get_level_values('as_of_date'),
    y = job_state.loc[pd.IndexSlice[:, 'TX'], :].number_of_openings,
    name = 'TX'
)
trace6 = go.Scatter(
    x = job_state.loc[pd.IndexSlice[:, 'IL'], :].index.get_level_values('as_of_date'),
    y = job_state.loc[pd.IndexSlice[:, 'IL'], :].number_of_openings,
    name = 'IL'
)
layout = go.Layout(
        title = "Num of Openings by States",
        xaxis = dict(title='Day'),
        yaxis = dict(title="Number"),
#        yaxis2 = dict(title='# of Employees', overlaying='y', side='right')
)
data = [trace1, trace2, trace3, trace4, trace5, trace6]
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)

In [23]:
### Calculate year-over-year monthly growth rates by states
jobs['Month_Y'] = jobs['as_of_date'].apply(lambda x: x.strftime('%m-%Y'))
GR_State = jobs.groupby(['Month_Y', 'State']).sum()

In [24]:
GR_Half = GR_State.loc[GR_State.index.get_level_values('State').isin(['CA', 'WA', 'MA', 'NY', 'VA', 
                                                                      'TX', 'IL', 'NJ', 'MD', 'PA',
                                                                     'CO', 'GA'])]
GR_Half_2017 = GR_Half.loc[GR_Half.index.get_level_values('Month_Y').isin(['01-2017', '02-2017', '03-2017', 
                                                                        '04-2017', '05-2017', '06-2017'])]
GR_Half_2018 = GR_Half.loc[GR_Half.index.get_level_values('Month_Y').isin(['01-2018', '02-2018', '03-2018', 
                                                                        '04-2018', '05-2018', '06-2018'])]
GR_Half_2018['GR']= (GR_Half_2018.number_of_openings.get_values() - GR_Half_2017.number_of_openings.
 get_values()) / GR_Half_2017.number_of_openings.get_values()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [25]:
GR_Half_2018.GR.index.get_level_values('State')[0:12]

Index(['CA', 'CO', 'GA', 'IL', 'MA', 'MD', 'NJ', 'NY', 'PA', 'TX', 'VA', 'WA'], dtype='object', name='State')

In [27]:
### Year-over-year results of top 12 growing states
GR_data = pd.DataFrame(np.asarray(GR_Half_2018.GR.get_values()).reshape(6,12))
GR_data.columns = GR_Half_2018.GR.index.get_level_values('State')[0:12]
GR_data

State,CA,CO,GA,IL,MA,MD,NJ,NY,PA,TX,VA,WA
0,0.647493,2.472603,13.038462,2.506757,0.327114,20.25,-0.1,1.220828,-0.548295,3.977169,2.623264,-0.231998
1,0.706961,2.02,8.088889,3.520325,0.419204,10.044776,-0.312044,1.398868,0.027119,2.463158,2.138846,-0.191607
2,0.942863,0.31441,4.678571,6.738462,0.554113,3.879032,-0.533608,2.423024,0.200993,2.979933,2.016548,-0.082918
3,0.952056,0.211538,6.084337,6.241667,0.322318,2.480916,0.645161,2.318898,0.222527,2.512129,2.397163,0.020833
4,1.552846,1.134831,5.049383,3.479167,2.38438,5.441176,1.502646,5.018116,1.589286,2.710526,2.789072,10.078818
5,1.795448,0.690821,4.093458,1.608553,4.173302,5.291339,1.151659,4.084507,0.968037,1.188854,2.3099,8.510112


In [28]:
trace0 = go.Bar(
    x = GR_data.columns[0:12],
    y = GR_data.iloc[0,:].get_values(),
    name = 'Jan'
)
trace1 = go.Bar(
    x = GR_data.columns[0:12],
    y = GR_data.iloc[1,:].get_values(),
    name = 'Feb'
)
trace2 = go.Bar(
    x = GR_data.columns[0:12],
    y = GR_data.iloc[2,:].get_values(),
    name = 'Mar'
)
trace3 = go.Bar(
    x = GR_data.columns[0:12],
    y = GR_data.iloc[3,:].get_values(),
    name = 'Apr'
)
trace4 = go.Bar(
    x = GR_data.columns[0:12],
    y = GR_data.iloc[4,:].get_values(),
    name = 'May'
)
trace5 = go.Bar(
    x = GR_data.columns[0:12],
    y = GR_data.iloc[5,:].get_values(),
    name = 'June'
)
layout = go.Layout(
        title = "2017-2018 Year-over-Year Job Posting Growth Rate in Month by States",
        xaxis = dict(title='State'),
        yaxis = dict(title="Growth Rate (%/100)",
                     range=[-2, 18]),
)
data = [trace0, trace1, trace2, trace3, trace4, trace5]
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False, image='jpeg')

### Predicting future job postings
1. By training on the historical posting information and predicting future 30 days
2. In testing the model performance, I use all the dates before 2018-06-20, and using the next 30 days as validation set.

In [29]:
from fbprophet import Prophet

In [30]:
job_daily = jobs.groupby('as_of_date').sum().reset_index()
job_daily = job_daily.iloc[:,[0,2]]
job_daily.tail()

Unnamed: 0,as_of_date,number_of_openings
562,2018-07-17,2219.0
563,2018-07-18,1971.0
564,2018-07-19,2123.0
565,2018-07-20,2263.0
566,2018-07-21,2036.0


In [31]:
def Prediction(data, train_end: str='2018-06-20', future_days: int=30):
    df = data
    df.columns = ['ds', 'y']
    training_time = train_end
    lag = future_days
    train_index = df.loc[(df.ds==str(training_time))].index.get_values()[0]
    df_train, df_test = df[0:train_index], df[train_index:(train_index+int(lag))]
    m = Prophet(holidays_prior_scale=0.5, seasonality_prior_scale=10, yearly_seasonality=True, interval_width=0.95)
#    m.add_seasonality(name='weekly', period=7, fourier_order=80, prior_scale=50)
    m.fit(df_train)
    future = m.make_future_dataframe(periods=lag, include_history=False)
    forecast = m.predict(future)
    ffcast = forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
    ffcast = ffcast.set_index(ffcast.ds)
    df = df.set_index(df.ds)
    ffcast['Orig'] = df.y
    ffcast = ffcast.reset_index(drop=True)
    ffcast.columns = ['date', 'yhat', 'yhat_lower', 'yhat_upper', 'True_Value']
    return ffcast, df_train

In [32]:
ffcast, job_daily_orig = Prediction(job_daily, train_end='2018-06-20', future_days=30)

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.

Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.



In [33]:
upper_bound = go.Scatter(
    x=ffcast['date'],
    y=ffcast['yhat_upper'],
    line = dict(
        color = "#444",
        width = 1),
    opacity=.5,
    showlegend=False)

trace = go.Scatter(
    name='Prediction',
    x=ffcast['date'],
    y=ffcast['yhat'],
    mode='lines',
    line = dict(
        width = 2))

trace1 = go.Scatter(
    name='True Volume',
    x=job_daily['ds'],
    y=job_daily['y'],
    mode='lines',
    line = dict(
        width = 1.5))

lower_bound = go.Scatter(
    x=ffcast['date'],
    y=ffcast['yhat_lower'],
    line = dict(
        color = "#444",
        width = 1),
    opacity=.5,
    name='prediction bound')


data = [upper_bound, lower_bound, trace, trace1]

layout = go.Layout(
    yaxis=dict(title='daily post volume'),
    title='Job Posting Volume Prediction with 95% C.I.',
    showlegend = True)

fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False, image='png')

In [35]:
upper_bound = go.Scatter(
    name='Upper Bound',
    x=ffcast['date'],
    y=ffcast['yhat_upper'],
    mode='lines',
    marker=dict(color="#444"),
    line=dict(width=0),
    fillcolor='rgba(68, 68, 68, 0.3)',
    fill='tonexty',
    showlegend=False)

trace = go.Scatter(
    name='Prediction',
    x=ffcast['date'],
    y=ffcast['yhat'],
    mode='lines',
    line=dict(color='rgb(31, 119, 180)'),
    fillcolor='rgba(68, 68, 68, 0.3)',
    fill='tonexty')

trace1 = go.Scatter(
    name='True Volume',
    x=ffcast['date'],
    y=ffcast['True_Value'],
    mode='lines',
    line=dict(color='rgb(255,0,0)'))

lower_bound = go.Scatter(
    name='Lower Bound',
    x=ffcast['date'],
    y=ffcast['yhat_lower'],
    marker=dict(color="#444"),
    line=dict(width=0),
    mode='lines',
    showlegend=False)

data = [lower_bound, trace, trace1, upper_bound]

layout = go.Layout(
    yaxis=dict(title='daily volume'),
    title='Job Posting Volume Prediction with 95% C.I.',
    showlegend = True)

fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False)

Predicting by category, DS or ML

In [36]:
ds = jobs[jobs.Type == 'DS']
ml = jobs[jobs.Type == 'ML']
ds_daily = ds.groupby('as_of_date').sum().reset_index()
ds_daily = ds_daily.iloc[:,[0,2]]
ml_daily = ml.groupby('as_of_date').sum().reset_index()
ml_daily = ml_daily.iloc[:,[0,2]]
dsfcast, ds_orig = Prediction(ds_daily, train_end='2018-06-20', future_days=30)
mlfcast, ml_orig = Prediction(ml_daily, train_end='2018-06-20', future_days=30)

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.

Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


In [37]:
### Only plotting the dates after 2018-01-01
ds_daily = ds_daily[ds_daily.ds >= '2018-01-01'].reset_index(drop=True)
ml_daily = ml_daily[ml_daily.ds >= '2018-01-01'].reset_index(drop=True)

In [38]:
### Plot the 95% prediction C.I with original posting volume and predicted volume
upper_bound_1 = go.Scatter(
    x=dsfcast['date'],
    y=dsfcast['yhat_upper'],
    line = dict(
        color = "#444",
        width = 1),
    opacity=.5,
    showlegend=False)
trace1 = go.Scatter(
    name='DS job posting prediction',
    x=dsfcast['date'],
    y=dsfcast['yhat'],
    mode='lines',
    line = dict(
        width = 2))
trace2 = go.Scatter(
    name='real DS job posting',
    x=ds_daily['ds'],
    y=ds_daily['y'],
    mode='lines',
    line = dict(
        width = 1.5))
lower_bound_1 = go.Scatter(
    x=dsfcast['date'],
    y=dsfcast['yhat_lower'],
    line = dict(
        color = "#444",
        width = 1),
    opacity=.5,
    name='95% prediction bound')
upper_bound_2 = go.Scatter(
    x=mlfcast['date'],
    y=mlfcast['yhat_upper'],
    line = dict(
        color = "#444",
        width = 1),
    opacity=.5,
    showlegend=False)
trace3 = go.Scatter(
    name='ML job posting prediction',
    x=mlfcast['date'],
    y=mlfcast['yhat'],
    mode='lines',
    line = dict(
        width = 2,
        color = 'rgb(145,191,219)'))
trace4 = go.Scatter(
    name='real ML job posting',
    x=ml_daily['ds'],
    y=ml_daily['y'],
    mode='lines',
    line = dict(
        width = 1.5,
        color = 'rgb(252.0, 141.0, 89.0)'))
lower_bound_2 = go.Scatter(
    x=mlfcast['date'],
    y=mlfcast['yhat_lower'],
    line = dict(
        color = "#444",
        width = 1),
    opacity=.5,
    showlegend=False)
data = [upper_bound_1, lower_bound_1, trace1, trace2, upper_bound_2, lower_bound_2, trace3, trace4]
layout = go.Layout(
    yaxis=dict(title='daily post volume'),
    title='Job Posting Historical Volume and Prediction',
    showlegend = True)
fig = go.Figure(data=data, layout=layout)
offline.iplot(fig, show_link=False, image='png')

### Futher study 
1. To analyze which industry fields that have the fastest growing rates in these two job position postings
2. By combining the Linked-in company field profile, to see if still the tech companys like Amazon, Apple, Facebook dominates or other sales companys like Costco, Walmarts have faster growing demands in those two positions.

In [40]:
ds10 = list(DS.domain.value_counts()[0:10].index)
ml10 = list(ML.domain.value_counts()[0:10].index)
total = list(jobs.domain.value_counts()[0:10].index)
top10 = pd.DataFrame([ds10, ml10, total]).T
top10.columns = ['DS', 'ML', 'All']
top10

Unnamed: 0,DS,ML,All
0,us-amazon.icims.com,us-amazon.icims.com,us-amazon.icims.com
1,jobs.apple.com,jobs.apple.com,jobs.apple.com
2,careers.boozallen.com,jobs.sap.com,www.facebook.com
3,www.capitalonecareers.com,www.facebook.com,jobs.sap.com
4,www.facebook.com,oracle.taleo.net,www.capitalonecareers.com
5,oracle.taleo.net,www.capitalonecareers.com,oracle.taleo.net
6,jpmchase.taleo.net,jobs.intel.com,careers.boozallen.com
7,boards.greenhouse.io,jpmchase.taleo.net,jpmchase.taleo.net
8,www-03.ibm.com,adobe.wd5.myworkdayjobs.com,www-03.ibm.com
9,jobs.sap.com,nvidia.wd5.myworkdayjobs.com,jobs.intel.com


In [41]:
jobs[jobs.domain.str.contains('capitalone')].domain.value_counts()

www.capitalonecareers.com    18201
Name: domain, dtype: int64

In [42]:
np.sum(jobs.domain.str.contains('capitalone'))

18201

In [44]:
### Load in the Linked-in data
linkedin = pd.read_csv("/home/tao/jupyter3/Data_Incu/temp_datalab_records_linkedin_company.csv")
print(linkedin.shape)
display(linkedin.head())


Columns (9,10) have mixed types. Specify dtype option on import or set low_memory=False.



(2426196, 14)


Unnamed: 0,dataset_id,as_of_date,company_name,followers_count,employees_on_platform,link,industry,date_added,date_updated,description,website,entity_id,cusip,isin
0,58329,2015-09-14,Goldman Sachs,552254,38124,https://www.linkedin.com/company/1382,Investment Banking,2015-09-14 00:00:00+00,2015-09-14 00:00:00+00,,,,,
1,58329,2015-09-15,Goldman Sachs,552862,38141,https://www.linkedin.com/company/1382,Investment Banking,2015-09-15 00:00:00+00,2015-09-15 00:00:00+00,,,,,
2,58363,2015-09-16,United Technologies,59157,14982,https://www.linkedin.com/company/2426,Aviation & Aerospace,2015-09-16 00:00:00+00,2015-09-16 00:00:00+00,,,,,
3,58366,2015-09-16,Novo Nordisk,336175,26448,https://www.linkedin.com/company/2227,Pharmaceuticals,2015-09-16 00:00:00+00,2015-09-16 00:00:00+00,,,,,
4,58371,2015-09-16,"Lowe's Companies, Inc.",134255,62574,https://www.linkedin.com/company/4128,Retail,2015-09-16 00:00:00+00,2015-09-16 00:00:00+00,,,,,


In [45]:
linkedin.industry.value_counts()[0:10]

Banking                                168364
Biotechnology                          152710
Financial Services                     148143
Oil & Energy                           116830
Retail                                  95384
Pharmaceuticals                         92107
Information Technology and Services     85066
Computer Software                       83214
Real Estate                             81195
Internet                                75450
Name: industry, dtype: int64

In [46]:
linkedin[linkedin.industry == 'Financial Services'].company_name.value_counts()

Omega Healthcare Investors                              1014
Alleghany Corporation                                   1009
Interactive Brokers                                      968
Fifth Third Bank                                         966
Nasdaq                                                   966
Barclays                                                 965
BlackRock                                                965
American Express                                         963
Navient                                                  963
Blue Hills Bank                                          963
The Carlyle Group                                        963
SVB Financial Group                                      962
CME Group                                                962
UBS                                                      961
Citi                                                     961
The Blackstone Group                                     961
Discover Financial Servi

In [48]:
linkedin[linkedin.company_name.str.contains('UBS')].tail()

Unnamed: 0,dataset_id,as_of_date,company_name,followers_count,employees_on_platform,link,industry,date_added,date_updated,description,website,entity_id,cusip,isin
2407636,58424,2018-07-13,UBS,542133,74145,https://www.linkedin.com/company/1214,Financial Services,2018-07-13 19:42:02.006908+00,2018-07-13 19:42:02.006957+00,,http://www.ubs.com,,,
2413076,58424,2018-07-14,UBS,542185,74147,https://www.linkedin.com/company/1214,Financial Services,2018-07-14 03:35:02.303989+00,2018-07-14 03:35:02.304034+00,,http://www.ubs.com,,,
2418009,58424,2018-07-15,UBS,542485,74229,https://www.linkedin.com/company/1214,Financial Services,2018-07-15 22:31:58.58565+00,2018-07-15 22:31:58.585676+00,,http://www.ubs.com,,,
2418762,58424,2018-07-16,UBS,542795,74272,https://www.linkedin.com/company/1214,Financial Services,2018-07-16 23:57:26.042971+00,2018-07-16 23:57:26.043018+00,,http://www.ubs.com,,,
2425943,58424,2018-07-17,UBS,543055,74298,https://www.linkedin.com/company/1214,Financial Services,2018-07-17 20:19:23.36402+00,2018-07-17 20:19:23.364071+00,,http://www.ubs.com,,,
