<a href="https://colab.research.google.com/github/nglglhtr/slack-analysis/blob/master/Kernel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Kernel Slack analysis 

In [259]:
import pandas as pd

### All members of the org (user id, name, email)

In [260]:
# Slack member export; only the identifying columns are needed.
member_export = 'KERNEL 🌱 Member Analytics All time - Sep 10 2020.csv'

# Key the table by Slack user id so later cells can look members up by id.
users = pd.read_csv(
    member_export,
    usecols=['Name', 'Email', 'User ID'],
).set_index('User ID')
users

Unnamed: 0_level_0,Name,Email
User ID,Unnamed: 1_level_1,Unnamed: 2_level_1
U016C8XH6NR,Sachin,sachin@gitcoin.co
U017E2A2KPS,Jo-Ann A. Hamilton,joann.hamilton@gmail.com
U016PNPMFSN,arya,arya@getuni.app
U016C909613,vivek,vivek@gitcoin.co
U016QSTSQQM,Alex Thomas,hello@cawfree.com
...,...,...
U016KTMF17Y,seb,seb@zapper.fi
U016MASHCGN,pakokrew,pakokrew@gmail.com
U01A4R3KSR2,pri,pri@openlaw.io
U01ANEXFV9N,victorrortvedt,victorrortvedt@gmail.com


### Calculating `popularity_index`

- Calculate total messages sent in all channels by each user
- Calculate total reacts received across all the messages sent
- `popularity_index = reacts_received / messages_sent` (members who sent no messages get `NaN`)

In [261]:
import glob
import numpy as np

path = r'channels'
all_files = glob.glob(path + "/*.csv")

# Load only the two columns the metric needs from every channel export.
channel_frames = [
    pd.read_csv(channel, usecols=['user', 'total_reactions'])
    for channel in all_files
]

# Stack all channels into one message table. pd.concat raises on an empty
# list, so fall back to an empty, correctly-typed frame when no exports exist.
if channel_frames:
    messages = pd.concat(channel_frames, ignore_index=True)
else:
    messages = pd.DataFrame({
        'user': pd.Series(dtype=object),
        'total_reactions': pd.Series(dtype=float),
    })

# Aggregate once per user instead of updating one cell per message row:
#   reacts_received = sum of total_reactions over the user's messages
#   messages_sent   = number of message rows for the user
reactions_by_user = messages.groupby('user')['total_reactions']
most_popular = (
    pd.DataFrame({
        'reacts_received': reactions_by_user.sum(),
        'messages_sent': reactions_by_user.size(),
    })
    # Align to the member list: members with no messages get 0 for both
    # counts, and messages from ids that are missing or not in `users`
    # are dropped rather than aborting the run with a KeyError.
    .reindex(users.index, fill_value=0)
    # Keep float columns so the table matches the previously published output.
    .astype(float)
)

# Members with no messages evaluate 0.0 / 0.0, which pandas reports as NaN.
most_popular['popularity_index'] = (
    most_popular['reacts_received'] / most_popular['messages_sent']
)
most_popular

Unnamed: 0_level_0,reacts_received,messages_sent,popularity_index
User ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
U016C8XH6NR,11.0,19.0,0.578947
U017E2A2KPS,8.0,3.0,2.666667
U016PNPMFSN,2.0,4.0,0.500000
U016C909613,122.0,13.0,9.384615
U016QSTSQQM,24.0,2.0,12.000000
...,...,...,...
U016KTMF17Y,0.0,0.0,
U016MASHCGN,0.0,1.0,0.000000
U01A4R3KSR2,0.0,0.0,
U01ANEXFV9N,0.0,0.0,


### Get top 100 most popular (by popularity index)

In [263]:
# Order members from highest to lowest reacts-per-message, then keep the
# first 100 rows of that ranking.
ranked_by_popularity = most_popular.sort_values(by='popularity_index', ascending=False)
top_users = ranked_by_popularity.iloc[:100]
top_users

Unnamed: 0_level_0,reacts_received,messages_sent,popularity_index
User ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
U017X1PE8HX,39.0,2.0,19.5
U01927CCPFE,15.0,1.0,15.0
U018W6481FC,27.0,2.0,13.5
U016QSTSQQM,24.0,2.0,12.0
U016Z293QVA,19.0,2.0,9.5
...,...,...,...
U016PP38YLA,0.0,1.0,0.0
U016DKFKRCP,0.0,1.0,0.0
U0169HB0BKR,0.0,1.0,0.0
U016R8NL71T,0.0,1.0,0.0


In [265]:
# Attach each top user's name and email by matching on the shared
# 'User ID' key (inner join, the pd.merge default).
fin = top_users.merge(users, how='inner', on='User ID')
fin

Unnamed: 0_level_0,reacts_received,messages_sent,popularity_index,Name,Email
User ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
U017X1PE8HX,39.0,2.0,19.5,austin,austin@ethereum.org
U01927CCPFE,15.0,1.0,15.0,Sam Hatem,sam@outpost-protocol.com
U018W6481FC,27.0,2.0,13.5,tate,tate@arceum.co
U016QSTSQQM,24.0,2.0,12.0,Alex Thomas,hello@cawfree.com
U016Z293QVA,19.0,2.0,9.5,jordan,jordan@niau.io
...,...,...,...,...,...
U016PP38YLA,0.0,1.0,0.0,David Tomu,dgtomu@gmail.com
U016DKFKRCP,0.0,1.0,0.0,ciganek.luka,ciganek.luka@gmail.com
U0169HB0BKR,0.0,1.0,0.0,Belsy,email-me@belsy.space
U016R8NL71T,0.0,1.0,0.0,Kevin,kdm5126@gmail.com


In [266]:
# Persist the ranked table, including the 'User ID' index, next to the notebook.
# NOTE(review): the file contains member names and emails — confirm it is safe to share.
fin.to_csv(path_or_buf='top_100.csv', index=True)