## Tpope's plugins

In [210]:
paths = Path('./').glob('tpope/after/plugin/*.vim')
for p in paths:
    print(p)

tpope/after/plugin/vinegar.vim
tpope/after/plugin/flagship.vim
tpope/after/plugin/dispatch.vim
tpope/after/plugin/rsi.vim
tpope/after/plugin/abolish_tpope.vim
tpope/after/plugin/commentary.vim
tpope/after/plugin/endwise.vim
tpope/after/plugin/sensible.vim
tpope/after/plugin/eunuch.vim
tpope/after/plugin/abolish.vim
tpope/after/plugin/scriptease.vim
tpope/after/plugin/sleuth.vim
tpope/after/plugin/unimpaired.vim
tpope/after/plugin/ragtag.vim
tpope/after/plugin/surround.vim


## Vim Plugin Data

Method #1

First get file data and then proccess it.

In [211]:
from pathlib import Path

paths = Path('./').glob('*.vim')
file_data = {}
for path in paths:
    with open(path) as f:
        lines = f.readlines()
        file_data[str(path).split('.')[0]] = lines

In [212]:
file_data.keys()

dict_keys(['mattdev', 'junegunn', 'thinkvim', 'linuxiscool', 'wfxr', 'mach2'])

In [213]:
vim_plug_data = []
for key, value in file_data.items():
    df = pd.DataFrame(value, columns=['lines'])
    df['author'] = key
    vim_plug_data.append(df)
    print(key, len(value))

mattdev 339
junegunn 1744
thinkvim 317
linuxiscool 584
wfxr 1079
mach2 163


In [214]:
pd.concat(vim_plug_data)

Unnamed: 0,lines,author
0,""" Vim-plug initialization\n",mattdev
1,let vim_plug_just_installed = 0\n,mattdev
2,let vim_plug_path = expand('~/.config/nvim/aut...,mattdev
3,if !filereadable(vim_plug_path)\n,mattdev
4,"echo ""Installing Vim-plug...""\n",mattdev
...,...,...
158,""" Automatically install missing plugins on sta...",mach2
159,autocmd VimEnter *\n,mach2
160,"\ if len(filter(values(g:plugs), '!isdirect...",mach2
161,\| PlugInstall --sync | q\n,mach2


Method #2

Fetch and process all in one go.

In [215]:
from pathlib import Path
import pandas as pd

paths = Path('./').glob('*.vim')
vim_plug_data = []
for path in paths:
    with open(path) as f:
        lines = f.readlines()
        file_data = pd.DataFrame(lines, columns=["lines"])
        file_data['user'] = str(path).split('.')[0]
        vim_plug_data.append(file_data)

In [216]:
pd.concat(vim_plug_data)

Unnamed: 0,lines,user
0,""" Vim-plug initialization\n",mattdev
1,let vim_plug_just_installed = 0\n,mattdev
2,let vim_plug_path = expand('~/.config/nvim/aut...,mattdev
3,if !filereadable(vim_plug_path)\n,mattdev
4,"echo ""Installing Vim-plug...""\n",mattdev
...,...,...
158,""" Automatically install missing plugins on sta...",mach2
159,autocmd VimEnter *\n,mach2
160,"\ if len(filter(values(g:plugs), '!isdirect...",mach2
161,\| PlugInstall --sync | q\n,mach2


## Analysis

In [217]:
df = pd.concat(vim_plug_data)

Word Count Total

In [218]:
wordcount = df['lines'].str.split(expand=True).stack().value_counts()

In [219]:
wordcount.rename_axis('vals').reset_index(name='count').head(20)

Unnamed: 0,vals,count
0,"""",1292
1,let,525
2,=,506
3,Plug,374
4,\,319
5,|,184
6,if,132
7,set,129
8,{,125
9,<silent>,123


Remove Symbols

In [220]:
from nltk.tokenize import RegexpTokenizer

tokenizer = RegexpTokenizer(r'\w+')
df['tokenized'] = df['lines'].apply(lambda text: ' '.join(tokenizer.tokenize(text)))

In [221]:
df

Unnamed: 0,lines,user,tokenized
0,""" Vim-plug initialization\n",mattdev,Vim plug initialization
1,let vim_plug_just_installed = 0\n,mattdev,let vim_plug_just_installed 0
2,let vim_plug_path = expand('~/.config/nvim/aut...,mattdev,let vim_plug_path expand config nvim autoload ...
3,if !filereadable(vim_plug_path)\n,mattdev,if filereadable vim_plug_path
4,"echo ""Installing Vim-plug...""\n",mattdev,echo Installing Vim plug
...,...,...,...
158,""" Automatically install missing plugins on sta...",mach2,Automatically install missing plugins on startup
159,autocmd VimEnter *\n,mach2,autocmd VimEnter
160,"\ if len(filter(values(g:plugs), '!isdirect...",mach2,if len filter values g plugs isdirectory v val...
161,\| PlugInstall --sync | q\n,mach2,PlugInstall sync q


Wordcount on non-commented lines

In [222]:
df[~df['lines'].str.startswith('"')]['tokenized'].str.split(expand=True).stack().value_counts().rename_axis('vals').reset_index(name='count').head(20)

Unnamed: 0,vals,count
0,let,465
1,Plug,417
2,vim,358
3,g,333
4,s,242
5,1,190
6,call,176
7,0,160
8,silent,158
9,a,152


Wordcount on commented lines with stopwords removed

In [223]:
from nltk.corpus import stopwords
from nltk import word_tokenize

In [224]:
stop_words = set(stopwords.words('english'))

In [225]:
df['tokenized_without_stop_words'] = df['tokenized'].apply(lambda text: ' '.join([word for word in word_tokenize(text) if word not in stop_words and len(word) > 1 and '__' not in word]))

In [226]:
df[df['lines'].str.startswith('"')]['tokenized_without_stop_words'].str.split(expand=True).stack().value_counts().rename_axis('vals').reset_index(name='count').head(25)

Unnamed: 0,vals,count
0,vim,166
1,let,72
2,Plug,72
3,CR,21
4,leader,19
5,plug,17
6,nmap,16
7,easymotion,16
8,silent,16
9,Leader,16


Uncommented lines

In [227]:
df[~df['lines'].str.startswith('"')]['tokenized_without_stop_words'].str.split(expand=True).stack().value_counts().rename_axis('vals').reset_index(name='count').head(25)

Unnamed: 0,vals,count
0,let,465
1,Plug,417
2,vim,358
3,call,176
4,silent,158
5,set,126
6,leader,122
7,nnoremap,114
8,CR,113
9,endif,111


Command count

In [228]:
commands = df[~df['lines'].str.startswith('"')]

In [231]:
commands.loc[:, 'lines'] = commands.loc[:, 'lines'].str.strip()

In [232]:
commands['lines'].value_counts().head(40)

                                                   539
endif                                               92
endfunction                                         64
augroup END                                         23
else                                                16
endfor                                              16
autocmd!                                            13
# }}}                                               11
au!                                                  8
hook_add: |                                          7
hook_source: |                                       7
\   },                                               7
return                                               7
break                                                6
endwhile                                             6
\ }                                                  5
end                                                  5
Plug 'tpope/vim-fugitive'                            5
if s:darwi

Plugin data

In [382]:
import re
plugins = commands[commands['lines'].str.startswith('Plug')].drop(['tokenized','tokenized_without_stop_words'], axis=1)
plugins['lines'] = plugins['lines'].apply(lambda s: re.findall(r'\'(.*?)\'', s)[0])

# Thinkvim plugins
with open('thinkvim.vim') as f:
    lines = f.readlines()
thinkvim = pd.DataFrame(lines, columns=['lines'])
thinkvim['user'] = 'thinkvim'
thinkvim_plugins.loc[:, 'lines'] = thinkvim[thinkvim['lines'].str.startswith('- repo')]['lines'].apply(lambda x: x.split('repo: ')[1].strip())

plugins = pd.concat([plugins, thinkvim_plugins])

# Justinmk plugins
with open('justinmk.vim') as f:
    lines = f.readlines()
justinmk = pd.DataFrame(lines, columns=['lines'])
justinmk['user'] = 'justinmk'

justinmk = justinmk[justinmk['lines'].str.contains('minpac#add')]

justinmk['lines'] = justinmk['lines'].apply(lambda s: re.findall(r'\'(.*?)\'', s)[0].replace('https://github.com/','').replace('.git',''))

plugins = pd.concat([plugins, justinmk])

In [384]:
plugins['lines'].value_counts().head(40)

tpope/vim-fugitive                7
junegunn/fzf                      6
junegunn/goyo.vim                 6
neoclide/coc.nvim                 6
tpope/vim-repeat                  5
Yggdroot/indentLine               5
junegunn/fzf.vim                  5
tpope/vim-surround                5
mbbill/undotree                   4
sheerun/vim-polyglot              4
junegunn/gv.vim                   4
iamcco/markdown-preview.nvim      4
tpope/vim-rhubarb                 4
mhinz/vim-signify                 3
tpope/vim-commentary              3
scrooloose/nerdtree               3
voldikss/vim-floaterm             3
junegunn/vim-easy-align           3
alvan/vim-closetag                3
tpope/vim-endwise                 3
airblade/vim-rooter               3
jiangmiao/auto-pairs              3
norcalli/nvim-colorizer.lua       3
ryanoasis/vim-devicons            3
liuchengxu/vista.vim              3
rhysd/git-messenger.vim           3
junegunn/limelight.vim            3
mhinz/vim-startify          

In [385]:
plugins['author'] = plugins['lines'].str.split('/').apply(lambda x: x[0])
plugins['plugin'] = plugins['lines'].str.split('/').apply(lambda x: x[1])

In [386]:
plugins

Unnamed: 0,lines,user,author,plugin
22,luochen1990/rainbow,mattdev,luochen1990,rainbow
25,junegunn/vim-emoji,mattdev,junegunn,vim-emoji
28,preservim/nerdcommenter,mattdev,preservim,nerdcommenter
31,neoclide/coc.nvim,mattdev,neoclide,coc.nvim
34,arielrossanigo/dir-configs-override.vim,mattdev,arielrossanigo,dir-configs-override.vim
...,...,...,...,...
135,chrisbra/Colorizer,justinmk,chrisbra,Colorizer
137,junegunn/fzf,justinmk,junegunn,fzf
138,junegunn/fzf.vim,justinmk,junegunn,fzf.vim
141,tpope/vim-projectionist,justinmk,tpope,vim-projectionist


In [387]:
df = plugins.groupby(['author','user']).count().sort_values('lines', ascending=False).reset_index()

In [388]:
import numpy as np

In [389]:
authors = df.pivot(index='author',columns='user',values='plugin').replace(np.nan, 0)
authors['count'] = authors.sum(axis=1)

In [390]:
authors.sort_values('count', ascending=False).head(20)

user,junegunn,justinmk,linuxiscool,mach2,mattdev,thinkvim,wfxr,count
author,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
tpope,9.0,19.0,6.0,8.0,1.0,2.0,6.0,51.0
junegunn,21.0,3.0,7.0,4.0,4.0,2.0,8.0,49.0
neoclide,1.0,0.0,3.0,1.0,1.0,1.0,2.0,9.0
mattn,0.0,0.0,0.0,3.0,1.0,2.0,3.0,9.0
justinmk,1.0,6.0,0.0,1.0,0.0,0.0,1.0,9.0
kana,0.0,0.0,0.0,0.0,0.0,6.0,2.0,8.0
AndrewRadev,2.0,1.0,0.0,1.0,0.0,1.0,2.0,7.0
Shougo,0.0,0.0,1.0,0.0,1.0,3.0,1.0,6.0
liuchengxu,0.0,0.0,1.0,2.0,0.0,3.0,0.0,6.0
mhinz,1.0,0.0,1.0,1.0,1.0,0.0,2.0,6.0


Thinkvim is a yaml file. Let's get the plugins there.

In [391]:
crosstab = pd.crosstab(plugins['lines'],plugins['user'])
crosstab['count'] = crosstab.sum(axis=1)
crosstab = crosstab.sort_values('count', ascending=False)

In [392]:
crosstab.head(40)

user,junegunn,justinmk,linuxiscool,mach2,mattdev,thinkvim,wfxr,count
lines,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
tpope/vim-fugitive,1,1,1,1,1,1,1,7
junegunn/goyo.vim,1,0,1,1,1,1,1,6
neoclide/coc.nvim,1,0,1,1,1,1,1,6
junegunn/fzf,1,1,1,1,1,0,1,6
Yggdroot/indentLine,1,0,1,0,1,1,1,5
junegunn/fzf.vim,1,1,1,1,1,0,0,5
tpope/vim-surround,1,1,1,1,0,0,1,5
tpope/vim-repeat,1,1,1,1,0,0,1,5
iamcco/markdown-preview.nvim,1,0,0,1,0,1,1,4
mbbill/undotree,1,1,0,1,0,0,1,4


In [357]:
crosstab

user,junegunn,linuxiscool,mach2,mattdev,thinkvim,wfxr,count
lines,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
neoclide/coc.nvim,1,1,1,1,1,1,6
junegunn/goyo.vim,1,1,1,1,1,1,6
tpope/vim-fugitive,1,1,1,1,1,1,6
junegunn/fzf,1,1,1,1,0,1,5
Yggdroot/indentLine,1,1,0,1,1,1,5
...,...,...,...,...,...,...,...
kana/vim-operator-user,0,0,0,0,1,0,1
kana/vim-smartchr,0,0,0,0,1,0,1
kana/vim-textobj-entire,0,0,0,0,0,1,1
kana/vim-textobj-function,0,0,0,0,1,0,1
