In [1]:
import pandas as pd
from IPython.core.display import HTML

path = "../input/"

versions = pd.read_csv(path+"KernelVersions.csv")
kernels = pd.read_csv(path+"Kernels.csv")
users = pd.read_csv(path+"Users.csv")

language_map = {'1' : 'R','5' : 'R', '12' : 'R', '13' : 'R', '15' : 'R', '16' : 'R',
                '2' : 'Python','8' : 'Python', '9' : 'Python', '14' : 'Python'}

def pressence_check(title, tokens, ignore = []):
    present = False
    for token in tokens:
        words = token.split()
        if all(wrd.lower().strip() in title.lower() for wrd in words):
            present = True
    for token in ignore:
        if token in title.lower():
            present = False
    return present 

## check if the latest version of the kernel is about the same topic 
def get_latest(idd):
    latest = versions[versions['KernelId'] == idd].sort_values('VersionNumber', ascending = False).iloc(0)[0]
    return latest['VersionNumber']

def get_kernels(tokens, n, ignore = []):
    versions['isRel'] = versions['Title'].apply(lambda x : pressence_check(x, tokens, ignore))
    relevant = versions[versions['isRel'] == 1]
    results = relevant.groupby('KernelId').agg({'TotalVotes' : 'sum', 
                                                'KernelLanguageId' : 'max', 
                                                'Title' : lambda x : "#".join(x).split("#")[-1],
                                                'VersionNumber' : 'max'})
    results = results.reset_index().sort_values('TotalVotes', ascending = False).head(n)
    results = results.rename(columns={'KernelId' : 'Id', 'TotalVotes': 'Votes'})


    results['latest_version']  = results['Id'].apply(lambda x : get_latest(x))
    results['isLatest'] = results.apply(lambda r : 1 if r['VersionNumber'] == r['latest_version'] else 0, axis=1)
    results = results[results['isLatest'] == 1]

    results = results.merge(kernels, on="Id").sort_values('TotalVotes', ascending = False)
    results = results.merge(users.rename(columns={'Id':"AuthorUserId"}), on='AuthorUserId')
    results['Language'] = results['KernelLanguageId'].apply(lambda x : language_map[str(x)] if str(x) in language_map else "")
    results = results.sort_values("TotalVotes", ascending = False)
    return results[['Title', 'CurrentUrlSlug','Language' ,'TotalViews', 'TotalComments', 'TotalVotes', "DisplayName","UserName"]]


def best_kernels(tokens, n = 10, ignore = []):
    response = get_kernels(tokens, n, ignore)     
    hs = """<style>
                .rendered_html tr {font-size: 12px; text-align: left}
            </style>
            <h3><font color="#1768ea">"""+tokens[0].title()+"""</font></h3>
            <table>
            <th>
                <td><b>Kernel</b></td>
                <td><b>Author</b></td>
                <td><b>Language</b></td>
                <td><b>Views</b></td>
                <td><b>Comments</b></td>
                <td><b>Votes</b></td>
            </th>"""
    for i, row in response.iterrows():
        url = "https://www.kaggle.com/"+row['UserName']+"/"+row['CurrentUrlSlug']
        aurl= "https://www.kaggle.com/"+row['UserName']
        hs += """<tr>
                    <td>"""+str(i+1)+"""</td>
                    <td><a href="""+url+""" target="_blank"><b>"""  + row['Title'] + """</b></a></td>
                    <td><a href="""+aurl+""" target="_blank">"""  + row['DisplayName'] + """</a></td>
                    <td>"""+str(row['Language'])+"""</td>
                    <td>"""+str(row['TotalViews'])+"""</td>
                    <td>"""+str(row['TotalComments'])+"""</td>
                    <td>"""+str(row['TotalVotes'])+"""</td>
                    </tr>"""
    hs += "</table>"
    display(HTML(hs))

# Data Science Glossary on Kaggle

Kaggle is the place to do data science projects. There are so many algorithms and concepts to learn. Kaggle Kernels are one of the best resources on internet to understand the practical implementation of algorithms. There are almost 200,000 kernels published on kaggle and sometimes it becomes diffcult to search for the right implementation. I have used the [Meta Kaggle](https://www.kaggle.com/kaggle/meta-kaggle) database to create a glossary of data science models, techniques and tools shared on kaggle kernels. One can use this kernel as the one place to find other great kernels shared by great authors. Hope you like this kernel.  


## Contents 

<ul>
  <li>1. Regression Algorithms
    <ul>
    <li>1.1 Linear Regression</li>
    <li>1.2 Logistic Regression</li>
    </ul>
  </li>
    <li>2. Regularization Algorithms
    <ul>
    <li>2.1 Ridge Regression Regression</li>
    <li>2.2 Lasso Regression</li>
    <li>2.3 Elastic Net</li>
    </ul>
  </li>
  </li>
    <li>3. Tree Based Models
    <ul>
    <li>3.1 Decision Tree</li>
    <li>3.2 Random Forests</li>
    <li>3.3 Lightgbm</li>
    <li>3.4 XgBoost</li>
    <li>3.5 Cat Boost</li>
    </ul>
  </li>
<li>4. Neural Networks and Deep Learning
    <ul>
    <li>4.1 Neural Networks</li>
    <li>4.2 AutoEncoders</li>
    <li>4.3 DeepLearning</li>
    <li>4.4 Convolutional Neural Networks</li>
    <li>4.5 LSTMs</li>
    <li>4.6 GRUs</li>
    <li>4.7 MxNet</li>
    <li>4.8 ResNet</li>
    <li>4.9 CapsuleNets</li>
    <li>4.10 VGGs</li>
    <li>4.11 Inception Nets</li>
     <li>4.12 Computer Vision</li>
     <li>4.13 Transfer Learning</li>
     </ul>
  </li>
<li>5. Clustering Algorithms
    <ul>
    <li>5.1 K Means Clustering </li>
    <li>5.2 Hierarchial Clustering</li>
    <li>5.3 DB Scan</li>
    <li>5.4 Unsupervised Learning </li>
    </ul>
  </li>
  <li>6. Misc - Models
    <ul>
    <li>6.1 K Naive Bayes </li>
    <li>6.2 SVMs</li>
    <li>6.3 KNN</li>
    <li>6.4 Recommendation Engine </li>
    </ul>
  </li>
  <li>7.1 Data Science Techniques - Preprocessing
    <ul>
    <li>a. EDA, Exploration </li>
    <li>b. Feature Engineering </li>
    <li>c. Feature Selection </li>
    <li>d. Outlier Treatment</li>
    <li>e. Anomaly Detection</li>
    <li>f. SMOTE</li>
    <li>g. Pipeline</li>
    </ul>
  </li>
  <li>7.2 Data Science Techniques - Dimentionality Reduction
    <ul>
    <li>a. Dataset Decomposition </li>
    <li>b. PCA </li>
    <li>c. Tsne </li>
    </ul>
  </li>
  <li>7.3 Data Science Techniques - Post Modelling
    <ul>
    <li>a. Cross Validation </li>
    <li>b. Model Selection </li>
    <li>c. Model Tuning </li>
    <li>d. Grid Search </li>
    </ul>
  </li>
  <li>7.4 Data Science Techniques - Ensemblling
    <ul>
    <li>a. Ensembling </li>
    <li>b. Stacking </li>
    <li>c. Bagging</li>
    </ul>
  </li>
  <li>8. Text Data 
    <ul>
    <li>8.1. NLP </li>
    <li>8.2. Topic Modelling </li>
    <li>8.3. Word Embeddings </li>
    </ul>
  </li>
 <li>9. Data Science Tools 
    <ul>
    <li>9.1 Scikit Learn </li>
    <li>9.2 TensorFlow </li>
    <li>9.3 Theano </li>
    <li>9.4 Kears </li>
    <li>9.5 PyTorch </li>
    <li>9.6 Vopal Wabbit </li>
    <li>9.7 ELI5 </li>
    <li>9.8 HyperOpt </li>
    <li>9.9 Pandas </li>
    <li>9.10 Sql </li>
    <li>9.11 BigQuery </li>
    </ul>
  </li>
<li>10. Data Visualizations 
    <ul>
    <li>10.1. Visualizations </li>
    <li>10.2. Plotly </li>
    <li>10.3. Seaborn </li>
    <li>10.4. D3.Js </li>
    <li>10.5. Bokeh </li>
    </ul>
  </li>
</ul>

<br><br>

## 1. Regression Algorithms


In [2]:
tokens = ["linear regression"]
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Price analysis and Linear Regression,Tony Pino,Python,8085,10,43
2,Linear Regression to predict Market Value,ShubhamMaurya,R,7872,7,28
3,Health Care Cost Prediction w/ Linear Regression,def me(x),R,1549,4,21
4,Simple Linear Regression in R,zohan,R,5212,8,21
5,Predictions with XGboost and Linear Regression,MuhammetBurakErgenc,Python,15901,2,18
6,Simple Linear Regression in R (0.0648835),JT,R,3121,12,17
7,In-Depth Simple Linear Regression,Nick Brooks,Python,1044,5,15
8,Linear regression (LB: 0.0091176),Achal,Python,4359,4,14
9,Simple One Feature Linear Regression,Ariadne,Python,2036,8,12
10,Category + TF-IDF + Linear Regression,Jason King,Python,1909,11,11


In [3]:
tokens = ['logistic regression', "logistic"]
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Logistic regression with words and char n-grams,Bojan Tunguz,Python,29284,95,345
2,Logistic Regression and ROC Curve Primer,Troy Walters,R,9542,18,79
3,Logistic regression with words and char n-grams,thousandvoices,Python,5715,14,79
4,Example: Attacking logistic regression,Allunia,Python,8077,5,77
5,Bayesian Logistic Regression with rstanarm,Aki Vehtari,R,18580,15,55
6,Simple logistic model - PORTO,Sudhir Kumar,Python,4074,25,49
8,Logistic of Genetic Features,Andy Harless,Python,2752,11,35
9,Starter Logistic Regression in R,mlandry,R,7143,0,34
7,Logistic Regression TFIDF,Sudhir Kumar,Python,5440,8,32


## 2. Regularization Algorithms

In [4]:
tokens = ['Ridge']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Ridge (LB 0.41943),Serg Lavrikov,Python,6081,34,155
2,Ridge Script,Alexandru Papiu,Python,5395,24,122
3,Mercari RNN + 2Ridge models with notes (~0.42755),Patrick DeKelly,Python,7021,14,102
4,More Effective Ridge LGBM Script (LB 0.44823),Bojan Tunguz,Python,8592,24,94
5,avito_LightGBM with Ridge Feature V 2.0,Himanshu Chaudhary,Python,9586,25,85
6,LightGBM with Ridge Feature,Dan Emery,Python,2941,9,55
7,Ridge (LB: 0.0100659),Yunfeng Zhu,Python,10662,14,54
8,Wordbatch+Ridge + FM_FRTL + Target Encoding + LGBM,Samrat P,Python,2477,14,48
9,avito_LightGBM with Ridge Feature V 3.0 [0.2219],Samrat P,Python,4360,7,45
10,Modified Wordbatch + Ridge + FM_FTRL + LGB,Peter Hurford,Python,1578,7,42


In [5]:
tokens = ['Lasso']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,"House prices: Lasso, XGBoost, and a detailed EDA",Erik Bruin,R,24826,112,325
2,XGBoost + Lasso,Human Analog,Python,24220,24,104
3,Lasso model for regression problem,Boris Klyus,Python,10378,33,57
4,Lasso Ridge Implementation,Bisaria,R,6203,9,23
5,You got this!!!! Feature Engineering and Lasso,SarthakYadav,Python,3988,11,21
6,Lasso + GBM + XGBOOST - Top 20 % (0.12039) using R,Aniruddha Chakraborty,R,6045,8,19
7,XGboost + Ridge + Lasso,Julien Heiduk,Python,13154,2,19
8,FS(Lasso)+HyperParamTuning(HyperOpt),Abhilash Awasthi,Python,446,0,9
9,Top 20% - Interpretable Solution using Lasso,Telmo Felgueira,Python,396,3,9


In [6]:
tokens = ['ElasticNet']
best_kernels(tokens, 4)

0,1,2,3,4,5,6
1,Top 7% using ElasticNet with Interactions,Jack Roberts,Python,1647,16,34
2,ElasticNet (LB 0.547+) and feature importance,den3b,Python,1354,6,20
3,"Stack of SVM,ElasticNet,XGBoost,RF // ~ 0.55",Eike Dehling,Python,2972,15,19
4,House Price predict score 0.14205 by ElasticNet,JuHyung,Python,286,1,3


## 3. Tree Based Models

In [7]:
tokens = ['Decision Tree']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Decision Tree Visualization & Submission,Arda Yildirim,R,69919,30,127
2,Explore & Explain: Density & Decision Trees,msjgriffiths,R,11563,19,87
3,Introduction to Decision Trees (Titanic dataset),Diego Milla,Python,16495,17,32
4,Ensemble Prediction - Decision Tree & NNet,Sheik Mohamed Imran,R,1130,6,16
5,Decision Trees for Binary Classification (0.99),paultimothymooney,Python,1327,2,16
6,Topic 3. Decision Trees and kNN,Yury Kashnitsky,Python,98,0,11
7,Decision Tree from scratch(not sklearn),ruchit rawal,Python,135,2,8
8,Decision TREE or Random FOREST (500 trees)?,Data Framed,R,712,4,8
9,Simple Decision Tree Model for Beginners,Arnab,R,724,0,8
10,Stephen Curry's Decision Tree,DrGuillermo,Python,840,0,8


In [8]:
tokens = ['random forest']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Random Forests,DanB,Python,45391,2,488
2,Quick & Dirty RandomForest,Megan Risdal,R,37320,36,191
3,Random Forest Benchmark (R),Ben Hamner,R,112878,54,173
5,Random Forest And KNN on a few blocks,Alexandru Papiu,R,28288,36,159
6,"Feature Ranking RFE, Random Forest, linear models",Anisotropic,Python,20666,32,159
7,Random Forest Starter with numerical features,Li Li,Python,13163,18,158
4,Random Forest Benchmark,Ben Hamner,R,52015,23,90
8,Titanic Random Forest: 82.78%,ZlatanKremonic,Python,4549,24,75
9,H2O Random Forest Example (0.11578),mlandry,R,38432,27,71
10,Random forest using elemental properties,Chris Bartel,Python,3237,4,66


In [9]:
tokens = ['lightgbm', 'light gbm', 'lgb']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,[Updated 0.792 LB] LightGBM with Simple Features,Aguiar,Python,17271,78,267
2,LightGBM (Fixing unbalanced data) LB: 0.9680,Pranav Pandya,Python,17165,84,173
3,Aggregated features & LightGBM,Benjamin Minixhofer,Python,8722,31,168
4,"preprocessing, model averaging by xgb + lgb [1.39]",Alex,Python,8866,66,165
5,"EDA, feature engineering and xgb + lgb",Andrew Lukyanenko,Python,2151,7,135
6,"1st Place LGB Model(public:0.470, private:0.502)",piupiu,Python,7742,77,133
7,non-blending lightGBM model LB: 0.977,Baris Kanber,Python,11841,113,128
8,light GBM benchmark 0.3692,paulantoine,Python,19185,58,127
9,TalkingData: Added new features in LightGBM,Md Asraful Kabir,Python,12347,45,124
10,LightGBM with weighted averages & dropout [.787],James Shepherd,Python,8708,35,121


In [10]:
tokens = ['xgboost', 'xgb']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Data Analysis & XGBoost Starter (0.35460 LB),anokas,Python,71440,132,902
3,"House prices: Lasso, XGBoost, and a detailed EDA",Erik Bruin,R,24826,112,325
4,Learning to Use XGBoost,DanB,Python,33964,2,288
5,Understanding XGBoost Model on Otto Data,Tianqi Chen,R,108246,16,261
6,XGBoost CV (LB .284),Andy Harless,Python,23302,67,227
7,TalkingData XGBoost - LB: 0.966,João Pedro Peinado,Python,21585,85,186
2,Simple XGBoost Starter (~0.0655),anokas,Python,21342,36,171
8,"preprocessing, model averaging by xgb + lgb [1.39]",Alex,Python,8866,66,165
9,mxnet + xgboost baseline [LB: 0.57],n01z3,Python,21836,48,163
10,Instacart XGBoost Starter - LB 0.3791,Fabienvs,R,20306,65,154


In [11]:
tokens = ['catboost']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,LightGBM + XGBoost + Catboost,Samrat P,Python,7749,15,109
2,"Stacking Test-Sklearn, XGBoost, CatBoost, LightGBM",Eliot Barril,Python,4841,20,75
3,Simple CatBoost,Nick Brooks,Python,3605,10,60
4,Concise catboost starter ensemble (PLB: 0.06435),See--,Python,4376,16,56
5,"CatBoost, StackedAE with MXNet, Meta [1.40LB]",Tanrei(nama),Python,2572,12,46
6,CatBooStarter,Vladimir Demidov,Python,2730,2,45
7,Simple CatBoost,HaimFeldman,Python,3685,16,34
8,Naive CatBoost,Bruno G. do Amaral,Python,2123,1,27
9,CatBoost Starter (LB 0.517),Bojan Tunguz,Python,1472,13,24
10,Simple CatBoost CV (LB .281),Andy Harless,Python,3396,7,24


## 4. Neural Networks and Deep Learning Models

In [12]:
tokens = ['neural network']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Deep Neural Network Keras way,Poonam Ligade,Python,37771,89,283
2,Neural Network Approach,Claire Longo,Python,9906,24,68
3,Random Forest vs XGBoost vs Deep Neural Network,Amandeep Rathee,R,14304,3,64
4,Neural Network Model for House Prices (TensorFlow),Julien Heiduk,Python,15641,14,57
5,Surprise Me 2! Neural Networks(keras),NitinSurya,Python,7055,12,53
6,Build your own neural network in R,JunMa,R,12147,14,52
7,NEURAL NETWORK USING SGD,Jean Carlo Codogno,Python,6171,43,42
8,Recurrent Neural Network with Pytorch,Kaan Can,Python,822,10,40
9,3D Convolutional Neural Network w/o Programming,DeepMan,Python,7520,12,35
10,Embedding with Neural Network,spongebob,Python,1971,4,32


In [13]:
tokens = ['autoencoder']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Manifold Learning And Autoencoders,Alexandru Papiu,Python,5085,8,45
2,Autoencoder and Deep Features,Loic Merckel,R,2809,6,31
3,H2O - Autoencoders and anomaly detection (Python),Sheik Mohamed Imran,Python,4829,2,21
4,2D Visualization: PCA & ICA vs Autoencoders,den3b,Python,1678,6,16
5,Denoising: Autoencoders to the rescue!!,NAIN,Python,1000,5,15
6,Simple denoise autoencoder with Keras,Roberto Spadim,Python,414,15,13
7,Denoising Autoencoder,OsciiArt,Python,1050,1,13
8,1. Autoencoder with Keras,zihaox,Python,4515,5,13
9,Visualizing MNIST using a Variational Autoencoder,Rebecca Vislay Wade,Python,1651,0,13
10,A very basic Autoencoder,Scirpus,Python,859,3,10


In [14]:
tokens = ['deep learning']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Deep Learning Tutorial for Beginners,Kaan Can,Python,26552,137,353
2,Intro to Deep Learning and Computer Vision,DanB,Python,54649,34,196
5,Deep learning support [.9663],Alexander Kireev,Python,12190,94,139
6,Welcome to deep learning (CNN 99%),Peter Grenholm,Python,18644,44,132
3,A Deeper Understanding of Deep Learning,DanB,Python,8718,5,119
7,Deep learning in TF with upsampling [LB: .758],James Shepherd,Python,7716,49,119
8,EDA Recommender SystemDeep LearningModel Intuition,Badal Gupta,Python,2480,32,105
9,Deep Learning,Umberto,Python,14388,46,78
10,Starting Kit for PyTorch Deep Learning,Mamy Ratsimbazafy,Python,13325,17,61
4,Rectified Linear Units (ReLU) in Deep Learning,DanB,Python,6971,3,59


In [15]:
tokens = ['convolutional neural networks', 'cnn']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Introduction to CNN Keras - 0.997 (top 6%),Yassine Ghouzam,Python,49189,211,1059
2,Welcome to deep learning (CNN 99%),Peter Grenholm,Python,18644,44,132
3,Transfer Learning with VGG-16 CNN+AUG LB 0.1712,DeveshMaheshwari,Python,13371,79,118
4,Keras CNN - StatOil Iceberg LB 0.1995 (now 0.1516),TheGruffalo,Python,9665,72,90
5,Digit recognizer in Python using CNN,Koba,Python,39294,19,84
6,CNN with Keras,bgo,Python,12383,17,79
7,Bi-GRU-CNN-Poolings,MengYe,Python,5874,24,73
8,Cancer Image TensorFlow CNN 80% Valid. Acc.,Raoul,Python,5574,8,64
9,"1D CNN (single model score: 0.14, 0.16 or 0.23)",Alex,Python,5841,21,60
10,Data augmentation with keras into CNN,giim,Python,7402,28,58


In [16]:
tokens = ['lstm']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,LSTM with word2vec embeddings,lystdo,Python,50238,170,205
3,Improved LSTM baseline: GloVe + dropout,Jeremy Howard,Python,18219,34,203
5,Keras - Bidirectional LSTM baseline ( lb 0.069),CVxTz,Python,19394,41,197
4,Minimal LSTM + NB-SVM baseline ensemble,Jeremy Howard,Python,14287,14,156
6,Bidirectional LSTM with Convolution,Ashish Gupta,Python,9930,44,86
7,"keras lstm attention glove840b,lb 0.043",qianqian,Python,8304,18,81
2,[ LB 0.18+ ] LSTM with GloVe and magic features,lystdo,Python,7208,75,70
8,"Basic NLP: Bag of Words, TF-IDF, Word2Vec, LSTM",ReiiNakano,Python,16906,18,68
9,Explore TS with LSTM,Vladimir Demidov,Python,4686,14,61
10,LSTM_Stock_prediction-20170507,BenF,Python,20288,36,57


In [17]:
tokens = ['gru']
ignore = ['grupo']
best_kernels(tokens, 10, ignore)

0,1,2,3,4,5,6
1,Pooled GRU + FastText,Vladimir Demidov,Python,20972,38,220
2,Capsule net with GRU,chongjiujjin,Python,10116,33,147
3,Bi-GRU-CNN-Poolings,MengYe,Python,5874,24,73
4,(How to get 81%) GRU-ATT + LGBM + TF-IDF + EDA,Peter,Python,2689,30,69
5,LGB + GRU + LR + LSTM + NB-SVM Average Ensemble,Peter Hurford,Python,7195,13,53
6,Pooled GRU (with preprocessing),Prashant Kikani,Python,3769,20,46
7,"GRU(25-12-12)_with_Keras(512-64,relu)_SGDR_LB0.432",yyll008,Python,5011,12,45
8,NY Stock Price Prediction RNN LSTM GRU,Raoul,Python,12908,12,41
9,Pooled GRU + GloVe trainable,Shujian Liu,Python,1927,10,21


In [18]:
tokens = ['mxnet']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,mxnet + xgboost baseline [LB: 0.57],n01z3,Python,21836,48,163
2,mxnet + xgboost simple solution,n01z3,Python,16020,24,57
3,"CatBoost, StackedAE with MXNet, Meta [1.40LB]",Tanrei(nama),Python,2572,12,46
4,R Mxnet simple tutorial,miguel perez,R,15105,1,17
5,R & MXNET,gmilosev,R,8583,17,13
6,XGBoost + mxnet in R,Paweł Romański,R,5111,13,11
7,Digit Recognizer - Using Mxnet 2,BlastChar,R,119,0,4
8,MXNET with R starter kit,jeremie_db,R,2802,5,4
9,deep NN with MXnet,Tornadozou,Python,1665,0,3
10,mxnet // cnn_1d 0.945 acc [FULL-SET],Lefteris Fanioudakis,Python,36,0,2


In [19]:
tokens = ['resnet']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,EDA and CNN (resnet-18) (LB 0.2094),Prince Grover,Python,2790,7,35
2,Complete process using ResNet as a starting point,Rodney Thomas,Python,5240,28,31
3,End-to-End ResNet50 with TTA [LB ~0.93],Sasha Korekov,Python,1914,10,31
4,Objects + Bounding Boxes using Resnet50 - ImageAI,sban,Python,1806,11,22
5,resnet50 features + xgboost,n01z3,Python,7299,4,19
6,Keras ResNet with image augmentation,AndreasFalkoven,Python,1419,1,17
7,ResNet50 Example,beluga,Python,2718,3,12
8,Feature Extraction by ResNet (keras,Chia-Ta Tsai,Python,1795,8,11
9,Fast.ai - ResNet101 (99.5% or better),Mario Lurig,Python,270,4,10
10,LB [0.56130] - resnet50 features + xgboost,Travis Glines,Python,2324,2,10


In [20]:
tokens = ['Capsule network', 'capsulenet']
best_kernels(tokens, 5)

0,1,2,3,4,5,6
1,Beginner's Guide to Capsule Networks,Zafar,R,10613,52,298
2,CapsuleNet on MNIST,Kevin Mader,Python,29723,21,142
3,CapsuleNet on Fashion MNIST,Kevin Mader,Python,4290,0,43
4,A Beginner's guide to Capsule Networks,AnkitJha,Python,371,8,11


In [21]:
tokens = ['vgg']
best_kernels(tokens, 5)

0,1,2,3,4,5,6
1,Transfer Learning with VGG-16 CNN+AUG LB 0.1712,DeveshMaheshwari,Python,13371,79,118
2,Extract avito image features via keras VGG16,DUO,Python,4165,5,73
3,Keras VGG19 Starter,0rangutan,Python,9522,24,61
4,VGG16 Train features,Bruno G. do Amaral,Python,2974,10,60
5,use Keras pre-trained VGG16 acc 98%,fujisan,Python,24134,35,56


In [22]:
tokens = ['inception']
best_kernels(tokens, 5)

0,1,2,3,4,5,6
1,Keras Inception + Xception (0.47),杨培文 (Yang Peiwen),Python,7034,22,57
2,Keras k-fold Inception V3 (1st place LB 0.99770),James Requa,Python,4747,17,32
3,0.99 with R and Keras (Inception V3 fine-tune),Ogurtsov,R,2280,5,27
4,Using InceptionV3 features - SVM classifier,Craig Glastonbury,Python,11499,33,24
5,WavCeption V1: a 1-D Inception approach (LB 0.76),Turing,Python,1407,22,18


In [23]:
tokens = ['computer vision']
best_kernels(tokens, 5)

0,1,2,3,4,5,6
1,Intro to Deep Learning and Computer Vision,DanB,Python,54649,34,196
2,Exercise: Convolutions for Computer Vision,DanB,Python,23919,19,80
3,Basic Pure Computer Vision Segmentation (LB 0.229),Gábor Vecsei,Python,6664,7,66
4,Plant Seedlings Fun with Computer Vision,Gábor Vecsei,Python,4842,20,61
5,Optimizing Computer Vision Segmentation,Kevin Mader,Python,3047,2,20


In [24]:
tokens = ['transfer learning']
best_kernels(tokens, 5)

0,1,2,3,4,5,6
1,Transfer Learning with VGG-16 CNN+AUG LB 0.1712,DeveshMaheshwari,Python,13371,79,118
2,Transfer Learning,DanB,Python,17804,50,76
3,Exercise: Using Transfer Learning,DanB,Python,4415,2,27
4,Fruits-360 - Transfer Learning using Keras,IshanSohony,Python,892,13,20
5,VGG16 Transfer Learning - Pytorch,Carlo Alberto,Python,4557,4,13


## 5. Clustering Algorithms 

In [25]:
tokens = ['kmeans', 'k means']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Principal Component Analysis with KMeans visuals,Anisotropic,Python,46089,55,202
2,Log MA and Days of Week Means (LB: 0.529),Paulo Pinto,Python,10448,31,143
3,Aggregates + SumValues + SumZeros + K-Means + PCA,Samrat P,Python,3142,20,43
4,Clustering wines with k-means,Xavier,R,2219,12,42
5,Visualizing K-Means with Leaf Dataset,Selfish Gene,Python,4247,1,36
6,3D Kmeans animation,DrGuillermo,Python,1031,3,15
7,kmeans_example,kajot,Python,6370,27,12
8,K-means Clustering of 1 million headlines,Siddharth Yadav,Python,651,6,11
9,Simple K-means clustering on the Iris dataset,Tim I,Python,7651,1,11
10,Using K-Means Clustering to Predict Helpfulness,Amee Amin,Python,6972,6,10


In [26]:
tokens = ['hierarchical clustering']
best_kernels(tokens, 3)

0,1,2,3,4,5,6
1,Hierarchical Clustering vs. k-Means,Ralph Schlosser,R,2705,2,2
2,Hierarchical Clustering for Iris dataset,Chakra,R,396,0,1


In [27]:
tokens = ['dbscan']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,mod DBSCAN x 100 (parallel),Grzegorz Sionkowski,R,7746,62,94
2,DBSCAN Benchmark,Mikhail Hushchyn,Python,4657,21,49
3,HDBSCAN clustering II,Luis Andre Dutra e Silva,Python,1779,4,38
4,DBSCAN Benchmark improvement - 0.2099,Yair Beer,Python,1017,8,24
5,Chocolate ratings-Outlier analysis with DBScan,Teza,Python,1535,0,21
6,DBSCAN for CERN,Byfone,Python,2214,4,20
7,"Starter DBSCAN, Validation, Creating a Submission",Robert Tacbad,Python,706,2,13
8,classifier+hdbscan+helixFitting,Siddhartha,Python,818,3,10
9,HDBSCAN and scaling of the coordinates,Alexander Zinovev,Python,669,3,9


In [28]:
tokens = ['unsupervised']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Unsupervised Anomaly Detection,Victor Ambonati,Python,14125,7,37
2,Unsupervised and supervised neighborhood encoding,Darnal,Python,1799,5,36
3,Taxi Interac Shiny App + Unsupervised Learning,Owen Ouyang,R,954,17,32
4,"3D Plotly, Unsupervised Learning, and lightGBM",Owen Ouyang,R,777,0,26
5,Distilled Features & Unsupervised Learning,AlexanderGeiger,Python,691,2,18
6,Topic 7. Unsupervised learning: PCA and clustering,Yury Kashnitsky,Python,439,1,17
7,Creating Customer Segments - Unsupervised Learning,Samrat P,Python,793,2,12
8,Unsupervised Segmentation with Type-Separation,Gábor Vecsei,Python,498,0,5
9,Unsupervised Approach-Kmeans clustering,YugandharTripathi,Python,442,0,4
10,Supervised & Unsupervised classification,bpali26,R,894,0,4


## 6. Misc - Models 

In [29]:
tokens = ['naive bayes']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Naive Bayesian Network with 7 features,J.Jones,Python,1696,8,22
2,"NCI Thesaurus & Naive Bayes (vs RF, GBM, GLM & DL)",Loic Merckel,R,2311,8,17
3,Benouilli Naive Bayes,Scirpus,Python,5022,18,13
4,Credit Card Fraud Detection: KNN & Naive Bayes,Yura Shakhnazaryan,R,1929,3,9
5,Simple Naive Bayes & XGBoost,Sudhir Kumar,Python,1038,4,8
6,Naive Bayes without a ML Library,Ryder,Python,348,1,8
7,Spooky Simple Naive Bayes Scores ~0.399,Tom Nelson,Python,561,2,8
8,Fraud Detection with Naive Bayes Classifier,Lovedeep Saini,Python,2944,1,8
9,Bernoulli Naive Bayes - AUC 59%,Kate,Python,3052,1,8
10,Sentiment Analysis using LR & Naive Bayes,Megabus,R,2335,2,7


In [30]:
tokens = ['svm']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,NB-SVM strong linear baseline,Jeremy Howard,Python,47610,102,580
2,Minimal LSTM + NB-SVM baseline ensemble,Jeremy Howard,Python,14287,14,156
3,"Visualizing KNN, SVM, and XGBoost on Iris Dataset",Gabriel Kerr,Python,11715,17,80
4,LGB + GRU + LR + LSTM + NB-SVM Average Ensemble,Peter Hurford,Python,7195,13,53
5,What's Cooking : TF IDF with OvR SVM,sban,Python,1229,19,33
6,"Fraud Detection by Random Forest,DT and SVM",swamysm,R,3304,5,32
7,LSTM with BN + NB-SVM + LR on Conv AI(lb 0.041),Ivan,Python,6396,11,31
8,"Wine EDA, DT, RF, xgb, LightGBM, SVM, and h2o",Owen Ouyang,R,1458,17,28
9,SVM 0.6+,rerock,R,4050,0,25


In [31]:
tokens = ['knn']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Random Forest And KNN on a few blocks,Alexandru Papiu,R,28288,36,159
2,"Visualizing KNN, SVM, and XGBoost on Iris Dataset",Gabriel Kerr,Python,11715,17,80
3,grid_knn,Sandro,Python,9395,27,36
4,kNN from scratch in Python at 97.1%,Surya Teja Cheedella,Python,6484,9,24
5,"Comparing random forest, PCA and kNN",samuel,Python,4243,12,23
6,kNN approach,Mikhail Hushchyn,Python,2799,5,22
7,kNN from scratch in Python at 97.1%,Minesh A. Jethva,Python,3506,11,19
8,Rental List: KNN on lat/long data,MZH,Python,1959,2,17
9,grid_knn,David,Python,5456,17,14
10,"Breast cancer prediction: KNN, SVC, and Logistic",Sara G. Mille,Python,3169,7,13


In [32]:
tokens = ['recommendation engine']
best_kernels(tokens, 5)

0,1,2,3,4,5,6
1,Film recommendation engine,FabienDaniel,Python,36734,33,227
2,Simple content-based recommendation engine,cclark,Python,9682,4,19
3,Film recommendation engine- converted to use TMDb,Sohier Dane,Python,1884,4,15
4,Recommendation_Engine + EDA...DonorsChoose,Enigma,Python,634,3,15


## 7. Important Data Science Techniques

### 7.1 Preprocessing

In [33]:
tokens = ['EDA', 'exploration']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Comprehensive data exploration with Python,Pedro Marcelino,Python,216656,503,2239
2,Simple Exploration Notebook - Zillow Prize,SRK,Python,52798,111,788
3,Be my guest - Recruit Restaurant EDA,Heads or Tails,R,40631,188,717
6,EDA To Prediction(DieTanic),"I,Coder",Python,32489,108,655
7,Speech representation and data exploration,DavidS,Python,36789,87,632
8,Mercari Interactive EDA + Topic Modelling,ThyKhueLy,Python,37294,77,597
4,Steering Wheel of Fortune - Porto Seguro EDA,Heads or Tails,R,37596,198,554
5,NYC Taxi EDA - Update: The fast & the curious,Heads or Tails,R,42208,156,464
9,Home Credit : Complete EDA + Feature Importance ✓✓,Lathwal,Python,25391,100,414
10,$ - Toxic Comments EDA,Jagan,Python,23905,82,402


In [34]:
tokens = ['feature engineering']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Feature Engineering & Importance Testing,NanoMathias,Python,17709,104,288
3,Introduction to Manual Feature Engineering,Will Koehrsen,Python,7884,29,176
5,"EDA, feature engineering and xgb + lgb",Andrew Lukyanenko,Python,2151,7,135
6,Creative Feature Engineering (LB 0.35),BuryBuryZymon,Python,5617,28,130
7,Feature Engineering and Visualization,Marcel Spitzer,R,11429,32,111
8,Feature engineering,Eike Dehling,Python,5676,22,95
4,Automated Feature Engineering Basics,Will Koehrsen,Python,4837,15,86
9,Feature Engineering & Validation Strategy,SRK,Python,6351,25,86
10,HOME CREDIT - BUREAU DATA - FEATURE ENGINEERING,Shanth,Python,4051,25,79
2,Feature Engineering Benchmarks,NanoMathias,Python,4914,23,75


In [35]:
tokens = ['feature selection']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Feature Selection and Data Visualization,Kaan Can,Python,36341,186,448
2,Feature Selection with Null Importances,olivier,Python,4535,43,118
3,Introduction to Feature Selection,Will Koehrsen,Python,3832,22,84
4,Exploratory study on feature selection,Santhosh Sharma Ananthramu,Python,10994,8,83
5,Feature Selection and Prediction,ZhiboYang,Python,11936,28,54
6,6 Ways for Feature Selection,oskird,Python,2118,15,50
7,Easy Feature Selection pipeline: 0.55+ at LB,Arseny Kravchenko,Python,1832,5,32
8,Feature Selection and Ensemble of 5 Models,Li-Yen Hsu,Python,3430,6,20
9,Model-based Feature Selection (Newbie),Jason Liu,Python,3971,0,19
10,Using XGBoost For Feature Selection,MeiChengShih,Python,5046,6,17


In [36]:
tokens = ['outlier treatment', 'outlier']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Unrolling of helices + outliers removal,Luis Andre Dutra e Silva,Python,6425,24,135
2,XGB w/o outliers & LGB with outliers combined,Andy Harless,Python,14840,46,87
3,Home Credit EDA: Distributions and Outliers,nlgn,Python,1428,14,42
4,Standard Prices vs. Outliers,Andrea,Python,2221,4,39
5,You want outliers? We got them outliers!,Tilii,Python,4007,17,36
6,Beware of Outliers !!,SRK,Python,9803,2,35
7,Outlier Detection Practice: uni/multivariate,Kevin Arvai,Python,959,15,29
8,Data Cleaning Challenge: Outliers,Rachael Tatman,R,2119,26,25
9,XGBoost without outliers ( LB ~ 0.06450 ),InfiniteWing,Python,3777,5,22
10,Analysis of clusters and outliers,Marcel Spitzer,R,1723,4,19


In [37]:
tokens = ['anomaly detection', 'anomaly']
best_kernels(tokens, 8)

0,1,2,3,4,5,6
1,Semi-Supervised Anomaly Detection Survey,Matheus Facure,Python,6774,8,38
2,Unsupervised Anomaly Detection,Victor Ambonati,Python,14125,7,37
3,Anomaly Detection using Gaussian Distribution,Sachin Shelar,Python,4738,8,35
4,H2O - Autoencoders and anomaly detection (Python),Sheik Mohamed Imran,Python,4829,2,21
5,Numerical feature density -> anomaly detection?,Alexander Galea,Python,3903,10,16
6,Anomaly Detection Using Tensorflow,Loic Merckel,Python,2360,0,13
7,Time Series and anomaly detection,byliu,Python,5374,0,12
8,Anomaly Detection,Pradeep Babburi,R,4380,0,11


In [38]:
tokens = ['smote']
best_kernels(tokens, 5)

0,1,2,3,4,5,6
1,Credit Card Fraud Prediction - [RF + SMOTE],Leonardo Ferreira,Python,1376,8,25
2,Fraud Detection with SMOTE and XGBoost in R,Bono,R,2300,1,10
3,Fraud detection with SMOTE and RandomForest,Christophe Taret,Python,4270,4,9
4,"SMOTE in R using Treebag, AUC 0.98",ML_Enthusiast,R,1976,0,7


In [39]:
tokens = ['pipeline']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Titanic Survival Prediction End to End ML Pipeline,Poonam Ligade,Python,26955,64,278
2,Pipelines,DanB,Python,14892,29,142
3,"Titanic: Voting, Pipeline, Stack, and Guide",Nick Brooks,Python,7891,27,122
4,Full pipeline demo: poly -> pixels -> ML -> poly,Konstantin Lopuhin,Python,12843,24,118
5,"Pipeline Kernel, xgb + fe [LB1.39]",Sergey Kabanov,Python,4359,38,112
6,A Deep Dive Into Sklearn Pipelines,dbaghern,Python,6989,16,70
7,Preprocessing Pipeline and Convnet Trainer,Brian Farrar,Python,7177,32,62
8,Easy Feature Selection pipeline: 0.55+ at LB,Arseny Kravchenko,Python,1832,5,32
9,A Complete ML Pipeline Tutorial (ACU ~ 86%),Pourya,Python,2437,2,28
10,Advanced Pipelines tutorial,Aashita Kesarwani,Python,785,19,26


### 7.2 Dimentionality Reduction

In [40]:
tokens = ['dataset decomposition', 'dimentionality reduction']
best_kernels(tokens, 2)

0,1,2,3,4,5,6
1,Dataset Decomposition Techniques,sban,Python,3301,16,93
2,Dimentionality Reduction SVD in batch,leexa,Python,1490,5,3


In [41]:
tokens = ['PCA']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Customer Segments with PCA,Andrea,Python,15903,16,114
2,Tutorial: PCA Intuition and Image Completion,Kaan Can,Python,3407,43,109
3,"Dimensionality reduction (PCA, tSNE)",Tilii,Python,6579,32,94
4,Visualizing PCA with Leaf Dataset,Selfish Gene,Python,7579,18,66
5,"All You Need is PCA (LB: 0.11421, top 4%)",massquantity,Python,4259,19,55
6,PCA visualization,Tuomas Tikkanen,Python,17493,20,45
7,Aggregates + SumValues + SumZeros + K-Means + PCA,Samrat P,Python,3142,20,43
8,"Use Partial PCA for Collinearity, LB ~0.328 w/ XGB",Gccering,Python,1084,7,38
9,TSNE vs PCA,Mary Vikhreva,Python,7628,4,36


In [42]:
tokens = ['Tsne', 't-sne']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,"Dimensionality reduction (PCA, tSNE)",Tilii,Python,6579,32,94
3,Visualizing Word Vectors with t-SNE,Jeff Delaney,Python,18437,20,74
4,Mapping digits with a t-SNE lens,Triskelion,Python,10874,5,36
5,TSNE vs PCA,Mary Vikhreva,Python,7628,4,36
6,TSNE & PCA Quick and Dirty Visuals,Anisotropic,Python,2922,8,29
7,clusters in 2D with tsne VS pca,puyokw,R,20646,3,29
8,PCA and T-SNE,Øystein Schønning-Johansen,Python,2824,8,28
2,Four Blob TSNE - with (legal) supplements,Tilii,Python,1452,9,26
9,Visualization on a 2D map (with t-SNE),Jean-Matthieu Schertzer,R,4531,2,25


### 7.3 Post Modelling Techniques

In [43]:
tokens = ['cross validation']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Cross-Validation,DanB,Python,16135,27,103
2,"Cross-validation, weighted linear blending, errors",Tilii,Python,3058,19,60
3,Correct time-aware cross-validation scheme,Yury Kashnitsky,Python,1408,2,30
4,Manager Skill for Cross-Validation Pipelines,Maximilian Hahn,Python,3010,18,26
5,Cross-Validation Methodology Using '16 Golden Week,BreakfastPirate,R,1184,2,24
6,Simple Grasp Cross-validation,Alexandre Barachant,Python,2528,2,13
7,Training set split for cross validation,Yifan Xie,Python,4805,13,12
8,Proper Cross-Validation,Stergios,Python,2721,5,12
9,Simple Keras Model with k-fold cross validation,Stefanie04736,Python,7707,1,9
10,"Data Augmentation, Cross-Validation, Ensemble",Sharif Amit,Python,416,5,9


In [44]:
tokens = ['model selection']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,"Quora EDA & Model selection (ROC, PR plots)",Philipp Schmidt,Python,21720,35,212
2,Cervix EDA & Model selection,Philipp Schmidt,Python,19735,34,159
3,Montecarlo Model Selection,ForzzeeTeam,Python,2398,7,45
4,Feature Selection and Ensemble of 5 Models,Li-Yen Hsu,Python,3430,6,20
5,Model-based Feature Selection (Newbie),Jason Liu,Python,3971,0,19
6,Model and feature selection with Python,Sergio Rodrigues,Python,4985,1,15
7,Cold Calls: Data Mining and Model Selection,Emma Ren,Python,2018,3,14
8,In Depth Model Selection,dataWrangler,R,748,0,5
9,EDA and Model Selection,Amol Mavuduru,Python,171,0,3
10,[.96 acc] Model Selection + Hyperparameter Tuning,Quan Nguyen,Python,505,0,2


In [45]:
tokens = ['model tuning', 'tuning']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,R xgboost with caret tuning and gini score,Troy Walters,R,10423,11,67
2,Automated Model Tuning,Will Koehrsen,Python,2112,7,64
3,Intro to Model Tuning: Grid and Random Search,Will Koehrsen,Python,2102,22,60
5,LGBM CV Tuning and Seed Diversification,Nick Brooks,Python,2059,12,58
6,Microsoft LightGBM with parameter tuning (~0.823),GarethJones,Python,14994,14,33
4,Tuning Automated Feature Engineering (Exploratory),Will Koehrsen,Python,1715,15,29
7,Updated: XGBoost with parameter tuning,Jason Liu,R,26912,14,27
8,XGBoost parameter tuning template,Norbert Kozlowski,R,23719,5,26
9,Parameter tuning : 5 x 2-fold CV statistical test,olivier,Python,1247,8,26
10,Tuning Random Forest Parameters,Daniel Haden,Python,14650,14,21


In [46]:
tokens = ['gridsearch', 'grid search']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Intro to Model Tuning: Grid and Random Search,Will Koehrsen,Python,2102,22,60
2,Hyperparameter Grid Search with XGBoost,Tilii,Python,6767,14,45
3,Grid search xgboost with scikit-learn,Kazuaki Tanida,Python,31687,8,22
4,xgboost with GridSearchCV,phunter,Python,23695,13,22
5,GridSearchCV with feature in xgboost,phunter,Python,12619,25,19
6,Hyper Parameter Optimization with Grid Search,Pranav Pandya,R,1086,4,17
7,GridSearchCV + XGBRegressor (0.556+ LB),Omar Essam,Python,5780,6,16
8,GridSearch vs RandomizedSearch on XGboostRegressor,Eliot Barril,Python,3624,2,12
9,Parameter Tuning - Random Forest - GridsearchCV,Swaroop Kallakuri,Python,863,12,9
10,Random Forest from grid search to hyperopt,RpyGamer,Python,2551,0,8


### 7.4 Ensemblling

In [47]:
tokens = ['ensemble']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Titanic Top 4% with ensemble modeling,Yassine Ghouzam,Python,31976,139,562
2,Detailed Data Analysis & Ensemble Modeling,Tanner Carbonati,R,28322,75,235
3,Ensemble Model: Stacked Model Example,JMT5802,R,44185,69,163
4,Minimal LSTM + NB-SVM baseline ensemble,Jeremy Howard,Python,14287,14,156
5,EDA & Ensemble Model (Top 10 Percentile),Vivek Srinivasan,Python,18235,28,124
6,Porto Seguro Tutorial: end-to-end ensemble,Yifan Xie,Python,3352,6,86
7,Concise catboost starter ensemble (PLB: 0.06435),See--,Python,4376,16,56
8,ML-Ensemble: Scikit-learn style ensemble learning,flnr,Python,8500,33,55
9,LGB + GRU + LR + LSTM + NB-SVM Average Ensemble,Peter Hurford,Python,7195,13,53
10,Statoil CSV PyTorch SENet ensemble LB 0.1520,QuantScientist,Python,7503,43,48


In [48]:
tokens = ['stacking', 'stack']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Introduction to Ensembling/Stacking in Python,Anisotropic,Python,222261,505,2001
2,Stacked Regressions : Top 4% on LeaderBoard,Serigne,Python,83749,294,1376
3,Explore Stacking (LB 0.1463),DSEverything,Python,12950,25,166
4,Ensemble Model: Stacked Model Example,JMT5802,R,44185,69,163
5,Simple Stacker LB 0.284,Vladimir Demidov,Python,14895,77,153
6,stacked then averaged models [~ 0.5697],Hakeem,Python,19681,55,126
7,Stacking Starter,Faron,Python,23989,27,125
8,"Titanic: Voting, Pipeline, Stack, and Guide",Nick Brooks,Python,7891,27,122
9,let's walk through stackoverflow worldwide survey,RanjeetJain,Python,2915,40,120
10,OOF stacking regime,Håkon Hapnes Strand,Python,7127,50,119


In [49]:
tokens = ['bagging']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Keras starter with bagging (LB: 1120.596),Danijel Kivaranovic,Python,14192,96,76
2,Keras starter with bagging 1111.84364,MT,Python,16291,101,69
3,Predicting House Prices [XGB/RF/Bagging-Reg Pipe],Leonardo Ferreira,Python,1575,5,25
4,UnderBagging | AUC = ~0.95,Rodrigo Santis,Python,4495,14,15
5,Naive Bagging CNN(PB0.985),Finlay Liu,Python,1777,14,13
6,Topic 5. Ensembles. Part 1. Bagging,Yury Kashnitsky,Python,21,0,9
7,Boo! Keras + XGBoost bagging starter,David Kazaryan,Python,2872,3,3
8,Keras starter with bagging 1111.84364,Weimin Wang,Python,1832,0,2


## 8. Text Data

In [50]:
tokens = ['NLP', 'Natural Language Processing', 'text mining']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Spooky NLP and Topic Modelling tutorial,Anisotropic,Python,36195,136,492
2,Approaching (Almost) Any NLP Problem on Kaggle,Abhishek,Python,35276,72,354
3,OMG! NLP with the DJIA and Reddit!,Andrew Gelé,Python,22202,29,72
4,"Basic NLP: Bag of Words, TF-IDF, Word2Vec, LSTM",ReiiNakano,Python,16906,18,68
5,"Text Mining with Sklearn /Keras (MLP, LSTM, CNN)",Eliot Barril,Python,10687,21,51
6,Fun in Text Mining with Simpsons,Bukun,R,4547,25,51
7,NLP in R: Topic Modelling,Rachael Tatman,R,10949,60,49
8,Scary NLP with SpaCy and Keras,Aaron Marquez,Python,7310,14,43
9,Detailed NLP Project (Prediction & Visualization),Debadri Dutta,Python,2432,17,42
10,Applying Text Mining,Kaan Can,Python,185,2,37


In [51]:
tokens = ['topic modelling']
best_kernels(tokens, 8)

0,1,2,3,4,5,6
1,Mercari Interactive EDA + Topic Modelling,ThyKhueLy,Python,37294,77,597
2,Spooky NLP and Topic Modelling tutorial,Anisotropic,Python,36195,136,492
3,NLP in R: Topic Modelling,Rachael Tatman,R,10949,60,49
5,Topic Modelling with LSA and LDA,Ryan Cushen,Python,2304,4,16
6,Topic Modelling (LDA) on Elon Tweets,Renato Aranha,Python,1870,3,12
7,"Kanye Lyrics: EDA, Song Generator, Topic Modelling",Pratap Vardhan,Python,1131,1,10
4,NLP in R: Topic Modelling Workbook,Rachael Tatman,R,1800,0,9
8,Topic Modelling and sentiment analysis,SathviSiva,Python,933,0,9


In [52]:
tokens = ['word embedding','fasttext', 'glove', 'word2vec']
best_kernels(tokens, 8)

0,1,2,3,4,5,6
1,Pooled GRU + FastText,Vladimir Demidov,Python,20972,38,220
2,LSTM with word2vec embeddings,lystdo,Python,50238,170,205
4,Improved LSTM baseline: GloVe + dropout,Jeremy Howard,Python,18219,34,203
5,Spell Checker using Word2vec,CPMP,Python,10630,33,114
6,Simple Keras FastText: val_loss 0.31,nzw,Python,7012,31,85
7,"keras lstm attention glove840b,lb 0.043",qianqian,Python,8304,18,81
8,Using FastText models for robust embeddings,Max Schumacher,Python,7579,26,72
3,[ LB 0.18+ ] LSTM with GloVe and magic features,lystdo,Python,7208,75,70


## 9. Data Science Tools

In [53]:
tokens = ['scikit']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Your First Scikit-Learn Model,DanB,Python,78316,214,594
2,Scikit-Learn ML from Start to Finish,Jeff Delaney,Python,66421,79,272
3,10 Classifier Showdown in Scikit-Learn,Jeff Delaney,Python,28175,57,205
4,ML-Ensemble: Scikit-learn style ensemble learning,flnr,Python,8500,33,55
5,Scikit-Learn ML from Start to Finish,Rajat Shah,Python,3561,8,29
6,Grid search xgboost with scikit-learn,Kazuaki Tanida,Python,31687,8,22
7,Principal Component Analysis with Scikit-Learn,Niraj Verma,Python,5846,9,21
8,Tips for Using Scikit-Learn for Evaluation,cass,Python,1056,0,18
9,Scikit-learn pipelines and pandas,JanKoch,Python,7130,8,16
10,Classifying News Headlines with scikit-learn,Ed King,Python,4009,1,13


In [54]:
tokens = ['tensorflow', 'tensor flow']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,TensorFlow deep NN,Kirill Kliavin,Python,104309,183,524
2,Predicting Fraud with TensorFlow,Currie32,Python,50336,47,168
3,Nuclei DSB 2018 TensorFlow U-Net Score 0.352,Raoul,Python,8664,31,131
5,Data augmentation and Tensorflow U-Net,ShenShen,Python,14536,10,109
6,Programming in TensorFlow and Keras,DanB,Python,31559,30,104
7,Tensorflow starter: conv1d + embeddings (0.442 LB),ololo,Python,3946,28,75
4,Cancer Image TensorFlow CNN 80% Valid. Acc.,Raoul,Python,5574,8,64
8,Multi-GPU tensorflow convnet [0.65],Adam Blazek,Python,11911,66,59
9,Neural Network Model for House Prices (TensorFlow),Julien Heiduk,Python,15641,14,57
10,Basic U-net using Tensorflow,Vijay Jadhav,Python,8789,24,45


In [55]:
tokens = ['theano']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Theano+Lasange Starter,Florian Muellerklein,Python,12571,24,25
2,Fast LeNet5 CNN in Theano for GPU,Lukasz 8000,Python,13154,9,22
3,Training a U-Net model in keras Theano,Ramiro Debbe,Python,1406,2,8
4,Open dataset - theano tensor first image,Paul Larmuseau,Python,233,0,3
5,tensor theano,Alaa Awad,Python,746,0,2
6,TheanoLasagne - Fork Florian Muellerkle,Andre lopes,Python,830,0,2
7,Practice Theano Logistic Regression,Andrew Blaikie,Python,125,0,1
8,Test_for_theano,jack,Python,423,0,1
9,Fast LeNet5 CNN in Theano for GPU,Jundong Qiao,Python,161,0,1
10,Theano conv network,Tehnar,Python,675,0,1


In [56]:
tokens = ['keras']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Introduction to CNN Keras - 0.997 (top 6%),Yassine Ghouzam,Python,49189,211,1059
2,Keras U-Net starter - LB 0.277,Kjetil Åmdal-Sævik,Python,75835,131,795
3,Keras Model for Beginners (0.210 on LB)+EDA+R&D,DeveshMaheshwari,Python,23095,84,362
4,Deep Neural Network Keras way,Poonam Ligade,Python,37771,89,283
5,[For Beginners] Tackling Toxic Using Keras,Bongo,Python,14669,35,269
6,A simple nn solution with Keras (~0.48611 PL),noobhound,Python,19896,79,240
7,Dog Breed - Pretrained keras models(LB 0.3),beluga,Python,28590,47,225
8,Keras - Bidirectional LSTM baseline ( lb 0.069),CVxTz,Python,19394,41,197
9,End-to-end baseline with U-net (keras),n01z3,Python,34211,130,163
10,CatdogNet - Keras Convnet Starter,Jeff Delaney,Python,36608,67,162


In [57]:
tokens = ['pytorch']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Starting Kit for PyTorch Deep Learning,Mamy Ratsimbazafy,Python,13325,17,61
2,Pytorch Tutorial for Deep Learning Lovers,Kaan Can,Python,3076,14,57
4,Statoil CSV PyTorch SENet ensemble LB 0.1520,QuantScientist,Python,7503,43,48
3,Recurrent Neural Network with Pytorch,Kaan Can,Python,822,10,40
5,PyTorch CNN DenseNet Ensemble LB 0.1538,QuantScientist,Python,10281,26,37
7,Pytorch starter,Austin,Python,3160,9,37
8,Pre-trained PyTorch Monkeys: A Deep Dream,paultimothymooney,Python,1663,16,36
6,PyTorch GPU CNN & BCELoss with predictions,QuantScientist,Python,7603,16,32
9,Simple PyTorch with kaggle's GPU,Leigh,Python,870,1,31
10,PyTorch Tutorials on DSB2018,Yun Chen,Python,3601,12,29


In [58]:
tokens = ['vowpal wabbit','vowpalwabbit']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Vowpal Wabbit tutorial: blazingly fast learning,Yury Kashnitsky,Python,10432,22,364
3,"Fast, low memory learning - part 1: VowpalWabbit",Aimoldin Anuar [dsmlkz],Python,549,3,37
2,Topic 8. Online learning and Vowpal Wabbit,Yury Kashnitsky,Python,76,0,12
4,Vowpal Wabbit - input file preparation,Konrad Banachewicz,Python,311,1,11
5,Part 1: Titanic encounters Vowpal Wabbit and R,Ivan Bajdarvanov,R,324,0,3
7,LRM Fast - Vowpal Wabbit Implementation,Pulkit Jha,Python,99,3,2
8,Vowpal Wabbit decides who lives and who dies,Misha Lisovyi,Python,67,0,1
6,Part 2: Titanic encounters Vowpal Wabbit and R,Ivan Bajdarvanov,R,128,0,0
9,try to understand vowpalwabbit,Evgenii Zhukov,Python,29,0,0


In [59]:
tokens = ['eli5']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,ELI5 for Mercari,Konstantin Lopuhin,Python,11126,70,216
3,"Understanding Approval-DonorsChoose-EDA,FE,ELI5",Jagan,Python,1442,5,34
4,ELI5 for TOXIC,Sergei Fironov,Python,715,4,19
5,ELI5 What's Different About the Test Set? (EDA),Peter Hurford,Python,298,4,14
2,eli5 example,Konstantin Lopuhin,Python,196,0,13


In [60]:
tokens = ['hyperopt']
best_kernels(tokens, 5)

0,1,2,3,4,5,6
1,"Tune and compare XGB, LightGBM, RF with Hyperopt",Eike Dehling,Python,5964,9,43
2,Home_Credit_Hyperopt_optimization,olivier,Python,647,5,16
3,Hyperparameter tuning using Hyperopt,ronroc,Python,3116,5,11
4,FS(Lasso)+HyperParamTuning(HyperOpt),Abhilash Awasthi,Python,446,0,9
5,Random Forest from grid search to hyperopt,RpyGamer,Python,2551,0,8


In [61]:
tokens = ['pandas']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Selecting and Filtering in Pandas,DanB,Python,91900,2,593
2,Univariate plotting with pandas,Aleksey Bilogur,Python,27532,42,139
4,Tutorial: Accessing Data with Pandas,Sohier Dane,Python,13013,36,136
5,Learn Pandas with Pokemons,"I,Coder",Python,8283,28,116
6,Predict hotel type with pandas,dune_dweller,Python,31559,22,91
7,Global Religion 1945-2010: Plotly & Pandas visuals,Anisotropic,Python,6522,29,90
8,A Home for Pandas and Sklearn: Beginner How-Tos,TimLee,Python,4453,16,88
3,Bivariate plotting with pandas,Aleksey Bilogur,Python,14479,18,74
9,Humble Intro to Analysis with Pandas and Seaborn,Chris Crawford,Python,9222,12,63
10,"Plotting with pandas, matplotlib, and seaborn",Data Framed,Python,1342,19,41


In [62]:
tokens = ['SQL']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,SQL Scavenger Hunt Handbook,Rachael Tatman,Python,33905,209,375
2,SQL Scavenger Hunt: Day 1,Rachael Tatman,Python,21829,300,283
7,SQL and Python primer - Bokeh | Plotly,Anisotropic,Python,16687,32,168
3,SQL Scavenger Hunt: Day 2,Rachael Tatman,Python,9386,193,138
4,SQL Scavenger Hunt: Day 4,Rachael Tatman,Python,7104,132,91
5,SQL Scavenger Hunt: Day 3,Rachael Tatman,Python,6627,169,86
6,SQL Scavenger Hunt: Day 5,Rachael Tatman,Python,5148,128,85
8,Getting Started with SQL and BigQuery,DanB,Python,29439,39,79
9,Data Analysis using SQL,Dima Rudov,Python,6267,9,51
10,SQL Scavenger Hunt: Day 1,Brian W.,Python,1446,21,40


In [63]:
tokens = ['bigquery', 'big query']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Getting Started with SQL and BigQuery,DanB,Python,29439,39,79
2,Analyzing 3 Million Github Repos using BigQuery,Poonam Ligade,Python,2541,7,49
3,KB-->MB-->GB-->TB-->?B (BigQuery),"I,Coder",Python,1355,15,31
4,Getting started with Big Query,Sohier Dane,Python,4094,5,29
7,Air quality EDA using SQL-Bigquery,Sudhir Kumar,Python,782,8,24
5,Beyond Queries: Exploring the BigQuery API,Sohier Dane,Python,2023,2,22
8,My 15th solution features (mainly using BigQuery),tkm2261,Python,1386,5,21
6,How to integrate BigQuery & Pandas,Sohier Dane,Python,3602,11,20
10,No RAM? Fast feature engineering with Big Query,Lior Perez,Python,399,8,15
9,BigQuery & Kaggle Tutorial with LB: 0.59 and 0.546,tkm2261,Python,793,0,11


## 10. Data Visualization

In [64]:
tokens = ['visualization', 'visualisation']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Python Data Visualizations,Ben Hamner,Python,167601,116,761
2,Feature Selection and Data Visualization,Kaan Can,Python,36341,186,448
3,Strength of visualization-python visuals tutorial,BuryBuryZymon,Python,32206,92,397
4,In-Depth Analysis & Visualisations - Avito,sban,Python,14069,71,305
5,Geolocation visualisations,BeyondBeneath,Python,20801,40,225
6,Detailed Cleaning/Visualization (Python),"Alan ""AJ"" Pryor, Ph.D.",Python,34340,70,206
8,Map visualizations with external shapefile,Jordan Tremoureux,R,4987,36,164
7,Detailed Cleaning/Visualization,"Alan ""AJ"" Pryor, Ph.D.",R,24063,38,148
9,Welcome to data visualization,Aleksey Bilogur,Python,46449,16,146


In [65]:
tokens = ['plotly', 'plot.ly']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Interactive Porto Insights - A Plot.ly Tutorial,Anisotropic,Python,29103,128,414
2,Decision Boundaries visualised via Python & Plotly,Anisotropic,Python,16253,42,195
6,Plotly Tutorial for Beginners,Kaan Can,Python,7875,66,189
3,Generation Unemployed? Interactive Plotly Visuals,Anisotropic,Python,17893,73,188
4,SQL and Python primer - Bokeh | Plotly,Anisotropic,Python,16687,32,168
5,Global Religion 1945-2010: Plotly & Pandas visuals,Anisotropic,Python,6522,29,90
7,Intermediate visualization tutorial using Plotly,Siddharth Yadav,Python,2156,45,78
8,Mass Shooting in US (using plotly),Anton Aksyonov,R,4646,9,56
9,A Very Extensive EDA of Physics Particles : Plotly,Lathwal,Python,4587,3,53
10,EDA with Plotly,AdhokshajaPradeep,R,14875,38,47


In [66]:
tokens = ['seaborn']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,Seaborn Tutorial for Beginners,Kaan Can,Python,18291,118,215
2,Visualizing Pokémon Stats with Seaborn,Andrew Gelé,Python,27165,51,158
3,Plotting with seaborn,Aleksey Bilogur,Python,16273,19,78
5,Humble Intro to Analysis with Pandas and Seaborn,Chris Crawford,Python,9222,12,63
4,Faceting with seaborn,Aleksey Bilogur,Python,9320,17,56
6,"Plotting with pandas, matplotlib, and seaborn",Data Framed,Python,1342,19,41
7,Seaborn Visualization,NeilS,Python,2262,8,18
8,Python Seaborn PairPlot Example,Ben Hamner,Python,28476,6,16


In [67]:
tokens = ['d3.js']
best_kernels(tokens, 4)

0,1,2,3,4,5,6
1,Comprehensive Python and D3.js Favorita analytics,Anisotropic,Python,14581,29,175
4,Interactive D3.js Visualisations in Kaggle Kernels,sban,Python,2522,62,103
2,Ghastly Network and D3.js Force-directed graphs,Anisotropic,Python,3073,15,55
3,Zoomable Circle Packing via D3.js in IPython,Anisotropic,Python,2984,15,51


In [68]:
tokens = ['bokeh']
best_kernels(tokens, 10)

0,1,2,3,4,5,6
1,SQL and Python primer - Bokeh | Plotly,Anisotropic,Python,16687,32,168
2,Visualization: Bokeh Tutorial Part 1,Kaan Can,Python,7297,7,63
3,Interactive Bokeh Tutorial Part 2,Kaan Can,Python,2753,4,38
4,Karnataka Education EDA using Bokeh Visualisation,Pavan Sanagapati,Python,627,7,29
5,EDA with python library bokeh,naveenkb,Python,395,1,12
6,t-SNE + Bokeh,Yohan,Python,1959,1,10
7,Exploratory Data Analysis with Bokeh,dtromero,Python,1675,0,8
8,Visualization of trips using bokeh and Datashader,saihttam,Python,605,2,8
9,Interactive Visualization with Bokeh!,Phil Butcher,Python,4177,1,6
10,Exploring and Visualizing using bokeh,itzzthad,Python,1678,3,5


<br>
Thanks for viewing. Suggest the list of items which can be added to the list. If you liked this kernel, please upvote.  
