In [48]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import re
import warnings
from wordcloud import WordCloud
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation as LDA

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(folder, name)
    for folder, _subfolders, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

#   ****1. LOADING THE PAPERS****

In [49]:
# Read the NIPS papers table from the Kaggle input directory and preview it.
papers_csv = "/kaggle/input/nips-papers/papers.csv"
papers = pd.read_csv(papers_csv)
papers.head()

# ****2. PREPARING THE DATA FOR ANALYSIS****

In [50]:
# Drop columns that are not needed for the analysis.
# Reassigning the result (rather than dropping in place) and ignoring columns
# that are already absent keeps this cell safe to run more than once.
papers = papers.drop(columns=['id', 'event_type', 'pdf_name'], errors='ignore')
papers.head(10)

# ****3. PLOTTING HOW MACHINE LEARNING HAS EVOLVED OVER TIME****

In [51]:
# Group the papers by publication year and count the papers in each group.
groups = papers.groupby('year')
counts = groups.size()

# Draw on an explicit Axes instead of relying on pyplot's implicit
# "current figure", and label the axes once the bars are in place.
fig, ax = plt.subplots(figsize=(16, 8))
counts.plot(kind='bar', color='c', edgecolor='black', ax=ax)
ax.set_title('Evolution of Machine Learning')
ax.set_ylabel('Count')

# Render the figure explicitly so the cell does not echo the Axes repr.
plt.show()

# **4. PREPROCESSING THE DATA**

In [52]:
# Show the raw titles before cleaning.
print(papers['title'].head())

# Remove basic punctuation and lower-case every title.
# The pattern is a raw string: '.' is already literal inside a character
# class, and writing '\.' in a normal string literal is an invalid escape
# sequence that newer Python versions warn about.
# The vectorised .str methods also pass missing titles through as NaN
# instead of raising inside a per-element lambda.
papers['new_title'] = (
    papers['title']
    .str.replace(r'[,.!?]', '', regex=True)
    .str.lower()
)
papers['new_title'].head()

# **5. WORD CLOUD**

In [53]:
# Word cloud built from all cleaned titles joined into a single string.

allwords = ' '.join(papers['new_title'])
cloud_options = dict(width=500, height=250, random_state=21, max_font_size=119)
wordcloud = WordCloud(**cloud_options).generate(allwords)
wordcloud.to_image()

# **6. LDA ANALYSIS ON TEXT**

In [54]:
import random
# helper function
def common_words(count_data, count_vectorizer, n_top=10):
    """Plot a bar chart of the most frequent terms in a document-term matrix.

    Parameters
    ----------
    count_data : matrix of shape (n_documents, n_terms)
        Term counts per document (sparse or dense); column ``j`` must
        correspond to the ``j``-th name reported by ``count_vectorizer``.
    count_vectorizer : fitted vectorizer
        Object exposing ``get_feature_names_out()`` or, on old scikit-learn
        releases, ``get_feature_names()``.
    n_top : int, default 10
        Maximum number of terms to display.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    # ``get_feature_names`` no longer exists in current scikit-learn; prefer
    # its replacement and fall back only when running on an old install.
    get_names = getattr(count_vectorizer, 'get_feature_names_out', None)
    if get_names is None:
        get_names = count_vectorizer.get_feature_names
    vocabulary = np.asarray(get_names())

    # Column sums give the corpus-wide count of each term in one call,
    # without converting the matrix to dense form one row at a time.
    totals = np.asarray(count_data.sum(axis=0)).ravel()

    # Stable sort on the negated counts: descending order, and terms with
    # equal counts keep their vocabulary order.
    top = np.argsort(-totals, kind='stable')[:n_top]
    words = vocabulary[top]
    counts = totals[top]
    x_pos = np.arange(len(words))

    plt.figure(figsize=(12, 8), dpi=80)
    bars = plt.bar(x_pos, counts, align='center', color='c', edgecolor='black')

    # Label each bar with its count. The label is centred on the bar and
    # offset in points, so its placement does not depend on the data scale.
    for bar in bars:
        height = bar.get_height()
        plt.annotate(
            f'{float(height):.0f}',
            xy=(bar.get_x() + bar.get_width() / 2, height),
            xytext=(0, 3),
            textcoords='offset points',
            ha='center',
            va='bottom',
            fontsize=12,
        )
    plt.xticks(x_pos, words, rotation=60)
    plt.title(f'{len(words)} most common words')
    plt.xlabel('Words')
    plt.ylabel('Count')
    plt.show()

# Driver code: turn the cleaned titles into a bag-of-words count matrix
# (English stop words removed) and chart the most frequent terms.
cleaned_titles = papers['new_title']
count_vectorizer = CountVectorizer(stop_words='english')
count_data = count_vectorizer.fit_transform(cleaned_titles)
common_words(count_data, count_vectorizer)

# **7. ANALYSING TRENDS WITH LDA**

In [55]:
# Silence DeprecationWarning output for the rest of the notebook.
warnings.simplefilter('ignore', DeprecationWarning)

#helper function
def topics(model, count_vectorizer, n_topwords):
    """Print the highest-weighted words of every topic in a fitted model.

    Parameters
    ----------
    model : fitted topic model
        Object with a ``components_`` array holding one row of per-word
        weights for each topic.
    count_vectorizer : fitted vectorizer
        Object exposing ``get_feature_names_out()`` or, on old scikit-learn
        releases, ``get_feature_names()``; the names must be in the same
        order as the columns of ``model.components_``.
    n_topwords : int
        Number of words to print per topic.

    Returns
    -------
    None
        For each topic, prints a ``Topic #i:`` header and then the words,
        space-separated, from highest to lowest weight.
    """
    # ``get_feature_names`` no longer exists in current scikit-learn; prefer
    # its replacement and fall back only when running on an old install.
    get_names = getattr(count_vectorizer, 'get_feature_names_out', None)
    if get_names is None:
        get_names = count_vectorizer.get_feature_names
    words = get_names()

    for topic_index, topic in enumerate(model.components_):
        # argsort is ascending, so walk it backwards from the end to take
        # the ``n_topwords`` largest weights first.
        top_indices = topic.argsort()[:-n_topwords - 1:-1]
        print('\nTopic #%d:' % topic_index)
        print(" ".join(words[w] for w in top_indices))
        
#driver code
#tweak below parameters
number_topics = 10
number_words = 10

# Create and fit the LDA model.
# A fixed random_state makes the fitted topics reproducible, so a fresh
# "Restart & Run All" prints the same topics every time.
lda = LDA(n_components=number_topics, random_state=21)
lda.fit(count_data)

print('Topics found via LDA:')
topics(lda, count_vectorizer, number_words)

# **8. CONCLUSION**
Machine learning has become increasingly popular over the past years. The number of NIPS conference papers has risen exponentially, and people are continuously looking for ways to incorporate machine learning into their products and services.
Although this analysis focused on machine learning trends in research, many of these techniques are rapidly being adopted in industry. Following the latest machine learning trends is a critical skill for a data scientist, so it is worth continually learning from blogs, tutorials, and courses.