# Data Science Trend Analysis

# Importing the Libraries

In [1]:
import os
import pandas as pd
import numpy as np

## Bokeh Setup

In [None]:
from bokeh.io import output_notebook
from bokeh.io import show
from bokeh.plotting import figure
from bokeh.transform import cumsum
from bokeh.palettes import Spectral6
from bokeh.models import ColumnDataSource
from bokeh.layouts import gridplot
from bokeh.io import curdoc
from bokeh.models import HBar, Plot
# Apply the 'caliber' theme to the current Bokeh document
# ('dark_minimal' was the alternative tried here).
curdoc().theme = 'caliber'

# Route Bokeh output to the notebook so show() renders figures inline.
output_notebook()

## Time

In [None]:
# Web-search interest over time. The dataset lives under /kaggle/input like
# every other file read in this notebook. As the preview shows, the first row
# of the frame carries the real column headers.
web_time = pd.read_csv('/kaggle/input/data-science-global-trends-2010-2021/WebSearch_InterestOverTime.csv')
web_time.head()

Unnamed: 0,Category: All categories
Month,Data science + Machine Learning + Computer Vis...
2010-01,12
2010-02,11
2010-03,11
2010-04,11


**We will have to change the column name to the one present in the first row**

In [None]:
# Promote the first row (the real header) to column labels, then move the
# month strings out of the index into a regular 'Month' column.
web_time.columns = web_time.iloc[0]
web_time = (
    web_time
    .reset_index()
    .rename(columns = {
        'index': 'Month',
        'Data science + Machine Learning + Computer Vision + Natural Language Processing + Deep Learning: (Worldwide)': 'Total',
    })
    .drop(index = 0)  # row 0 is the header row that was just promoted
)
web_time.head()

Month,Month.1,Total
1,2010-01,12
2,2010-02,11
3,2010-03,11
4,2010-04,11
5,2010-05,10


In [None]:
# Convert the interest score to a number and derive the calendar year from
# the 'Month' strings.
web_time = web_time.assign(
    Total = pd.to_numeric(web_time['Total']),
    Year = pd.DatetimeIndex(web_time['Month']).year,
)
web_time.head()

Month,Month.1,Total,Year
1,2010-01,12,2010
2,2010-02,11,2010
3,2010-03,11,2010
4,2010-04,11,2010
5,2010-05,10,2010


In [None]:
def trend(df, title = None):
    """
    Plot the 'Total' series of `df` over time as a dashed line and show it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Total' column and either a 'Month' column (values
        parseable by ``pd.to_datetime``, e.g. '2010-01') or a 'Year' column.
    title : str, optional
        Chart title. The title styling below is only visible when one is given.

    Notes
    -----
    When 'Month' is present it drives a datetime x-axis so each row gets its
    own x position. Plotting against the integer 'Year' alone stacks all rows
    of the same year on one x value, which draws vertical zig-zags instead of
    a trend line. 'Year' is kept as a fallback for frames without 'Month'.
    """

    vals = df['Total'].to_list()
    if 'Month' in df.columns:
        xs = pd.to_datetime(df['Month'])
        x_axis_type = 'datetime'
    else:
        xs = [int(i) for i in df['Year']]
        x_axis_type = 'auto'

    source = ColumnDataSource(data = dict(cols = xs, counts = vals))
    # NOTE(review): plot_height/plot_width are the pre-3.0 Bokeh spellings
    # (newer releases use height/width) - confirm against the installed version.
    p = figure(plot_height = 400, plot_width = 800, title = title,
               x_axis_type = x_axis_type,
               x_axis_label = 'Years', y_axis_label = 'Count')

    # add a dashed line renderer with the chosen line thickness
    p.line('cols', 'counts', line_width = 2, source = source, line_dash = "dashed")
    p.title.align = 'center'
    p.title.text_font_size = '20pt'
    p.title.text_font_style = 'bold'
    p.title.text_font = 'Serif'
    p.xaxis.axis_label_text_font_size = "16pt"
    p.yaxis.axis_label_text_font_size = "16pt"

    # show the results
    show(p)

In [None]:
# Web-search interest over time.
trend(web_time)

## Region

In [None]:
# Web-search interest by country. As the preview shows, the first row of the
# frame carries the real column headers; the next cell promotes them.
web_region = pd.read_csv('/kaggle/input/data-science-global-trends-2010-2021/WebSearch_InterestByRegion.csv')
web_region.head()

Unnamed: 0,Category: All categories
Country,Data science + Machine Learning + Computer Vis...
Singapore,100
St. Helena,98
India,84
Ethiopia,83


In [None]:
# Promote the first row to column labels and turn the country names held in
# the index into a regular 'Country' column.
web_region.columns = web_region.iloc[0]
web_region = (
    web_region
    .reset_index()
    .rename(columns = {
        'index': 'Country',
        'Data science + Machine Learning + Computer Vision + Natural Language Processing + Deep Learning: (1/1/10 - 4/18/21)': 'Total',
    })
    .drop(index = 0)  # row 0 is the header row that was just promoted
)
web_region.head()

Country,Country.1,Total
1,Singapore,100
2,St. Helena,98
3,India,84
4,Ethiopia,83
5,Rwanda,80


In [None]:
web_region["Total"] = pd.to_numeric(web_region["Total"]) # Changing the data type from string to a numeric type.

**We will be using Folium for dealing with maps. Folium makes it easy to visualize data that's been manipulated in Python on an interactive Leaflet map. It enables both the binding of data to a map for choropleth visualizations as well as passing rich vector/raster/HTML visualizations as markers on the map.**

In [None]:
import folium
import json

# Load the country GeoJSON used by the choropleth. The context manager closes
# the file handle as soon as the document has been parsed.
with open('/kaggle/input/world1/world-countries.json', encoding = 'utf-8') as geojson_file:
    world_geojson = json.load(geojson_file)

## The JSON file is necessary for the choropleth, as it contains coordinate information about each country.

In [None]:
def mapping(df, world_geojson, indices_to_drop = False):
    """
    Build a world choropleth of the 'Total' value per country.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Country' and 'Total' columns. It is not modified; the
        function works on a copy of those two columns.
    world_geojson : dict
        Parsed GeoJSON whose features carry the country name under
        ``properties.name`` (the key used to join with 'Country').
    indices_to_drop : list or False, optional
        Row labels of `df` to remove before the numeric conversion. Any falsy
        value (the default) drops nothing.

    Returns
    -------
    folium.Map
        The map with the choropleth layer added.
    """

    # Explicit copy so the column assignments below operate on an independent
    # frame (avoids pandas' SettingWithCopyWarning).
    df1 = df[['Country', 'Total']].copy()
    df1['Total'] = df1['Total'].fillna(0) # Replacing null values with 0
    if indices_to_drop:
        df1 = df1.drop(indices_to_drop, axis = 0) # Removing them because they had values <1 which causes problems while mapping.
    df1["Total"] = pd.to_numeric(df1["Total"])

    m = folium.Map(tiles = "cartodbpositron")

    folium.Choropleth(
        geo_data = world_geojson,
        data = df1,
        columns = ['Country', 'Total'],
        key_on = 'feature.properties.name',
        fill_color="YlOrRd",
        fill_opacity = 0.7,
        line_opacity = 0.2,
    ).add_to(m)

    return m

In [None]:
# Choropleth of web-search interest by country (no rows dropped).
mapping(web_region, world_geojson)

**China and India are the countries where the web searches are made the most!**

## Topics

In [None]:
# Related topics for web search: skip the four leading lines of the export
# and supply the column names explicitly.
web_topics = pd.read_csv(
    '/kaggle/input/data-science-global-trends-2010-2021/WebSearch_RelatedTopics.csv',
    skiprows = 4,
    names = ['Topic', 'Number of Searches'],
)

In [None]:
# Preview the related-topics table.
web_topics.head()

Unnamed: 0,Topic,Number of Searches
0,Learning,100
1,Machine learning,72
2,Data,58
3,Science,51
4,Data science,44


The 'Number of Searches' column contains a special value, 'Breakout', which marks a topic as extremely popular. Let's see which topics broke out.

In [None]:
# Distinct topics whose score is the literal string 'Breakout'.
web_topic_is_breakout = web_topics['Number of Searches'] == 'Breakout'
web_topics.loc[web_topic_is_breakout, 'Topic'].unique()

array(['Coursera', 'Big data', 'TensorFlow', 'GitHub', 'Microsoft Azure',
       'scikit-learn', 'Amazon Web Services', 'Cloud computing', 'Keras',
       'Udemy, Inc.', 'Udacity', 'Udemy', 'Quora', 'Kaggle',
       'Internet of things', 'Deep reinforcement learning',
       'Apache Spark', 'Professional certification',
       'Master in Data Science', 'pandas', 'PyTorch',
       'Academic certificate', 'Business analytics'], dtype=object)

After looking at the above terms, a smile came to my face. It's because these are the words that we as data scientists must have searched at some point of our career, I mean I still do. For example:

* If I want to read about Big Data I would search 'Big Data' a lot of times on Google.

* If I want to learn AWS or read its docs, I would search Amazon Web Services.

* If I want to learn about something new I would go to Coursera or Udemy. 

Therefore, these above terms are pretty popular and are regularly searched.

Let's look at the words that have some count associated with it.

In [None]:
# Drop the 'Breakout' rows. .copy() makes the subset an independent frame so
# the column assignment below does not trigger SettingWithCopyWarning.
df = web_topics[web_topics['Number of Searches'] != 'Breakout'].copy()
df['Number of Searches'] = pd.to_numeric(df['Number of Searches'])
# Ten highest-scoring topics, best first.
df_sort = df.sort_values(by = 'Number of Searches', ascending = False).head(10)
df_sort.head()

Unnamed: 0,Topic,Number of Searches
0,Learning,100.0
1,Machine learning,72.0
2,Data,58.0
3,Science,51.0
4,Data science,44.0


In [None]:
topics = df_sort['Topic'].to_list()[:10]
counts = df_sort['Number of Searches'].to_list()[:10]
# Size the colour column from the data so every column of the source has the
# same length even when fewer than ten topics are available.
source = ColumnDataSource(data = dict(topics = topics, counts = counts, color = ['teal'] * len(topics)))

p = figure(x_range = topics, plot_height = 400, plot_width = 800, title = "Most Popular Topics", tools = "hover", tooltips = "@topics: @counts", background_fill_color="#f4f0ec")
p.vbar(x = 'topics', top = 'counts', width = 0.9, source = source, color = 'color')

# No p.legend.* settings here: vbar is called without a legend label, so the
# figure has no legend to configure.
p.xgrid.grid_line_color = None

p.title.align = 'center'
p.title.text_font_size = '20pt'
p.title.text_font_style = 'bold'
p.title.text_font = 'Serif'
p.xaxis.axis_label_text_font_size = "16pt"
p.yaxis.axis_label_text_font_size = "16pt"

show(p)

**From the above graph, we can observe that words like 'Machine Learning', 'Data', and 'Data Science' were searched most often. These are the terms that a lot of people tend to search for most frequently. Imagine a beginner starting to learn AI: first they would read about 'Machine Learning'; while reading about Machine Learning they would come across 'Deep Learning' and search for that. Then they might come across 'Python' as a programming language for ML/AI, and search for and read about Python. So all these searches make a lot of sense.**

## Queries

In [None]:
# Related queries for web search: skip the four leading lines of the export
# and supply the column names explicitly.
web_queries = pd.read_csv(
    '/kaggle/input/data-science-global-trends-2010-2021/WebSearch_RelatedQueries.csv',
    skiprows = 4,
    names = ['Query', 'Number of Searches'],
)
web_queries.head()

Unnamed: 0,Query,Number of Searches
0,python,100
1,python machine learning,53
2,ai,42
3,what is machine learning,32
4,data science python,31


**Again there are some breakout queries, let's have a look at them!**

In [None]:
# Distinct queries whose score is the literal string 'Breakout'.
web_query_is_breakout = web_queries['Number of Searches'] == 'Breakout'
web_queries.loc[web_query_is_breakout, 'Query'].unique()

array(['coursera', 'github', 'tensorflow', 'machine learning coursera',
       'azure', 'python deep learning', 'azure machine learning',
       'data science salary', 'python for data science',
       'deep learning ai', 'machine learning engineer',
       'deep reinforcement learning', 'coursera data science', 'keras',
       'machine learning with python', 'data science and analytics',
       'udacity', 'udemy', 'python in data science', 'kaggle',
       'aws machine learning', 'data science certificate',
       'machine learning and deep learning', 'r for data science',
       'machine learning salary'], dtype=object)

**These are again very similar queries, I can see many queries that I have also searched a lot of times :), like Python for ML, kaggle, r for ML, Azure ML, TensorFlow, etc. I am sure you also have searched queries a lot of times.**

In [None]:
def _top_ngrams(corpus, n, ngram_range):
    """
    Count the n-grams of `ngram_range` over `corpus` and return a list of
    (term, count) pairs sorted by descending count, truncated to the first `n`
    (all of them when `n` is None).
    """
    # Imported here because none of the notebook's import cells bring
    # CountVectorizer into scope.
    from sklearn.feature_extraction.text import CountVectorizer

    vec = CountVectorizer(ngram_range = ngram_range)
    bag_of_words = vec.fit_transform(corpus) ## document-term count matrix: one row per document, one column per term
    sum_words = bag_of_words.sum(axis = 0) ## single row holding the total count of each term
    ## vec.vocabulary_ maps each term to its column index in the matrix
    words_freq = [(word, sum_words[0, idx]) for word, idx in vec.vocabulary_.items()]
    freq_sorted = sorted(words_freq, key = lambda x: x[1], reverse = True)
    return freq_sorted[:n]

def get_top_n_words(corpus, n = None):
    """
    A function that returns the top 'n' unigrams used in the corpus
    as (word, count) pairs, most frequent first. All unigrams are
    returned when 'n' is None.
    """
    return _top_ngrams(corpus, n, (1, 1))

def get_top_n_bigram(corpus, n = None):
    """
    A function that returns the top 'n' bigrams used in the corpus
    as (bigram, count) pairs, most frequent first. All bigrams are
    returned when 'n' is None.
    """
    return _top_ngrams(corpus, n, (2, 2))

In [None]:
def unigram_plot(unigram, color):
    """
    A function used to plot bar charts for top unigrams

    Parameters
    ----------
    unigram : list of (word, count) pairs, e.g. the output of get_top_n_words
    color : fill colour applied to every bar

    Returns
    -------
    The Bokeh figure; the caller is responsible for show()-ing it.
    """
    words = [i[0] for i in unigram]
    count = [i[1] for i in unigram]
    # One colour entry per bar, so the source columns always have equal length
    # regardless of how many unigrams were passed in.
    source = ColumnDataSource(data = dict(Word = words, counts = count, color = [color] * len(words)))

    p = figure(x_range = words, plot_height = 400, plot_width = 800, title = "Top Unigram", tools = "hover", tooltips = "@Word: @counts", background_fill_color="#f4f0ec")
    p.vbar(x = 'Word', top = 'counts', width = 0.8, source = source, color = 'color')
    p.xaxis.major_label_orientation = "horizontal"

    # No p.legend.* settings: vbar is called without a legend label, so the
    # figure has no legend to configure.
    p.xgrid.grid_line_color = None

    p.title.align = 'center'
    p.title.text_font_size = '20pt'
    p.title.text_font_style = 'bold'
    p.title.text_font = 'Serif'
    p.xaxis.axis_label_text_font_size = "16pt"
    p.yaxis.axis_label_text_font_size = "16pt"

    return p

In [None]:
def bigram_plot(bigram, color, color_length = None):
    """
    A function used to plot bar charts for top bigrams

    Parameters
    ----------
    bigram : list of (bigram, count) pairs, e.g. the output of get_top_n_bigram
    color : fill colour applied to every bar
    color_length : accepted for backward compatibility and ignored; the colour
        column is always sized to the number of bars so it cannot get out of
        step with the data.

    Returns
    -------
    The Bokeh figure; the caller is responsible for show()-ing it.
    """
    words = [i[0] for i in bigram]
    count = [i[1] for i in bigram]
    source = ColumnDataSource(data = dict(Word = words, counts = count, color = [color] * len(words)))

    p = figure(x_range = words, plot_height = 400, plot_width = 800, title = "Top Bigrams", tools = "hover", tooltips = "@Word: @counts", background_fill_color="#f4f0ec")
    p.vbar(x = 'Word', top = 'counts', width = 0.8, source = source, color = 'color')
    # No p.legend.* settings: vbar is called without a legend label, so the
    # figure has no legend to configure.
    p.xgrid.grid_line_color = None
    p.xaxis.major_label_orientation = "vertical"

    p.title.align = 'center'
    p.title.text_font_size = '20pt'
    p.title.text_font_style = 'bold'
    p.title.text_font = 'Serif'
    p.xaxis.axis_label_text_font_size = "16pt"
    p.yaxis.axis_label_text_font_size = "16pt"

    return p

In [None]:
# Ten most frequent single words across the web-search queries.
top_unigram = get_top_n_words(web_queries['Query'], 10)
show(unigram_plot(top_unigram, '#6baed6'))

In [None]:
# Ten most frequent two-word sequences across the web-search queries.
top_bigram = get_top_n_bigram(web_queries['Query'], 10)
show(bigram_plot(top_bigram, '#a1dab4', 10))

## Time

In [None]:
# YouTube-search interest over time. As the preview shows, the first row of
# the frame carries the real column headers; the next cell promotes them.
youtube_time = pd.read_csv('/kaggle/input/data-science-global-trends-2010-2021/YoutubeSearch_InterestOverTime.csv')
youtube_time.head()

Unnamed: 0,Category: All categories
Month,Data science + Machine Learning + Computer Vis...
2010-01,5
2010-02,3
2010-03,3
2010-04,6


In [None]:
# Promote the first row to column labels and turn the month strings held in
# the index into a regular 'Month' column.
youtube_time.columns = youtube_time.iloc[0]
youtube_time = (
    youtube_time
    .reset_index()
    .rename(columns = {
        'index': 'Month',
        'Data science + Machine Learning + Computer Vision + Natural Language Processing + Deep Learning: (Worldwide)': 'Total',
    })
    .drop(index = 0)  # row 0 is the header row that was just promoted
)
youtube_time.head()

Month,Month.1,Total
1,2010-01,5
2,2010-02,3
3,2010-03,3
4,2010-04,6
5,2010-05,5


In [None]:
# Convert the interest score to a number and derive the calendar year from
# the 'Month' strings.
youtube_time = youtube_time.assign(
    Total = pd.to_numeric(youtube_time['Total']),
    Year = pd.DatetimeIndex(youtube_time['Month']).year,
)
youtube_time.head()

Month,Month.1,Total,Year
1,2010-01,5,2010
2,2010-02,3,2010
3,2010-03,3,2010
4,2010-04,6,2010
5,2010-05,5,2010


In [None]:
# YouTube-search interest over time.
trend(youtube_time)

**From the above graph, we can observe that there has been a constant increase in the number of searches on Youtube from 2010 to 2016 then it dropped for one month, then again it started rising till 2020.**

## Topics

In [None]:
# Related topics for YouTube search: skip the four leading lines of the
# export and supply the column names explicitly.
youtube_topics = pd.read_csv(
    '/kaggle/input/data-science-global-trends-2010-2021/YoutubeSearchRelatedTopics.csv',
    skiprows = 4,
    names = ['Topic', 'Number of Searches'],
)
youtube_topics.head()

Unnamed: 0,Topic,Number of Searches
0,Learning,100
1,Machine learning,77
2,Machine,51
3,Data,39
4,Science,34


In [None]:
# Distinct YouTube topics whose score is the literal string 'Breakout'.
youtube_topic_is_breakout = youtube_topics['Number of Searches'] == 'Breakout'
youtube_topics.loc[youtube_topic_is_breakout, 'Topic'].unique()

array(['Science', 'Data science', 'Python', 'Tutorial', 'Course',
       'Project', 'Algorithm', 'Andrew Ng', 'Artificial neural network',
       'Statistics', 'Computer Science',
       'National Programme on Technology Enhanced Learning', 'Linearity',
       'Neuron', 'Massachusetts Institute of Technology', 'TensorFlow',
       'Analysis', 'R', 'Computer programming', 'Prediction', 'Coursera',
       'Data analysis', 'Linear regression', 'Statistical classification',
       'Engineering'], dtype=object)

**Woah, I see Andrew Ng's name there :). He is truly the best in this field. Next, we can also see the Massachusetts Institute of Technology, it might be due to its great ML course that is available on Youtube. We can also see words like Neuron, ANN, etc. Oh God, these words are taking me back to my initial days!**

In [None]:
# Drop the 'Breakout' rows. .copy() makes the subset an independent frame so
# the column assignment below does not trigger SettingWithCopyWarning.
df = youtube_topics[youtube_topics['Number of Searches'] != 'Breakout'].copy()
df['Number of Searches'] = pd.to_numeric(df['Number of Searches'])
# Ten highest-scoring topics, best first.
df_sort = df.sort_values(by = 'Number of Searches', ascending = False).head(10)
df_sort.head()

Unnamed: 0,Topic,Number of Searches
0,Learning,100.0
1,Machine learning,77.0
2,Machine,51.0
3,Data,39.0
4,Science,34.0


In [None]:
topics = df_sort['Topic'].to_list()[:10]
counts = df_sort['Number of Searches'].to_list()[:10]
# Size the colour column from the data so every column of the source has the
# same length even when fewer than ten topics are available.
source = ColumnDataSource(data = dict(topics = topics, counts = counts, color = ['lightseagreen'] * len(topics)))

p = figure(x_range = topics, plot_height = 400, plot_width = 800, title = "Most Popular Topics", tools = "hover", tooltips = "@topics: @counts", background_fill_color="#f4f0ec")
p.vbar(x = 'topics', top = 'counts', width = 0.9, source = source, color = 'color')

# No p.legend.* settings here: vbar is called without a legend label, so the
# figure has no legend to configure.
p.xgrid.grid_line_color = None
p.xaxis.major_label_orientation = "vertical"
p.title.align = 'center'
p.title.text_font_size = '20pt'
p.title.text_font_style = 'bold'
p.title.text_font = 'Serif'
p.xaxis.axis_label_text_font_size = "16pt"
p.yaxis.axis_label_text_font_size = "16pt"

show(p)

## Queries

In [None]:
# Related queries for YouTube search: skip the four leading lines of the
# export and supply the column names explicitly.
youtube_queries = pd.read_csv(
    '/kaggle/input/data-science-global-trends-2010-2021/YoutubeSearchRelatedQueries.csv',
    skiprows = 4,
    names = ['Query', 'Number of Searches'],
)
youtube_queries.head()

Unnamed: 0,Query,Number of Searches
0,python,100
1,python machine learning,60
2,machine learning tutorial,42
3,python data science,32
4,ai,25


In [None]:
# YouTube queries whose score is the literal string 'Breakout'.
youtube_query_is_breakout = youtube_queries['Number of Searches'] == 'Breakout'
youtube_queries.loc[youtube_query_is_breakout, 'Query'].to_list()

['python',
 'machine learning tutorial',
 'python data science',
 'ai',
 'andrew ng',
 'ai machine learning',
 'data science for beginners',
 'python tutorial',
 'nptel',
 'machine learning andrew ng',
 'what is machine learning',
 'statistics',
 'algorithm',
 'python for data science',
 'computer science',
 'data science tutorial',
 'data science course',
 'machine learning in hindi',
 'machine learning in python',
 'machine learning for beginners',
 'mit',
 'machine learning course',
 'deep learning tutorial',
 'deep learning python',
 'machine learning projects']

* Most of the people have queried about Machine Learning tutorials, data science tutorials, and machine learning in python.

* People also queried about machine learning projects which I have also searched quite often.

* It's nice that terms like Statistics are being queried to get to know the core concepts.

**It would be fun to analyze the top channels that people follow for learning such topics.**

In [None]:
# Ten most frequent single words across the YouTube queries.
top_unigram = get_top_n_words(youtube_queries['Query'], 10)
show(unigram_plot(top_unigram, 'salmon'))

In [None]:
# Ten most frequent two-word sequences across the YouTube queries.
top_bigram = get_top_n_bigram(youtube_queries['Query'], 10)
show(bigram_plot(top_bigram, 'mediumaquamarine', 10))

**These results were quite expected: here we can see that terms like 'Machine Learning', 'Data Science', and 'Andrew Ng' are searched the most on Youtube. I would recommend the Andrew Ng course to anyone who is looking to get started with AI/ML.**

## Region

In [None]:
# YouTube-search interest by country. As the preview shows, the first row of
# the frame carries the real column headers; the next cell promotes them.
youtube_region = pd.read_csv('/kaggle/input/data-science-global-trends-2010-2021/YoutubeSearch_InterestByRegion.csv')
youtube_region.head()

Unnamed: 0,Category: All categories
Country,Data science + Machine Learning + Computer Vis...
China,100
Nigeria,25
Ethiopia,23
Uganda,21


In [None]:
# Promote the first row to column labels and turn the country names held in
# the index into a regular 'Country' column.
youtube_region.columns = youtube_region.iloc[0]
youtube_region = (
    youtube_region
    .reset_index()
    .rename(columns = {
        'index': 'Country',
        'Data science + Machine Learning + Computer Vision + Natural Language Processing + Deep Learning: (1/1/10 - 4/18/21)': 'Total',
    })
    .drop(index = 0)  # row 0 is the header row that was just promoted
)
youtube_region.head()

Country,Country.1,Total
1,China,100
2,Nigeria,25
3,Ethiopia,23
4,Uganda,21
5,Rwanda,18


In [None]:
# Rows labelled 142-144 are dropped before mapping - presumably the rows whose
# values are below 1 and break the numeric conversion; confirm against the CSV.
mapping(youtube_region, world_geojson, [142, 143, 144])

## Time

In [None]:
# News-search interest over time. As the preview shows, the first row of the
# frame carries the real column headers; the next cell promotes them.
news_time = pd.read_csv('/kaggle/input/data-science-global-trends-2010-2021/NewsSearchInterestOverTime.csv')
news_time.head()

Unnamed: 0,Category: All categories
Month,Data science + Machine Learning + Computer Vis...
2010-01,5
2010-02,0
2010-03,10
2010-04,8


In [None]:
# Promote the first row to column labels and turn the month strings held in
# the index into a regular 'Month' column.
news_time.columns = news_time.iloc[0]
news_time = (
    news_time
    .reset_index()
    .rename(columns = {
        'index': 'Month',
        'Data science + Machine Learning + Computer Vision + Natural Language Processing + Deep Learning: (Worldwide)': 'Total',
    })
    .drop(index = 0)  # row 0 is the header row that was just promoted
)
news_time.head()

Month,Month.1,Total
1,2010-01,5
2,2010-02,0
3,2010-03,10
4,2010-04,8
5,2010-05,0


In [None]:
# Convert the interest score to a number and derive the calendar year from
# the 'Month' strings.
news_time = news_time.assign(
    Total = pd.to_numeric(news_time['Total']),
    Year = pd.DatetimeIndex(news_time['Month']).year,
)
news_time.head()

Month,Month.1,Total,Year
1,2010-01,5,2010
2,2010-02,0,2010
3,2010-03,10,2010
4,2010-04,8,2010
5,2010-05,0,2010


In [None]:
# News-search interest over time.
trend(news_time)

**This trend is a little different: there was a steady rise until 2015, but after that the interest has repeatedly dipped and risen.**

## Topics

In [None]:
# Related topics for news search: skip the four leading lines of the export
# and supply the column names explicitly.
news_topics = pd.read_csv(
    '/kaggle/input/data-science-global-trends-2010-2021/NewsSearchRelatedTopics.csv',
    skiprows = 4,
    names = ['Topic', 'Number of Searches'],
)
news_topics.head()

Unnamed: 0,Topic,Number of Searches
0,Learning,100
1,Machine learning,82
2,Machine,61
3,Data,37
4,Science,33


In [None]:
# Distinct news topics whose score is the literal string 'Breakout'.
news_topic_is_breakout = news_topics['Number of Searches'] == 'Breakout'
news_topics.loc[news_topic_is_breakout, 'Topic'].unique()

array(['Learning', 'Machine learning', 'Machine', 'Data', 'Science',
       'Data science', 'Deep learning', 'Artificial intelligence',
       'Intelligence', 'Big data', 'Technology', 'Internet of things',
       'Analytics', 'Language', 'Natural language processing',
       'Visual perception', 'Natural language', 'Cloud computing',
       'Computer', 'Artificial neural network', 'Computing', 'Software',
       'Computer vision', 'Robot', 'Python'], dtype=object)

**It's pretty amazing how we can easily differentiate between Youtube Search, News Search, and Web Search by just looking at the topics. For news search, the topics are more application-oriented, such as IoT and Cloud Computing — basically the areas where ML and AI can be applied.**

In [None]:
# Drop the 'Breakout' rows. .copy() makes the subset an independent frame so
# the column assignment below does not trigger SettingWithCopyWarning.
df = news_topics[news_topics['Number of Searches'] != 'Breakout'].copy()
df['Number of Searches'] = pd.to_numeric(df['Number of Searches'])
# Ten highest-scoring topics, best first.
df_sort = df.sort_values(by = 'Number of Searches', ascending = False).head(10)
df_sort.head()

Unnamed: 0,Topic,Number of Searches
0,Learning,100.0
1,Machine learning,82.0
2,Machine,61.0
3,Data,37.0
4,Science,33.0


In [None]:
topics = df_sort['Topic'].to_list()[:10]
counts = df_sort['Number of Searches'].to_list()[:10]
# Size the colour column from the data so every column of the source has the
# same length even when fewer than ten topics are available.
source = ColumnDataSource(data = dict(topics = topics, counts = counts, color = ['lightseagreen'] * len(topics)))

p = figure(x_range = topics, plot_height = 400, plot_width = 800, title = "Most Popular Topics", tools = "hover", tooltips = "@topics: @counts", background_fill_color="#f4f0ec")
p.vbar(x = 'topics', top = 'counts', width = 0.9, source = source, color = 'color')

# No p.legend.* settings here: vbar is called without a legend label, so the
# figure has no legend to configure.
p.xgrid.grid_line_color = None
p.xaxis.major_label_orientation = "vertical"
p.title.align = 'center'
p.title.text_font_size = '20pt'
p.title.text_font_style = 'bold'
p.title.text_font = 'Serif'
p.xaxis.axis_label_text_font_size = "16pt"
p.yaxis.axis_label_text_font_size = "16pt"

show(p)

## Queries

In [None]:
# Related queries for news search.
# NOTE(review): this rebinds `web_queries` (previously the web-search queries)
# to the News data; the following cells rely on that name. A dedicated
# `news_queries` name would avoid the shadowing but must be changed in those
# cells too.
web_queries = pd.read_csv('/kaggle/input/data-science-global-trends-2010-2021/NewsSearchRelatedQueries.csv', skiprows = 4, names = ['Query', 'Number of Searches'])
web_queries.head()

Unnamed: 0,Query,Number of Searches
0,artificial intelligence,100
1,ai,93
2,big data,62
3,data analytics,31
4,internet of things,17


In [None]:
# Queries in the current `web_queries` frame whose score is the literal
# string 'Breakout'.
query_is_breakout = web_queries['Number of Searches'] == 'Breakout'
web_queries.loc[query_is_breakout, 'Query'].to_list()

['artificial intelligence',
 'ai',
 'big data',
 'data analytics',
 'internet of things',
 'machine learning news']

In [None]:
# Ten most frequent single words across the queries currently held in
# `web_queries`.
top_unigram = get_top_n_words(web_queries['Query'], 10)
show(unigram_plot(top_unigram, 'salmon'))

In [None]:
# Up to ten most frequent two-word sequences. The colour length passed is 7
# rather than 10 - presumably only seven distinct bigrams exist in this
# query set; verify against the data.
top_bigram = get_top_n_bigram(web_queries['Query'], 10)
show(bigram_plot(top_bigram, 'mediumaquamarine', 7))

## Region

In [None]:
# News-search interest by country. As the preview shows, the first row of the
# frame carries the real column headers; the next cell promotes them.
news_region = pd.read_csv('/kaggle/input/data-science-global-trends-2010-2021/NewsSearchInterestByRegion.csv')
news_region.head()

Unnamed: 0,Category: All categories
Country,Data science + Machine Learning + Computer Vis...
St. Helena,100
Finland,73
Estonia,43
Bahrain,35


In [None]:
# Promote the first row to column labels and turn the country names held in
# the index into a regular 'Country' column.
news_region.columns = news_region.iloc[0]
news_region = (
    news_region
    .reset_index()
    .rename(columns = {
        'index': 'Country',
        'Data science + Machine Learning + Computer Vision + Natural Language Processing + Deep Learning: (1/1/10 - 4/18/21)': 'Total',
    })
    .drop(index = 0)  # row 0 is the header row that was just promoted
)
news_region.head()

Country,Country.1,Total
1,St. Helena,100
2,Finland,73
3,Estonia,43
4,Bahrain,35
5,El Salvador,34


In [None]:
# The row labelled 89 is dropped before mapping - presumably a row whose value
# is below 1 and breaks the numeric conversion; confirm against the CSV.
mapping(news_region, world_geojson, [89])