EMOJI

Emoji are ideograms and smileys used in electronic messages and web pages. Some examples of emoji are 😃, 🧘🏻‍♂️, 🌍, 🍞, 🚗, 📞, 🎉, ♥️, and 🏁. Emoji exist in various genres, including facial expressions, common objects, places and types of weather, and animals. They are much like emoticons, but emoji are pictures rather than typographic approximations; the term "emoji" in the strict sense refers to such pictures which can be represented as encoded characters, but it is sometimes applied to messaging stickers by extension.

Source: https://en.wikipedia.org/wiki/Emoji

![Emoji illustration](https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTOuC-bvw2CEvxTV76VThnoWV_EZmsHvn36ig&usqp=CAU)

Image source: twinkl.com.br

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import math
from textwrap import wrap
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation notices from pandas/seaborn.
warnings.filterwarnings('ignore')
# Global seaborn appearance used by all later plots: 'Set2' colour palette
# and the 'darkgrid' axes style.
sns.set_palette('Set2')
sns.set_style('darkgrid')

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of each file.
for folder, _, files_here in os.walk('/kaggle/input'):
    for file_name in files_here:
        full_path = os.path.join(folder, file_name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Location of the emoji dataset inside the Kaggle input directory.
EMOJI_CSV_PATH = '../input/emoji-data-descriptions-codepoints/emoji_df.csv'

# Read the CSV as UTF-8 and preview the first rows.
df = pd.read_csv(EMOJI_CSV_PATH, encoding='utf8')
df.head()

In [None]:
#Code by Mohammad Imran Shaikh https://www.kaggle.com/shikhnu/covid19-tweets-eda-visualization-wordcloud

# Number of distinct values in every column of `df`, as a two-column frame
# ('Features' = column name, 'Uniques' = distinct-value count).
# `DataFrame.nunique()` replaces the manual per-column loop.
unique_df = (
    df.nunique()
    .rename_axis('Features')
    .reset_index(name='Uniques')
)

fig, ax = plt.subplots(1, 1, figsize=(15, 7))

# Draw on the axes created above instead of relying on the implicit current axes.
sns.barplot(data=unique_df, x='Features', y='Uniques', alpha=0.8, ax=ax)

# Write each bar's height just above the bar.
for bar in ax.patches:
    ax.annotate(
        format(bar.get_height(), '.0f'),
        (bar.get_x() + bar.get_width() / 2., bar.get_height()),
        ha='center', va='center',
        xytext=(0, 9), textcoords='offset points',
    )

ax.set_title('Bar plot for number of unique values in each column', weight='bold', size=15)
ax.set_ylabel('#Unique values', size=12, weight='bold')
ax.set_xlabel('Features', size=12, weight='bold')
plt.xticks(rotation=90)
plt.show()

In [None]:
#word cloud
from wordcloud import WordCloud, ImageColorGenerator
# Join every value of the `group` column into one string for the word cloud.
# Missing values are skipped so they do not appear as the literal word "nan".
text = " ".join(df['group'].dropna().astype(str))
# Create and generate a word cloud image:
wordcloud = WordCloud(max_words=200,colormap='Set1', background_color="purple").generate(text)
# Use a single figure: every extra plt.figure() call opens another figure,
# which plt.show() would render as an empty plot.
plt.figure(figsize=(15,10))
# Display the generated image:
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

#Codes by YiYuan https://www.kaggle.com/latong/food-review-text-summarization/notebook

In [None]:
# Character count of each emoji name. A descriptive variable name is used so
# the built-in `sum` is not shadowed for the rest of the session.
name_lengths = df['name'].str.len()
print(name_lengths)

In [None]:
# Store the length (in characters) of each name as a new column and summarise it.
# `.str.len()` returns NaN for a missing name, whereas `apply(len)` would raise
# a TypeError on it.
# NOTE(review): the original note said the mean is about 20 characters - confirm
# against the `mean` row of the summary below.
df['name length'] = df['name'].str.len()
df['name length'].describe()

In [None]:
# Distribution of name length within each emoji group.
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(data=df, x='group', y='name length', ax=ax)
ax.set_title('Name length by group')
# Tilt the category labels so neighbouring group names do not overlap.
plt.xticks(rotation=45, ha='right')
plt.show()

In [None]:
# Length of each emoji string. A descriptive variable name is used so the
# built-in `sum` is not shadowed for the rest of the session.
emoji_lengths = df['emoji'].str.len()
print(emoji_lengths)

In [None]:
# Store the length of each emoji string as a new column and summarise it.
# The length is the number of Unicode code points in the string, so an emoji
# built from several code points counts as more than 1.
# `.str.len()` returns NaN for a missing value, whereas `apply(len)` would raise
# a TypeError on it.
df['emoji length'] = df['emoji'].str.len()
df['emoji length'].describe()

In [None]:
# Distribution of emoji string length within each emoji group.
fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(data=df, x='group', y='emoji length', ax=ax)
ax.set_title('Emoji length by group')
# Tilt the category labels so neighbouring group names do not overlap.
plt.xticks(rotation=45, ha='right')
plt.show()

In [None]:
# Disabled cell (nothing below executes): it would build a per-column table of
# missing-value counts and percentages and show only the columns that have any.
#pd.set_option('display.max_colwidth', -1)
#df_NA = pd.DataFrame(data=[df.isna().sum().tolist(), ["{:.2f}".format(i)+'%' for i in (df.isna().sum()/df.shape[0]*100).tolist()]], 
           # columns=df.columns, index=['NA_Count', 'NA_Percent']).transpose()
#df_NA[df_NA['NA_Count']>0].transpose()

In [None]:
# Percentage of missing values in each column of `df`.
na_percent = df.isna().sum().div(df.shape[0]).mul(100)

# Keep only the columns where fewer than 50% of the rows are missing.
kept_columns = na_percent.index[na_percent < 50].tolist()
df_modified = df[kept_columns]

print(df_modified.shape)
df_modified.head()

In [None]:
# One pie chart per column showing its ten most frequent values.
plt.rcParams['font.size'] = 14
pie_columns = ['emoji','name','group','sub_group','codepoints']
# `axes` is the 3x2 grid; the loop variable below is a single panel, so the
# grid itself is never overwritten.
fig, axes = plt.subplots(3, 2, figsize=(20,20))
for col, ax in zip(pie_columns, axes.flat):
    # Ten most frequent values of this column as {value: count}.
    counts = df_modified[col].value_counts().head(10).to_dict()
    # Drop the 'Not Available' category when it is among the top values.
    counts.pop('Not Available', None)
    labels = []
    for value in counts:
        # str() keeps the split working even if a key is not a string.
        words = str(value).split(' ')
        if len(words) > 6:
            # Labels of more than six words get a line break near the middle.
            words[math.ceil(len(words)/2)-1] += '\n'
        labels.append(' '.join(words))
    ax.pie(x=list(counts.values()), labels=labels, shadow=True, startangle=0)

    # Title: column name with underscores turned into spaces, upper-cased.
    ax.set_title(col.replace('_', ' ').upper(), weight='bold', fontsize=18)
# The grid has more panels than there are columns; hide the unused ones so no
# empty axes frame is drawn.
for unused_ax in axes.ravel()[len(pie_columns):]:
    unused_ax.set_visible(False)
plt.tight_layout()
plt.show()

In [None]:
#Code by Savita Nair https://www.kaggle.com/savitanair/hr-analytics

# Text summary of the `group` column: distinct-value count and the ten most
# frequent groups, framed by separator lines.
separator = '*' * 20
top_groups = df['group'].value_counts().head(10)
n_groups = len(df['group'].unique())

print('Dataset has {} unique groups'.format(n_groups))
print(separator)
print('And the top 10 counts are :')
print(top_groups)
print(separator)

# Bar chart of the same top-10 counts.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 6))
ax.bar(top_groups.index, top_groups.values, width=0.8, color='y')
plt.xticks(rotation=45)

In [None]:
#Code by Savita Nair https://www.kaggle.com/savitanair/hr-analytics

# Text summary of the `emoji` column: distinct-value count and the ten most
# frequent emojis, framed by separator lines.
separator = '*' * 20
top_emojis = df['emoji'].value_counts().head(10)
n_emojis = len(df['emoji'].unique())

print('Dataset has {} unique emojis'.format(n_emojis))
print(separator)
print('And the top 10 counts are :')
print(top_emojis)
print(separator)

# Bar chart of the same top-10 counts.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 6))
ax.bar(top_emojis.index, top_emojis.values, width=0.8, color='r')

In [None]:
#Code by Savita Nair https://www.kaggle.com/savitanair/hr-analytics

# Text summary of the `name` column followed by a bar chart of its ten most
# frequent values.
# Bracket access is used for the column: attribute access (`df.name`) only
# works while no DataFrame attribute of the same name exists.
names = df['name']
# Compute the top-10 table once and reuse it for both the printout and the plot.
top_names = names.value_counts().head(10)

print(f'Dataset has {len(names.unique())} unique names')
print('*'*20)
print('And the top 10 counts are :')
print(top_names)
print('*'*20)

fig, ax = plt.subplots(1,1,figsize=(12,6))
ax.bar(top_names.index, top_names.values, width=0.8, color='b')
ax.set_title('Top 10 names by count')
ax.set_ylabel('Count')
# Right-align the rotated labels so each one ends under its own bar.
plt.xticks(rotation=45, ha='right')
plt.show()

In [None]:
# One horizontal count plot per column, showing its ten most frequent values.
colunas = ['emoji', 'name', 'group', 'sub_group', 'codepoints']
for i in colunas:
    # Rank by frequency over the whole column, then keep the top ten.
    # (Taking head(10) before value_counts() would only rank the first ten rows.)
    top_values = df[i].value_counts().head(10).index

    fig, ax = plt.subplots(1,1, figsize=(15, 6))
    # Every row is real data, so no row is skipped.
    sns.countplot(y=i, data=df, order=top_values, palette='Blues_r', ax=ax)
    # Title the figure with the column name rather than with the first data value.
    fig.text(0.1, 0.95, i, fontsize=16, fontweight='bold', fontfamily='serif')
    plt.xlabel(' ', fontsize=20)
    plt.ylabel('')
    plt.yticks(fontsize=13)
    plt.box(False)
    # Render this figure before the next one is created.
    plt.show()

In [None]:
#Code by Olga Belitskaya https://www.kaggle.com/olgabelitskaya/sequential-data/comments
from IPython.display import display,HTML
# Heading defaults: two colours, two Google Fonts family names, two pixel sizes.
c1, c2 = '#eb3434', '#eb3446'
f1, f2 = 'Akronim', 'Smokum'
fs1, fs2 = 30, 15


def dhtml(string,fontcolor=c1,font=f1,fontsize=fs1):
    """Display `string` as a styled HTML <h1> heading in the notebook output.

    The markup imports the `font` family from fonts.googleapis.com with the
    '3d-float' effect and applies `fontcolor` and `fontsize` (in px) inline.

    Parameters
    ----------
    string : text placed inside the <h1> element (inserted as-is, not escaped).
    fontcolor : CSS colour value for the heading.
    font : font family name used in both the import URL and the inline style.
    fontsize : heading size; rendered as `<fontsize>px`.
    """
    # <style> element that pulls in the web font together with its effect.
    style_tag = (
        "<style>\n"
        "    @import 'https://fonts.googleapis.com/css?family="
        f"{font}&effect=3d-float';</style>\n"
    )
    # Heading element; the text itself is substituted through %-formatting.
    heading_tag = (
        "    <h1 class='font-effect-3d-float' style='font-family:"
        f"{font}; color:{fontcolor}; font-size:{fontsize}px;'>"
        + "%s</h1>" % string
    )
    display(HTML(style_tag + heading_tag))


dhtml('Be patient. Marília Prata, @mpwolke was Here Looping.' )