In [29]:
import csv
import pandas as pd

In [30]:
# Load the emoji usage table into a DataFrame; fields are comma-separated.
df = pd.read_csv('EmojiData.csv', sep=',')

In [31]:
# Show the loaded table to eyeball the columns and value ranges.
df

Unnamed: 0,Name,icon,"EmojiXpress, mln","Instagram, mln","Twitter, mln"
0,Grinning,![image](https://pictures.s3.yandex.net/resour...,2.26,1.02,87.3
1,Beaming,![image](https://pictures.s3.yandex.net/resour...,19.1,1.69,150.0
2,ROFL,![image](https://pictures.s3.yandex.net/resour...,25.6,0.774,0.0
3,Tears of Joy,![image](https://pictures.s3.yandex.net/resour...,233.0,7.31,2270.0
4,Winking,![image](https://pictures.s3.yandex.net/resour...,15.2,2.36,264.0
5,Happy,![image](https://pictures.s3.yandex.net/resour...,22.7,4.26,565.0
6,Heart Eyes,![image](https://pictures.s3.yandex.net/resour...,64.6,11.2,834.0
7,Kissing,![image](https://pictures.s3.yandex.net/resour...,87.5,5.13,432.0
8,Thinking,![image](https://pictures.s3.yandex.net/resour...,6.81,0.636,0.0
9,Unamused,![image](https://pictures.s3.yandex.net/resour...,6.0,0.236,478.0


In [21]:
# Five emoji with the highest EmojiXpress counts, largest first.
top_EmojiXpress = (
    df.loc[:, ['Name', 'EmojiXpress, mln']]
    .sort_values(by='EmojiXpress, mln', ascending=False)
    .head(5)
)

In [23]:
# Five emoji with the highest Instagram counts, largest first.
top_Instagram = (
    df.loc[:, ['Name', 'Instagram, mln']]
    .sort_values(by='Instagram, mln', ascending=False)
    .head(5)
)

In [25]:
# Five emoji with the highest Twitter counts, largest first.
top_Twitter = (
    df.loc[:, ['Name', 'Twitter, mln']]
    .sort_values(by='Twitter, mln', ascending=False)
    .head(5)
)

In [28]:
# Show the EmojiXpress top-5 ranking.
top_EmojiXpress

Unnamed: 0,Name,"EmojiXpress, mln"
3,Tears of Joy,233.0
14,Heart,118.0
7,Kissing,87.5
6,Heart Eyes,64.6
2,ROFL,25.6


In [24]:
# Show the Instagram top-5 ranking.
top_Instagram

Unnamed: 0,Name,"Instagram, mln"
14,Heart,26.0
6,Heart Eyes,11.2
3,Tears of Joy,7.31
13,Two Hearts,5.69
7,Kissing,5.13


In [26]:
# Show the Twitter top-5 ranking.
top_Twitter

Unnamed: 0,Name,"Twitter, mln"
3,Tears of Joy,2270.0
14,Heart,1080.0
19,Recycle,932.0
6,Heart Eyes,834.0
15,Heart Suit,697.0



We can't rank emoji by the Twitter data alone: an artifact has crept into the data.
<p>'Recycle' looks suspicious here. Sorting by any other single source is risky as well: what if those sources have hidden problems too?</p>
<p>We need a more reliable criterion for the popularity of emoji.</p>


In [38]:
# Re-read the same CSV with the standard library so that every row becomes a
# dict of raw strings keyed by column name (values must be cast with float()
# before any arithmetic).
# newline='' is what the csv module requires for files handed to its readers,
# so line endings and newlines inside quoted fields are parsed correctly; the
# encoding is pinned so parsing does not depend on the platform's locale default.
with open('EmojiData.csv', newline='', encoding='utf-8') as f:
    df1 = list(csv.DictReader(f))

In [41]:
# Inspect the first parsed row: keys are the column names, values are strings.
df1[0]

{'Name': 'Grinning',
 'icon': '![image](https://pictures.s3.yandex.net/resources/grinning_1548433261.png)',
 'EmojiXpress, mln': '2.26',
 'Instagram, mln': '1.02',
 'Twitter, mln': '87.3'}

In [72]:
# Starting state for the raw-total pass: a scalar scratch value and an empty
# list that will hold [name, total] pairs.
sum_use, sum_use_all = 0, []

In [73]:
# Total usage per emoji: the plain sum of the three source columns, collected
# as [name, total] pairs in file order.
# The list is rebuilt from scratch inside this cell so that re-running the cell
# cannot append a second copy of every row to an already-populated list.
# The addition is written out explicitly (left to right) to keep the floating
# point results exactly as a + b + c.
sum_use_all = [
    [
        emoji['Name'],
        float(emoji['EmojiXpress, mln'])
        + float(emoji['Instagram, mln'])
        + float(emoji['Twitter, mln']),
    ]
    for emoji in df1
]


In [74]:
# Show the [name, total] pairs (still in file order at this point).
sum_use_all

[['Grinning', 90.58],
 ['Beaming', 170.79],
 ['ROFL', 26.374000000000002],
 ['Tears of Joy', 2510.31],
 ['Winking', 281.56],
 ['Happy', 591.96],
 ['Heart Eyes', 909.8],
 ['Kissing', 524.63],
 ['Thinking', 7.446],
 ['Unamused', 484.236],
 ['Sunglasses', 206.65],
 ['Loudly Crying', 680.05],
 ['Kiss Mark', 123.27000000000001],
 ['Two Hearts', 460.69],
 ['Heart', 1224.0],
 ['Heart Suit', 702.13],
 ['Thumbs Up', 253.85],
 ['Shrugging', 1.85],
 ['Fire', 156.99],
 ['Recycle', 932.0893]]

In [75]:
# Order the pairs in place by their total (second element), largest first.
sum_use_all.sort(reverse=True, key=lambda pair: pair[1])

In [76]:
# Top five emoji by raw summed usage.
sum_use_all[:5]

[['Tears of Joy', 2510.31],
 ['Heart', 1224.0],
 ['Recycle', 932.0893],
 ['Heart Eyes', 909.8],
 ['Heart Suit', 702.13]]

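'Recycle' is still in the top 5: simply summing the raw counts does not help, because the Twitter numbers are far larger than the others and dominate the total.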
<p>Calculate a new value for analysing emoji popularity:
for each column, its scale is estimated (as the column's average value);
each value in the column is normalized (divided by that scale);
the normalized values are added up.</p>
<p>Call this sum the “usage index”.</p>

In [79]:
# Per-column scale: the arithmetic mean of each source column over all rows.
# Values arrive as strings from the CSV reader, so each one is cast to float.
row_count = len(df1)

emojixpress_sum = instagram_sum = twitter_sum = 0
for record in df1:
    emojixpress_sum = emojixpress_sum + float(record['EmojiXpress, mln'])
    instagram_sum = instagram_sum + float(record['Instagram, mln'])
    twitter_sum = twitter_sum + float(record['Twitter, mln'])

emojixpress_mean = emojixpress_sum / row_count
instagram_mean = instagram_sum / row_count
twitter_mean = twitter_sum / row_count

In [86]:
# Usage index per emoji: each column value divided by that column's mean,
# the three ratios added together and rounded to two decimals.
# Result is a list of [name, index] pairs in file order.
data_norm = []
for record in df1:
    scaled_parts = (
        float(record['EmojiXpress, mln']) / emojixpress_mean,
        float(record['Instagram, mln']) / instagram_mean,
        float(record['Twitter, mln']) / twitter_mean,
    )
    usage_index = round(scaled_parts[0] + scaled_parts[1] + scaled_parts[2], 2)
    data_norm.append([record['Name'], usage_index])

In [91]:
# Order the pairs in place by usage index (second element), largest first.
data_norm.sort(reverse=True, key=lambda entry: entry[1])

In [92]:
# Show the full ranking by usage index.
data_norm

[['Tears of Joy', 13.23],
 ['Heart', 11.95],
 ['Heart Eyes', 6.31],
 ['Kissing', 4.66],
 ['Happy', 2.87],
 ['Two Hearts', 2.6],
 ['Loudly Crying', 2.41],
 ['Thumbs Up', 2.05],
 ['Heart Suit', 1.99],
 ['Recycle', 1.96],
 ['Winking', 1.56],
 ['Kiss Mark', 1.53],
 ['Sunglasses', 1.5],
 ['Beaming', 1.27],
 ['Unamused', 1.23],
 ['Fire', 1.05],
 ['ROFL', 0.92],
 ['Grinning', 0.49],
 ['Thinking', 0.35],
 ['Shrugging', 0.08]]