In [None]:
%matplotlib notebook
import matplotlib.pyplot as plt
import json

TWEETS_DATA_PATH = 'tweet_mining.json'

# Parse the file as one JSON document per line, keeping every line that parses.
results = []
malformed_lines = 0
# Decode as UTF-8 explicitly so the result does not depend on the platform's
# default locale encoding.
with open(TWEETS_DATA_PATH, encoding='utf-8') as tweets_file:
    for tweet_line in tweets_file:
        if not tweet_line.strip():
            # blank lines carry no record; skip them without counting them as errors
            continue
        try:
            results.append(json.loads(tweet_line))
        except ValueError:
            # json decoding errors are ValueError subclasses; keep going, but
            # remember how many lines were dropped so the loss is visible
            malformed_lines += 1

print(len(results))
if malformed_lines:
    print('skipped %d malformed line(s)' % malformed_lines)

In [None]:
import pandas

# Keep only the parsed records that carry tweet text. Anything else in
# `results` (a non-dict JSON value, or a dict without a 'text' key) would make
# the column extraction below fail.
# NOTE(review): assumes records without 'text' are not tweets - confirm against the data source.
tweets = [
    status for status in results
    if isinstance(status, dict) and 'text' in status
]

ignored_records = len(results) - len(tweets)
if ignored_records:
    print('ignored %d non-tweet record(s)' % ignored_records)

# Build the DataFrame in one step, one row per tweet.
statuses = pandas.DataFrame(
    {
        # the tweet text
        'text': [tweet['text'] for tweet in tweets],
        # the tweet language code; 'N/A' when the record has no 'lang' key
        'lang': [tweet.get('lang', 'N/A') for tweet in tweets],
        # 'place' is sometimes blank (or missing) in a tweet, so use 'N/A' in that case
        'country': [
            (tweet.get('place') or {}).get('country', 'N/A') for tweet in tweets
        ],
    },
    # fix the column order explicitly
    columns=['text', 'lang', 'country'],
)

print(statuses.head())

In [None]:
# Count how often each value occurs in the 'lang' column (the tweet's human
# language, not a programming language) and in the 'country' column (the
# tweet's country of origin).
tweets_by_lang, tweets_by_country = (
    statuses[column].value_counts() for column in ('lang', 'country')
)

# Show the first few rows of each frequency table.
for counts in (tweets_by_lang, tweets_by_country):
    print(counts.head())

In [None]:
# Colours shared by both charts
AXIS_COLOR = '#666666'
BAR_COLOR = '#FF7A00'


def _plot_top_counts(ax, counts, xlabel, title, top_n=10):
    """Draw the first ``top_n`` entries of ``counts`` as a styled bar chart.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    counts : pandas Series of counts; its index entries become the bar labels.
    xlabel : label for the x axis.
    title : title shown above the chart.
    top_n : number of leading entries of ``counts`` to plot.
    """
    counts.head(top_n).plot(ax=ax, kind='bar', color=BAR_COLOR)

    # Labels and title are applied after plotting so that any axis label set
    # by the pandas plot call cannot replace them.
    ax.set_xlabel(xlabel, fontsize=15, color=AXIS_COLOR)
    ax.set_ylabel('Number of tweets', fontsize=15, color=AXIS_COLOR)
    ax.set_title(title, fontsize=15, color=AXIS_COLOR)

    # tick label size and colour
    ax.tick_params(axis='x', labelsize=15, colors=AXIS_COLOR)
    ax.tick_params(axis='y', labelsize=10, colors=AXIS_COLOR)

    # color the spines (borders)
    for spine in ax.spines.values():
        spine.set_edgecolor(AXIS_COLOR)


# create our drawing space (figure); the large hspace leaves room between
# the two stacked charts
fig = plt.figure()
fig.subplots_adjust(hspace=.9)

# prepare to plot two charts on the same figure
ax1 = fig.add_subplot(2, 1, 1)
ax2 = fig.add_subplot(2, 1, 2)

# first chart: tweet languages; second chart: tweet countries.
# Each chart receives its own title on its own axes.
_plot_top_counts(ax1, tweets_by_lang, 'Tweet languages', 'Top 10 languages')
_plot_top_counts(ax2, tweets_by_country, 'Countries', 'Top 10 Countries')

# render the two graphs at once
plt.show()