# Add File Name Here

In [None]:
# File name of the chat text file that the initial import reads; replace the
# placeholder before running the notebook.
nameOfChat = 'NAME_OF_FILE_HERE.txt'

# Imports

In [None]:
import sys, re, pandas as pd
import csv
import matplotlib as mpl
import matplotlib.pyplot as plt 
import seaborn as sns
import datetime
from wordcloud import WordCloud, STOPWORDS 
import collections
import emojis
import math
from operator import itemgetter
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

## Settings

In [None]:
# Show at most ten rows whenever a dataframe is displayed.
pd.options.display.max_rows = 10
# Seed the stop-word list with NLTK's English stop words.
stoplist = stopwords.words('english')

Adding the 1,000 most common English words to the stop words list.

In [None]:
# Read the word file one entry per line, trimming surrounding whitespace.
with open("1-1000.txt") as f:
    wordlist = [line.strip() for line in f]

# Creating the Initial Dataframe

## Initial Import

In [None]:
# Load the chat file with one row per line in a single "message" column.
# Passing "\n" as the read_csv separator is rejected by current pandas
# releases, so the lines are read directly instead. To mirror the previous
# call, blank lines are skipped, the first remaining line is discarded (it
# used to be consumed as the header row) and leading spaces are removed.
with open(nameOfChat, encoding="utf-8") as chatFile:
    nonBlankLines = [line.rstrip("\r\n") for line in chatFile if line.strip()]

df = pd.DataFrame({"message": [line.lstrip(" ") for line in nonBlankLines[1:]]})

## Turning this into a More Useable Dataframe

### Split into timings and message

In [None]:
# Split every line at the first "]" only: column 0 keeps the text before it
# (presumably the "[date, time" stamp - confirm against the export format)
# and column 1 keeps everything after it.
df = df["message"].str.split("]", n = 1, expand = True)

### Processing Dates and Times

In [None]:
# Split column 0 at the first comma: part 0 is treated as the date and
# part 1 as the time by the cells that follow.
datesTime = df[0].str.split(",", n = 1, expand = True)

#### Processing Dates Only

##### Initial Values

In [None]:
# Break the date text into three parts at the "/" separators.
datesOnly = datesTime[0].str.split("/", n = 2, expand = True)
# Drop the first character of the first part, then remove any "[" still left.
# regex = False is required here: "[" on its own is not a valid regular
# expression, so asking pandas to treat it as a pattern raises an error on
# versions that compile single-character patterns.
datesOnly[0] = datesOnly[0].str[1:]
datesOnly[0] = datesOnly[0].str.replace('[' ,'',regex = False)
datesOnly.rename(columns = {0:'Day', 1:'Month',
                              2:'Year'}, inplace = True)
# Concatenated string keys: year+month+day and year+month.
datesOnly['FullDate'] = datesOnly['Year'] + datesOnly['Month'] + datesOnly['Day']
datesOnly['MonthYear'] = datesOnly['Year']+ datesOnly['Month']

##### Days of Week

In [None]:
# Parse the year+month+day strings (values that cannot be parsed are coerced
# to missing) and keep only the weekday number of each date.
parsedDates = pd.to_datetime(datesOnly['FullDate'], errors='coerce', format='%Y%m%d')
datesOnly['DayOfWeek'] = parsedDates.dt.dayofweek

#### Processing Times Only

In [None]:
# Split the time text at its first two ":" and name the resulting parts,
# then keep the unsplit time text alongside them.
timeParts = datesTime[1].str.split(":", n = 2, expand = True)
timesOnly = timeParts.rename(columns = {0:'Hour', 1:'Minutes', 2:'Seconds'})
timesOnly['FullTime'] = datesTime[1]

##### Setting Up Hours and Minutes 

Combining hours and minutes (and ignoring seconds) should make it easier to find peak times.

In [None]:
# Join the hour and minute text back together as "hour:minutes".
timesOnly['HourMinute'] = timesOnly['Hour'] + ":" + timesOnly['Minutes']

### Processing Message and Sender Information

In [None]:
# Everything before the first ":" is taken as the sender, the rest as the
# message text.
senderAndMessage = df[1].str.split(":", n = 1, expand = True)
messageProcessing = senderAndMessage.rename(columns = {0:'Sender', 1:'Message'})

#### Number of Words Per Message

In [None]:
# Count the whitespace-separated tokens in each message.
messageProcessing['NoOfWords'] = messageProcessing['Message'].str.split().str.len()

### Combining These Dataframes

#### Initial Combination

In [None]:
# Place the date, time and sender/message columns side by side (aligned on
# the shared row index).
chatDataOriginal = pd.concat([datesOnly, timesOnly, messageProcessing], axis=1)

#### Removing Corrupted Data

In [None]:
# Keep only the rows in which every column was parsed (no missing values).
chatData = chatDataOriginal.dropna()

##### Effect of removing data 

In [None]:
# Row counts before and after dropping incomplete rows.
originalLength = len(chatDataOriginal.index)
newLength = len(chatData.index)
numberRemoved = originalLength - newLength
# Share of the ORIGINAL rows that was removed. Dividing by the remaining row
# count overstated the figure and failed when every row had been dropped.
percentageRemoved = (numberRemoved/originalLength)*100 if originalLength else 0.0

In [None]:
# Report how many rows were dropped; the figure is a percentage, so say so.
print(str(numberRemoved) + " items were removed. This suggests that " + str(percentageRemoved) + "% of the messages may have been edited.")

Items removed are likely due to messages splitting over multiple lines, for example because a message or URL contains a newline ("\n"). As such, analysis can continue, but it does mean that some of the longer messages have been removed.

# Creating Additional Dataframes and Gathering Info

#### By Year

In [None]:
# Rows whose Year text is exactly '2021' (Year is compared as a string).
year2021 = chatData.loc[chatData['Year'] == '2021']

#### Active Members in 2021

In [None]:
# Distinct sender values, in order of first appearance in the 2021 rows.
activeMembers2021 = year2021['Sender'].unique()

In [None]:
# Number of distinct senders in 2021.
numberOfActiveMembers2021 = len(activeMembers2021)

#### Number of Messages

In [None]:
# Number of 2021 rows, i.e. messages kept after cleaning.
totalMessages2021 = len(year2021)

# Analysis 

## Messages by Number

In [None]:
# Message count per sender, largest first, plus the sender names in that
# same order.
messagesSentByEachUser2021 = year2021['Sender'].value_counts()
namesOfSendersInOrderOfMessages2021 = list(messagesSentByEachUser2021.index)

### Making this into a bar chart

In [None]:
# Bar chart of message counts per member, saved to noOfMessagesByMember.png.
fig, ax1 = plt.subplots(figsize=(12,6))
fig.patch.set_facecolor('white')
# Bars are placed at positions 0..n-1 and then relabelled with sender names.
memberPositions = list(range(numberOfActiveMembers2021))
chart = sns.barplot(x = memberPositions, y = messagesSentByEachUser2021, ax=ax1)
chart.set_xticklabels(namesOfSendersInOrderOfMessages2021, rotation=90)
ax1.set_xlabel('Chat Member')
ax1.set_ylabel('Number of Messages')
ax1.set_title('Messages Sent by Each Member in 2021')
fig.savefig('noOfMessagesByMember.png', bbox_inches='tight', dpi=600)

## Messages by Day of Week

Significant figure code comes from: https://www.kite.com/python/answers/how-to-round-a-number-to-significant-digits-in-python

In [None]:
# Weekday names indexed by weekday number (0 = Monday ... 6 = Sunday).
daysOfWeek = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

# Message count per weekday number, busiest first.
messagesSentByEachDay2021 = year2021['DayOfWeek'].value_counts()
daysOfWeekInOrderOfMessages2021 = [int(x) for x in messagesSentByEachDay2021.index.tolist()]
# Plain indexing is used instead of itemgetter(*...): with a single weekday
# itemgetter returns a bare string, which list() would split into characters.
nameOfDaysOfWeekInOrderOfMessages2021 = [daysOfWeek[dayNumber] for dayNumber in daysOfWeekInOrderOfMessages2021]

nameOfBusiestDay2021 = nameOfDaysOfWeekInOrderOfMessages2021[0]
messagesOnBusiestDay2021 = max(messagesSentByEachDay2021)

# Busiest weekday's share of all messages, rounded to sigFig significant figures.
percentageOnBusiestDay2021 = (messagesOnBusiestDay2021/totalMessages2021)*100
sigFig = 3
percentageOnBusiestDay2021 =  round(percentageOnBusiestDay2021, sigFig - int(math.floor(math.log10(abs(percentageOnBusiestDay2021)))) - 1)

### Making this into a bar chart

Creating the frequencies in day-of-week order (Mon - Sun), as opposed to the descending count order used above.

In [None]:
# Weekday numbers the chart must show, whether or not messages exist for them.
daysShouldHave = list(range(0,7))
messagesSentByEachDaySorted2021 = year2021['DayOfWeek'].value_counts().sort_index()

# Weekday numbers that actually occur, and their counts in the same order.
daysDoHave = [int(x) for x in messagesSentByEachDaySorted2021.index.tolist()]
messagesInDayOrder = messagesSentByEachDaySorted2021.tolist()

# Look each required weekday up by number; weekdays without messages get 0.
# This replaces a hand-written two-index merge and also copes with no data.
countByDay = dict(zip(daysDoHave, messagesInDayOrder))
actualDayFrequencies = [countByDay.get(day, 0) for day in daysShouldHave]

Plotting into bar chart

In [None]:
# Bar chart of message counts per weekday, saved to noOfMessagesByDay.png.
fig, ax1 = plt.subplots(figsize=(12,6))
fig.patch.set_facecolor('white')
chart = sns.barplot(x = daysShouldHave, y = actualDayFrequencies, ax=ax1)
# Replace the numeric tick labels with weekday names.
chart.set_xticklabels(daysOfWeek, rotation=90)
ax1.set_xlabel('Day')
ax1.set_ylabel('Number of Messages')
ax1.set_title('Messages by Each Day of the Week 2021')
fig.savefig('noOfMessagesByDay.png', bbox_inches='tight', dpi=600)

## Messages by Month

In [None]:
# Month names indexed from 0 (January) to 11 (December).
monthsOfYear = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]

# Message count per month text, busiest first.
messagesSentByMonth2021 = year2021['Month'].value_counts()

# Convert the month text to zero-based positions in monthsOfYear.
monthInOrderOfMessages2021 = [int(x) - 1 for x in messagesSentByMonth2021.index.tolist()]

# Plain indexing is used instead of itemgetter(*...): with a single month
# itemgetter returns a bare string, which list() would split into characters.
nameOfMonthInOrderOfMessages2021 = [monthsOfYear[monthIndex] for monthIndex in monthInOrderOfMessages2021]

nameOfBusiestMonth2021 = nameOfMonthInOrderOfMessages2021[0]
messagesInBusiestMonth2021 = max(messagesSentByMonth2021)

# Busiest month's share of all messages, rounded to sigFig significant figures.
percentageInBusiestMonth2021 = (messagesInBusiestMonth2021/totalMessages2021)*100
sigFig = 3
percentageInBusiestMonth2021 = round(percentageInBusiestMonth2021, sigFig - int(math.floor(math.log10(abs(percentageInBusiestMonth2021)))) - 1)

Setting up the counts in calendar order so that the bar chart runs from January to December.

In [None]:
# Zero-based month positions the chart must show (0 = January ... 11 = December).
monthsShouldHave = list(range(0,12))
messagesByMonthSorted = year2021['Month'].value_counts().sort_index()

# Month numbers (1-12) that actually occur, and their counts in the same order.
monthsDoHave = [int(x) for x in messagesByMonthSorted.index.tolist()]
messagesInTimeOrder = messagesByMonthSorted.tolist()

# Look each month up by number; months without messages get 0. The previous
# loop indexed the Series with an integer although its labels are month
# strings, which relies on a deprecated positional fallback.
countByMonth = dict(zip(monthsDoHave, messagesInTimeOrder))
actualMonthFrequencies = [countByMonth.get(month + 1, 0) for month in monthsShouldHave]

Plotting as bar chart

In [None]:
# Bar chart of message counts per month, saved to noOfMessagesByMonth.png.
fig, ax1 = plt.subplots(figsize=(12,6))
chart = sns.barplot(x = monthsShouldHave, y = actualMonthFrequencies, ax=ax1)
ax1.set_xlabel('Month')
ax1.set_ylabel('Messages')
ax1.set_title('Messages Sent in Each Month')
# Replace the numeric tick labels with month names.
chart.set_xticklabels(monthsOfYear, rotation=90)
fig.savefig('noOfMessagesByMonth.png', bbox_inches='tight', dpi=600)

## Messages by Hour

In [None]:
# Message count per hour text, busiest first, and the hours as integers in
# that same order.
messagesSentByHour2021 = year2021['Hour'].value_counts()
hrsInOrderOfMessagesSent2021 = [int(hourText) for hourText in messagesSentByHour2021.index.tolist()]

messagesInBusiestHour2021 = max(messagesSentByHour2021)

# Busiest hour's share of all messages, rounded to sigFig significant figures.
sigFig = 3
busiestHourShare = (messagesInBusiestHour2021/totalMessages2021)*100
decimalPlaces = sigFig - int(math.floor(math.log10(abs(busiestHourShare)))) - 1
percentageInBusiestHour2021 = round(busiestHourShare, decimalPlaces)

### Making this into a bar chart

Set up

In [None]:
# Hours the chart must show, whether or not messages exist for them.
hrsShouldHave = list(range(0,24))
messagesByHrSorted = year2021['Hour'].value_counts().sort_index()

# Hours that actually occur (as integers), and their counts in the same order.
hrsDoHave = [int(x) for x in messagesByHrSorted.index.tolist()]
messagesInTimeOrder = messagesByHrSorted.tolist()

# Look each required hour up by number; hours without messages get 0.
# This replaces a hand-written two-index merge and also copes with no data.
countByHour = dict(zip(hrsDoHave, messagesInTimeOrder))
actualHrFrequencies = [countByHour.get(hr, 0) for hr in hrsShouldHave]

Plotting

In [None]:
# Bar chart of message counts per hour, saved to noOfMessagesByHour.png.
fig, ax1 = plt.subplots(figsize=(12,6))
chart = sns.barplot(x = hrsShouldHave, y = actualHrFrequencies, ax=ax1)
ax1.set_xlabel('Hour')
ax1.set_ylabel('Number Of Messages')
ax1.set_title('Messages Sent in Each Hour')
chart.set_xticklabels(hrsShouldHave, rotation=90)
fig.savefig('noOfMessagesByHour.png', bbox_inches='tight', dpi=600)

## Messages by Day

In [None]:
# Message count per calendar date, busiest first (computed once and reused).
messagesPerDate2021 = year2021['FullDate'].value_counts()
busiestOverallDay = messagesPerDate2021.index[0]
# .iloc makes the positional lookup explicit; a bare [0] on a Series labelled
# with date strings depends on a deprecated integer-key fallback.
numberOfMessagesOnBusiestDay = messagesPerDate2021.iloc[0]
# Build a "day/month" label from the first message sent on that date.
firstRowOfBusiestDay = year2021.loc[year2021["FullDate"] == busiestOverallDay].iloc[0]
busiestOverallDayNice = str(firstRowOfBusiestDay['Day']) + "/"+ str(firstRowOfBusiestDay['Month'])

In [None]:
# Bare expression so the notebook displays the "day/month" label.
busiestOverallDayNice

## Text Analysis

### Building a Bag Of Words

Improving the stop words based on what has appeared - the gif/image entries come from the effect of removing media.

In [None]:
# Extra tokens to ignore, followed by the common-word list loaded earlier;
# both are appended to the existing stop-word list in place.
newStopWords = ['omitted', 'image','gif', 'co', 'https', 'www', 'video', '\u200eimage', 'gif']
stoplist += newStopWords + wordlist

Building the overall bag of words (called word soup!)

In [None]:
# Lower-case every message, normalise its whitespace and join everything into
# one long string (each message is followed by a single space).
wordSoupString = "".join(
    " ".join(token.lower() for token in str(val).split()) + " "
    for val in year2021.Message)

# Tokenise the whole text; wordSoup ends up holding the token list.
wordSoup = word_tokenize(wordSoupString)

# A set makes each stop-word test constant time instead of a list scan.
stopWordSet = set(stoplist)

# Keep tokens that are not stop words, strip punctuation from them and drop
# anything shorter than three characters. The result is a real list (not a
# one-shot filter object), so later cells can be re-run safely.
filtered = []
for w in wordSoup:
    if w in stopWordSet:
        continue
    w = re.sub(r'[^\w\s]', '', w)
    if len(w) > 2:
        filtered.append(w)

In [None]:
# Count each remaining word, then list (word, count) pairs from most to
# least frequent.
mostCommonWord = collections.Counter(filtered)
mostCommonWords = mostCommonWord.most_common()

#### Building a Bag of Words for each member of the chat 

In [None]:
# Per-member text and per-member filtered word lists, both in the same order
# as activeMembers2021.
listOfWordSoups = []
listOfFilteredFun = []

# A set makes each stop-word test constant time instead of a list scan.
stopWordSet = set(stoplist)

for member in activeMembers2021:
    memberMessages = year2021.loc[year2021["Sender"] == member].Message
    # Lower-case and whitespace-normalise this member's messages into one string.
    userWordSoup = "".join(
        " ".join(token.lower() for token in str(val).split()) + " "
        for val in memberMessages)
    listOfWordSoups.append(userWordSoup)

    # Drop stop words, strip punctuation and keep words of three or more
    # characters. A real list is stored (not a one-shot filter object) so the
    # counting cell can be re-run without getting empty results.
    userFiltered = []
    for w in word_tokenize(userWordSoup):
        if w in stopWordSet:
            continue
        w = re.sub(r'[^\w\s]', '', w)
        if len(w) > 2:
            userFiltered.append(w)

    listOfFilteredFun.append(userFiltered)

Getting most common words for each user

In [None]:
# For each member, the (word, count) pairs ordered from most to least frequent.
mostCommonWordsByUser = [
    collections.Counter(listOfFilteredFun[memberPosition]).most_common()
    for memberPosition in range(numberOfActiveMembers2021)
]

### Word Cloud

In [None]:
# Build the word cloud from the full lower-cased chat text, ignoring the
# stop-word list. (An unused string copy of the stop list was removed.)
wordcloud = WordCloud(width = 800, height = 800,
                background_color ='white',
                stopwords = stoplist,
                min_font_size = 10).generate(wordSoupString)

# plot the WordCloud image
fig = plt.figure(figsize = (8, 8), facecolor = None)
plt.imshow(wordcloud)
plt.axis("off")
plt.tight_layout(pad = 0)

# Save before showing: showing first can leave nothing to save on backends
# that release the figure once it has been displayed.
fig.savefig('messageAura.png', bbox_inches='tight', dpi=600)
plt.show()

## Emoji Analysis

### Emoji Aura

This is adapted from https://stackoverflow.com/questions/66473771/wordcloud-for-only-emojis
Aura used in reference to Spotify Wrapped this year - it's just a word cloud :)

In [None]:
class EmojiCloud:
    """Word-cloud renderer that is fed emoji counts rather than words."""

    def __init__(self, font_path='EmojiOneColor.otf'):
        """Store the font path and create the underlying WordCloud object."""
        self.font_path = font_path
        self.word_cloud = self.initialize_wordcloud()
        # Filled in by generate(): emoji -> share of all emoji occurrences.
        self.emoji_probability = None

    def initialize_wordcloud(self):
        """Return a WordCloud configured with this instance's font."""
        settings = dict(
            font_path=self.font_path,
            width=2000,
            height=1000,
            background_color='white',
            random_state=42,
            collocations=False,
        )
        return WordCloud(**settings)

    def generate(self, text):
        """Count the emojis found in text and render them as a cloud.

        Also records each emoji's relative frequency in emoji_probability.
        Returns whatever WordCloud.generate_from_frequencies returns.
        """
        emoji_frequencies = collections.Counter(emojis.iter(text))
        total_count = sum(emoji_frequencies.values())

        probabilities = {}
        for symbol, occurrences in emoji_frequencies.items():
            probabilities[symbol] = occurrences/total_count
        self.emoji_probability = probabilities

        return self.word_cloud.generate_from_frequencies(emoji_frequencies)

emoji_cloud = EmojiCloud(font_path='./EmojiOneColor.otf')

In [None]:
# Render the emoji cloud for the whole chat text and save it to emojiAura.png.
emojiAura = emoji_cloud.generate(wordSoupString)

fig, ax1= plt.subplots(figsize=(12,12))
fig.patch.set_facecolor('white')
plt.axis('off')
plt.imshow(emojiAura)
fig.savefig('emojiAura.png', bbox_inches='tight', dpi=600)

### Emoji Frequencies

In [None]:
# (emoji, count) pairs for the whole chat, most frequent first.
emojiFrequencies = collections.Counter(emojis.iter(wordSoupString)).most_common()

# The same ranking computed separately from each member's own text.
emojiFrequenciesByUser = [
    collections.Counter(emojis.iter(listOfWordSoups[memberPosition])).most_common()
    for memberPosition in range(numberOfActiveMembers2021)
]

Average number of emojis used per message by each user. This hasn't been converted into any output yet and doesn't quite work yet.

In [None]:
# Emojis per message for every member, in activeMembers2021 order.
userEmojiProportions = []

for memberPosition in range(numberOfActiveMembers2021):
    member = activeMembers2021[memberPosition]
    # Total emoji occurrences in this member's text.
    emojiSum = sum(pair[1] for pair in emojiFrequenciesByUser[memberPosition])

    messagesSentByThatUser = len(year2021.loc[year2021["Sender"] == member])
    print(member)
    userEmojiProportions.append(emojiSum/messagesSentByThatUser)

## Image Analysis

TODO

# Wrapped

## Messages by Number

In [None]:
# Report section: message totals and the per-member ranking.
print("*** Messages by Number ***")
print("This year " + str(totalMessages2021) + " messages were sent by " + str(numberOfActiveMembers2021) + " chat members")

# Counts as a plain list in ranking order. Indexing the Series itself with an
# integer relies on a deprecated positional fallback, because its labels are
# sender names.
senderCounts = messagesSentByEachUser2021.tolist()
print("The top messenger this year was " + namesOfSendersInOrderOfMessages2021[0] + " who sent " + str(senderCounts[0]) + " messages. \n")
print("The full rankings are:")

for rank, (senderName, senderCount) in enumerate(zip(namesOfSendersInOrderOfMessages2021, senderCounts), start=1):
    print(str(rank) + ". " + senderName + " with " + str(senderCount) + " messages")

# Show the chart that was saved earlier.
plt.figure(figsize = (8, 8), facecolor = None)
img = plt.imread('noOfMessagesByMember.png')
plt.imshow(img)
plt.axis("off")
plt.tight_layout(pad = 0)

## Messages by Day of Week

In [None]:
# Report section: busiest weekday and the weekday ranking.
print("*** Messages by Day of Week ***")
print(f"You chatted most on {nameOfBusiestDay2021}s this year. ")
print(f"{messagesOnBusiestDay2021} messages were sent on {nameOfBusiestDay2021}s. That's {percentageOnBusiestDay2021}% of the total! \n")
print("The full rankings are:")

# Counts in the same (busiest-first) order as the weekday names.
dayCounts = list(messagesSentByEachDay2021)
for position, dayName in enumerate(nameOfDaysOfWeekInOrderOfMessages2021):
    print(f"{position + 1}. {dayName} with {dayCounts[position]} messages")

# Show the chart that was saved earlier.
plt.figure(figsize = (8, 8), facecolor = None)
plt.imshow(plt.imread('noOfMessagesByDay.png'))
plt.axis("off")
plt.tight_layout(pad = 0)

## Messages by Month

In [None]:
# Report section: busiest month and the top (at most five) months.
print("*** Messages by Month ***")
print(f"You chatted most in {nameOfBusiestMonth2021} this year. ")
print(f"{messagesInBusiestMonth2021} messages were sent in {nameOfBusiestMonth2021}. That's {percentageInBusiestMonth2021}% of the total! \n")
print("The top months were:")

# Never list more than five months, and fewer if fewer have messages.
maxIndex = min(5, len(nameOfMonthInOrderOfMessages2021))
monthCounts = list(messagesSentByMonth2021)
for position in range(maxIndex):
    print(f"{position + 1}. {nameOfMonthInOrderOfMessages2021[position]} with {monthCounts[position]} messages")

# Show the chart that was saved earlier.
plt.figure(figsize = (8, 8), facecolor = None)
plt.imshow(plt.imread('noOfMessagesByMonth.png'))
plt.axis("off")
plt.tight_layout(pad = 0)

## Messages by Time of Day

In [None]:
# Report section: busiest hour and the top (at most five) hours.
# The section header is printed here as well, matching the other sections.
print("*** Messages by Hour ***")

# End-of-hour labels; the hour after 23 wraps round to "0:00". The headline
# now uses the same wrap as the ranking instead of printing "24:00".
nextHourLabels = ["0:00" if hr == 23 else str(hr + 1) + ":00" for hr in hrsInOrderOfMessagesSent2021]
# Counts as a plain list: the Series is labelled with hour strings, so an
# integer key would rely on a deprecated positional fallback.
hourCounts = messagesSentByHour2021.tolist()

print("You chatted most between " + str(hrsInOrderOfMessagesSent2021[0]) + ":00 and " + nextHourLabels[0] + " this year. ")
print(str(messagesInBusiestHour2021) + " messages were sent in this hour. That's " + str(percentageInBusiestHour2021) + "% of the total! \n")
print("The top 5 hours are:")

# Limit by the number of HOURS available (the month list length was used
# here by mistake).
maxIndex = min(5, len(hrsInOrderOfMessagesSent2021))

for position in range(maxIndex):
    print(str(position + 1) + ". " + str(hrsInOrderOfMessagesSent2021[position]) + ":00 to " + nextHourLabels[position] + " with " + str(hourCounts[position]) + " messages")

# Show the chart that was saved earlier.
plt.figure(figsize = (8, 8), facecolor = None)
img = plt.imread('noOfMessagesByHour.png')
plt.imshow(img)
plt.axis("off")
plt.tight_layout(pad = 0)

## Messages by Times that Don't Fit Elsewhere

In [None]:
# Report the single calendar date with the most messages and its count.
print("The day with the most messages was " + busiestOverallDayNice + " when " + str(numberOfMessagesOnBusiestDay) + " messages were sent.")


## Emoji Analysis

In [None]:
# Report section: most used emojis overall and each member's top emoji.
print("*** Most Frequent Emoji ***")

if emojiFrequencies:
    print("The most used emoji was " + emojiFrequencies[0][0] +" this year. It was used " + str(emojiFrequencies[0][1]) + " times. \n")

    print("The full rankings are:")

    # Slicing lists at most five entries. The previous limit dropped the last
    # emoji when fewer than five existed and printed a stray debug number.
    for rank, (emojiSymbol, emojiCount) in enumerate(emojiFrequencies[:5], start=1):
        print(str(rank) + ". " + emojiSymbol + " with " + str(emojiCount) + " usages")
else:
    # Without this guard the headline line fails when no emoji was used.
    print("No emojis were used this year!")

print("\n")

# Each member's most used emoji, or a note when they used none.
for member, memberEmojiFreqs in zip(activeMembers2021, emojiFrequenciesByUser):
    if len(memberEmojiFreqs) != 0:
        print(member + "'s top emoji is " + memberEmojiFreqs[0][0] + " with " + str(memberEmojiFreqs[0][1]) + " uses.")
    else:
        print(member + " didn't use any emojis this year!")

print("\nThe chat's emoji aura is:")
plt.figure(figsize = (8, 8), facecolor = None)
img = plt.imread('emojiAura.png')
plt.imshow(img)
plt.axis("off")
plt.tight_layout(pad = 0)

## Text Analysis

In [None]:
# Report section: the chat's most common words and each member's top word.
print("*** How You Message ***")

print("After removing 1,000 of the most common words in English this is what's left - these are the words that make your chat, your chat!")

# Slicing lists at most five words. The previous fallback limit was taken
# from the emoji list by mistake and printed a stray debug number.
for rank, (word, wordCount) in enumerate(mostCommonWords[:5], start=1):
    print(str(rank) + ". " + word + " with " + str(wordCount) + " uses")

print("\n")

# Each member's most frequent word, or a note when none is left after filtering.
for member, memberWords in zip(activeMembers2021, mostCommonWordsByUser):
    if len(memberWords) != 0:
        print(member + "'s defining word is " + "'" +  memberWords[0][0]  + "'")
    else:
        print(member + " doesn't have a defining word this year!")

print("\nThe chat's message aura is:")
plt.figure(figsize = (8, 8), facecolor = None)
img = plt.imread('messageAura.png')
plt.imshow(img)
plt.axis("off")
plt.tight_layout(pad = 0)

# Full Wrapped

In [None]:
# Full report: prints every section in turn and shows each saved chart.

def _showSavedImage(fileName):
    """Display a previously saved chart image without axes."""
    plt.figure(figsize = (8, 8), facecolor = None)
    img = plt.imread(fileName)
    plt.imshow(img)
    plt.axis("off")
    plt.tight_layout(pad = 0)
    plt.show()


# ---- Messages by number ----
print("*** Messages by Number ***")
print("This year " + str(totalMessages2021) + " messages were sent by " + str(numberOfActiveMembers2021) + " chat members")

# Counts as a plain list in ranking order. Indexing the Series itself with an
# integer relies on a deprecated positional fallback, because its labels are
# sender names.
senderCounts = messagesSentByEachUser2021.tolist()
print("The top messenger this year was " + namesOfSendersInOrderOfMessages2021[0] + " who sent " + str(senderCounts[0]) + " messages. \n")
print("The full rankings are:")

for rank, (senderName, senderCount) in enumerate(zip(namesOfSendersInOrderOfMessages2021, senderCounts), start=1):
    print(str(rank) + ". " + senderName + " with " + str(senderCount) + " messages")

_showSavedImage('noOfMessagesByMember.png')

# ---- Messages by day of week ----
print("*** Messages by Day of Week ***")
print("You chatted most on " + nameOfBusiestDay2021 +"s this year. ")
print(str(messagesOnBusiestDay2021) + " messages were sent on " + nameOfBusiestDay2021 +"s. That's " + str(percentageOnBusiestDay2021) + "% of the total! \n")
print("The full rankings are:")

for rank, (dayName, dayCount) in enumerate(zip(nameOfDaysOfWeekInOrderOfMessages2021, list(messagesSentByEachDay2021)), start=1):
    print(str(rank) + ". " + dayName + " with " + str(dayCount) + " messages")

_showSavedImage('noOfMessagesByDay.png')

# ---- Messages by month ----
print("*** Messages by Month ***")
print("You chatted most in " + nameOfBusiestMonth2021 +" this year. ")
print(str(messagesInBusiestMonth2021) + " messages were sent in " + nameOfBusiestMonth2021 +". That's " + str(percentageInBusiestMonth2021) + "% of the total! \n")
print("The top months were:")

# List at most five months, fewer if fewer have messages.
topMonths = list(zip(nameOfMonthInOrderOfMessages2021, list(messagesSentByMonth2021)))[:5]
for rank, (monthName, monthCount) in enumerate(topMonths, start=1):
    print(str(rank) + ". " + monthName + " with " + str(monthCount) + " messages")

_showSavedImage('noOfMessagesByMonth.png')

# ---- Messages by hour ----
print("*** Messages by Hour ***")

# End-of-hour labels; the hour after 23 wraps round to "0:00". The headline
# now uses the same wrap as the ranking instead of printing "24:00".
nextHourLabels = ["0:00" if hr == 23 else str(hr + 1) + ":00" for hr in hrsInOrderOfMessagesSent2021]
# Counts as a plain list: the Series is labelled with hour strings, so an
# integer key would rely on a deprecated positional fallback.
hourCounts = messagesSentByHour2021.tolist()

print("You chatted most between " + str(hrsInOrderOfMessagesSent2021[0]) + ":00 and " + nextHourLabels[0] + " this year. ")
print(str(messagesInBusiestHour2021) + " messages were sent in this hour. That's " + str(percentageInBusiestHour2021) + "% of the total! \n")
print("The top 5 hours are:")

# Limit by the number of HOURS available (the month list length was used
# here by mistake).
maxIndex = min(5, len(hrsInOrderOfMessagesSent2021))
for position in range(maxIndex):
    print(str(position + 1) + ". " + str(hrsInOrderOfMessagesSent2021[position]) + ":00 to " + nextHourLabels[position] + " with " + str(hourCounts[position]) + " messages")

_showSavedImage('noOfMessagesByHour.png')

# ---- Busiest single date ----
print("The day with the most messages was " + busiestOverallDayNice + " when " + str(numberOfMessagesOnBusiestDay) + " messages were sent.\n")

# ---- Emoji ----
print("*** Most Frequent Emoji ***")

if emojiFrequencies:
    print("The most used emoji was " + emojiFrequencies[0][0] +" this year. It was used " + str(emojiFrequencies[0][1]) + " times. \n")

    print("The full rankings are:")

    # Slicing lists at most five entries. The previous limit dropped the last
    # emoji when fewer than five existed and printed a stray debug number.
    for rank, (emojiSymbol, emojiCount) in enumerate(emojiFrequencies[:5], start=1):
        print(str(rank) + ". " + emojiSymbol + " with " + str(emojiCount) + " usages")
else:
    # Without this guard the headline line fails when no emoji was used.
    print("No emojis were used this year!")

print("\n")

# Each member's most used emoji, or a note when they used none.
for member, memberEmojiFreqs in zip(activeMembers2021, emojiFrequenciesByUser):
    if len(memberEmojiFreqs) != 0:
        print(member + "'s top emoji is " + memberEmojiFreqs[0][0] + " with " + str(memberEmojiFreqs[0][1]) + " uses.")
    else:
        print(member + " didn't use any emojis this year!")

print("\nThe chat's emoji aura is:")
_showSavedImage('emojiAura.png')

# ---- Words ----
print("*** How You Message ***")

print("After removing 1,000 of the most common words in English this is what's left - these are the words that make your chat, your chat!")

# Slicing lists at most five words. The previous fallback limit was taken
# from the emoji list by mistake and printed a stray debug number.
for rank, (word, wordCount) in enumerate(mostCommonWords[:5], start=1):
    print(str(rank) + ". " + word + " with " + str(wordCount) + " uses")

print("\n")

# Each member's most frequent word, or a note when none is left after filtering.
for member, memberWords in zip(activeMembers2021, mostCommonWordsByUser):
    if len(memberWords) != 0:
        print(member + "'s defining word is " + "'" +  memberWords[0][0]  + "'")
    else:
        print(member + " doesn't have a defining word this year!")

print("\nThe chat's message aura is:")
_showSavedImage('messageAura.png')