In [28]:
import pandas as pd
import emoji

# Read the exported chat messages
json_file_path = 'apes_messages.json'
raw_messages = pd.read_json(json_file_path)

# Skip the first 3 rows and renumber the remaining rows from 0
df = raw_messages.iloc[3:].reset_index(drop=True)

# Parse the textual timestamps once, then derive the split date/time columns
# from the parsed values
parsed_timestamps = pd.to_datetime(df['timestamp'], format='%b %d, %Y %I:%M:%S%p')
df['timestamp'] = parsed_timestamps
df['date'] = parsed_timestamps.dt.strftime('%Y-%m-%d')  # 'YYYY-MM-DD' strings
df['time'] = parsed_timestamps.dt.time

# 'timestamp' is deliberately kept: later cells group on it and read its hour

# Display the DataFrame
print(df)



                sender                                        content  \
0     Nguyễn Mạnh Phúc                      Anyone wanna hang out tmr   
1                Mi Mi                                           :)))   
2     Nguyễn Mạnh Phúc                                            Yes   
3                Mi Mi               Is it yours @Nguyễn Mạnh Phúc  ?   
4                Mi Mi                                                  
...                ...                                            ...   
6999          Vân Thúy                                  :)) nữa hả 😆🤣   
7000       Ly Huong Do          But yeah here’s one more lol❤Vân Thúy   
7001       Ly Huong Do  I know we already have tons of group😆Vân Thúy   
7002       Ly Huong Do                                            Ayo   
7003       Ly Huong Do            Ly named the group Viet Lao gang 🌞.   

               timestamp        date      time  
0    2023-11-03 21:53:43  2023-11-03  21:53:43  
1    2023-11-03 20:10:04 

In [29]:
import plotly.express as px

# Number of messages per sender (value_counts puts the largest count first)
sender_counts = df['sender'].value_counts()

# One bar per sender. update_layout returns the figure it was called on, so
# the title is centred in the same expression (title_x is the horizontal
# position of the title, 0.5 = middle)
fig = px.bar(
    x=sender_counts.index,
    y=sender_counts.values,
    title='Message Count by Ape',
    labels={'x': 'Apes', 'y': 'Message Count'},
).update_layout(title_x=0.5)

# Render the chart
fig.show()


In [30]:

# Run 'timestamp' through to_datetime again; with errors='coerce' any value
# that cannot be converted becomes NaT instead of raising.
# NOTE(review): the first cell already parses this column, so this is
# presumably a safeguard only - confirm before removing.
timestamp_format = '%b %d, %Y %I:%M:%S%p'
df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce', format=timestamp_format)

# Bucket the messages into calendar days and count the rows in each bucket
daily_bins = pd.Grouper(key='timestamp', freq='D')
messages_per_day = df.groupby(daily_bins).size()
message_freq = messages_per_day.reset_index(name='message_count')

# Line chart of the daily counts, with the title centred horizontally
# (title_x is the horizontal position of the title)
fig = px.line(
    message_freq,
    x='timestamp',
    y='message_count',
    labels={'timestamp': 'Date', 'message_count': 'Message Count'},
    title='Message Frequency Over Time',
).update_layout(title_x=0.5)

# Render the chart
fig.show()


In [31]:
# Hour of the day at which each message was sent
df['hour'] = df['timestamp'].dt.hour

# One row per (hour, sender) pair that occurs in the data, with its message count
message_freq_by_hour = df.groupby(['hour', 'sender']).size().reset_index(name='message_count')

# Axis values, in the order they first appear in the grouped frame.
# NOTE: an hour in which nobody sent a message has no row here, so it is
# absent from the heatmap's y axis rather than shown as a row of zeros.
senders = message_freq_by_hour['sender'].unique()
hours = message_freq_by_hour['hour'].unique()

# Position of every sender / hour on its axis. Built once up front so the fill
# loop does a dictionary lookup per row instead of rebuilding a list and
# scanning it linearly for every row.
sender_position = {name: column for column, name in enumerate(senders)}
hour_position = {hour: row for row, hour in enumerate(hours)}

# hours x senders grid of counts; pairs with no messages keep the initial 0
message_counts = [[0] * len(senders) for _ in range(len(hours))]
grouped_rows = message_freq_by_hour[['hour', 'sender', 'message_count']].itertuples(index=False, name=None)
for hour, sender_name, count in grouped_rows:
    message_counts[hour_position[hour]][sender_position[sender_name]] = count

# Create the heatmap using plotly.express
fig = px.imshow(message_counts, x=senders, y=hours, color_continuous_scale='Viridis')

# Set axis labels and title
fig.update_layout(
    xaxis=dict(title='Sender'),
    yaxis=dict(title='Hour of Day'),
    title='Message Frequency by Sender and Hour of Day'
)

# Show the plot
fig.show()

<<<<<<<<<<<<<<<<<< EMOJI TO BE CONSIDERED >>>>>>>>>>>>>>>>>>

In [32]:
def emoji_count(string, unique=False):
    """Count the emojis in a piece of text and return them.

    Args:
        string: Text to scan. Anything that is not a ``str`` (for example
            ``None`` or a NaN placeholder for a message with no text) is
            treated as containing no emojis instead of being handed to the
            emoji library.
        unique: When True the emojis come from ``emoji.distinct_emoji_list``
            (each distinct emoji once); otherwise from ``emoji.emoji_list``
            (one entry per occurrence).

    Returns:
        A ``(count, emojis)`` tuple, where ``emojis`` is the list returned by
        the emoji library call and ``count`` is its length. ``(0, [])`` for
        non-string input.
    """
    # Guard first: the emoji library scans text, so only pass it real strings
    if not isinstance(string, str):
        return 0, []

    emojis = emoji.distinct_emoji_list(string) if unique else emoji.emoji_list(string)
    return len(emojis), emojis

# Total emoji occurrences per message. Only the count is kept from this pass:
# the 'emojis' column is filled from the distinct pass below, so storing this
# pass's emoji list would just be overwritten.
total_emoji_counts = [emoji_count(text, unique=False)[0] for text in df['content']]

# Distinct emojis per message: (count, list) for every row
distinct_results = [emoji_count(text, unique=True) for text in df['content']]

# Assign each column exactly once. Plain lists are used instead of
# zip(*...) unpacking so an empty DataFrame yields empty columns rather than
# an unpacking error.
# NOTE: 'emojis' holds each message's DISTINCT emojis, so a downstream
# explode().value_counts() counts messages containing an emoji, not every
# repeated use inside one message.
df['emoji_count'] = total_emoji_counts
df['emojis'] = [found for _, found in distinct_results]
df['unique_emoji_count'] = [count for count, _ in distinct_results]

# Create a new DataFrame with desired columns
new_df = df[['sender', 'content', 'date', 'time','emoji_count','unique_emoji_count','emojis']].copy()

In [33]:
# Flatten the per-message emoji lists into one long Series and tally each
# emoji; the result becomes a two-column frame: 'emojis' and 'frequency'
emoji_tally = new_df['emojis'].explode().value_counts()
emoji_freq_df = emoji_tally.rename_axis('emojis').reset_index(name='frequency')

# Keep only the most frequent entries
top_n = 20  # Change this value to the desired number
top_emojis_df = emoji_freq_df.iloc[:top_n]

# Bar chart of the selected emojis, title centred horizontally
fig = px.bar(
    top_emojis_df,
    x='emojis',
    y='frequency',
    title=f'Top {top_n} Emojis frequently used',
)
fig.update_layout(title_x=0.5, xaxis_title='Emoji', yaxis_title='Frequency')

fig.show()


In [34]:
# Get unique senders from the 'sender' column (in order of first appearance)
unique_senders = new_df['sender'].unique()

# One emoji-frequency table per sender, with columns 'emoji', 'emoji_count'
# and 'sender'. Built in a comprehension so no loop-local frame is left
# behind: in particular the overall `emoji_freq_df` table from the previous
# cell is no longer overwritten with the last sender's table.
sender_emoji_dfs = [
    new_df.loc[new_df['sender'] == sender_name, 'emojis']
    .explode()                       # one row per emoji in each message's list
    .value_counts()                  # tally per emoji, largest count first
    .rename_axis('emoji')
    .reset_index(name='emoji_count')
    .assign(sender=sender_name)      # tag every row with its sender
    for sender_name in unique_senders
]

# Concatenate individual sender emoji frequency DataFrames into one DataFrame.
# Each per-sender table keeps its own 0-based index, so index values repeat.
merged_emoji_df = pd.concat(sender_emoji_dfs)

# Display the updated DataFrame
print(merged_emoji_df)



   emoji  emoji_count            sender
0      😆           76  Nguyễn Mạnh Phúc
1      ❤           53  Nguyễn Mạnh Phúc
2      👍           25  Nguyễn Mạnh Phúc
3      🥲           16  Nguyễn Mạnh Phúc
4      🫀           16  Nguyễn Mạnh Phúc
..   ...          ...               ...
11     🥴            1           Yen Nhi
12     😄            1           Yen Nhi
13     😮            1           Yen Nhi
14     😜            1           Yen Nhi
15     🤣            1           Yen Nhi

[286 rows x 3 columns]


In [35]:
import plotly.express as px

# Sender whose emojis are shown when the figure first renders
initial_sender = unique_senders[0]

# Vertical bar chart of the initial sender's emoji counts. The title names the
# sender so it matches the title the dropdown writes for every other choice.
fig = px.bar(merged_emoji_df[merged_emoji_df['sender'] == initial_sender],
             x='emoji',
             y='emoji_count',
             orientation='v',
             title=f'Most Frequent Emoji by {initial_sender}',
             labels={'emoji_count': 'Emoji Count', 'emoji': 'Emoji'},
             height=700)

# One dropdown button per sender. For the 'update' method, `args` is
# [trace_updates, layout_updates]: the first dict swaps the bar data, the
# second dict changes the title. The title must live in the second dict;
# nesting it under a 'layout' key inside the trace dict leaves it unchanged.
sender_dropdown = []
for sender in unique_senders:
    sender_rows = merged_emoji_df[merged_emoji_df['sender'] == sender]
    sender_dropdown.append({
        'label': sender,
        'method': 'update',
        'args': [
            # Each value is wrapped in a list: one entry per trace (the chart has one)
            {'x': [sender_rows['emoji'].tolist()],
             'y': [sender_rows['emoji_count'].tolist()]},
            {'title.text': f'Most Frequent Emoji by {sender}'},
        ],
    })

# Attach the dropdown and position it and the title
fig.update_layout(updatemenus=[{'buttons': sender_dropdown,
                                 'direction': 'down',
                                 'showactive': True,
                                 'x': 0.52,  # Horizontal position of the dropdown
                                 'xanchor': 'left',  # Anchor point for x position
                                 'y': 1.14,  # Vertical position of the dropdown
                                 'yanchor': 'top'  # Anchor point for y position
                                }],
                                title_x=0.4)  # Horizontal position of the title

# Show the figure
fig.show()
