In [1]:
import numpy as np
import pandas as pd
from datetime import datetime

In [17]:
import warnings

# Suppress future warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [5]:
# Load the conversation log for participant u00. The printed preview below
# shows one row per conversation with a start and an end timestamp column.
# (The former `pd.DataFrame()` pre-initialisation was dropped: read_csv
# overwrote it immediately, so it had no effect.)
ConversationData = pd.read_csv('../../dataset/dataset/sensing/conversation/conversation_u00.csv')
print(ConversationData)

      start_timestamp   end_timestamp
0          1364359600      1364359812
1          1364382621      1364383065
2          1364383516      1364384993
3          1364385033      1364385094
4          1364385786      1364385866
...               ...             ...
2106       1370037443      1370037604
2107       1370037874      1370038710
2108       1370038931      1370044458
2109       1370044688      1370048026
2110       1370048237      1370048610

[2111 rows x 2 columns]


In [6]:
# Normalise the column whose header carries a leading space (' end_timestamp').
ConversationData = ConversationData.rename(columns={' end_timestamp': 'end_timestamp'})

In [7]:
# Interpret both timestamp columns as epoch seconds and turn them into pandas datetimes.
for timestamp_column in ('start_timestamp', 'end_timestamp'):
    ConversationData[timestamp_column] = pd.to_datetime(ConversationData[timestamp_column], unit='s')


In [8]:
# Split the start timestamp into calendar parts:
#   date - midnight-normalised datetime of the start day
#   time - wall-clock time of day as datetime.time objects
#   week - ISO week number of that date
ConversationData = ConversationData.assign(
    date=lambda frame: pd.to_datetime(frame['start_timestamp'].dt.date),
    time=lambda frame: frame['start_timestamp'].dt.time,
    week=lambda frame: frame['date'].dt.isocalendar().week,
)


In [9]:
# Duration of every conversation, kept both as a timedelta and as plain seconds.
ConversationData = ConversationData.assign(
    conversation_length=lambda frame: frame['end_timestamp'].sub(frame['start_timestamp']),
    conversation_length_seconds=lambda frame: frame['conversation_length'].dt.total_seconds(),
)
print(ConversationData['conversation_length_seconds'])

0        212.0
1        444.0
2       1477.0
3         61.0
4         80.0
         ...  
2106     161.0
2107     836.0
2108    5527.0
2109    3338.0
2110     373.0
Name: conversation_length_seconds, Length: 2111, dtype: float64


In [10]:
# Label each conversation with the part of the day in which it started.
# The hour of the start timestamp (0-23) is binned with right=False, i.e. the
# intervals are closed on the left and open on the right:
#   [0, 6)   -> Early Morning
#   [6, 12)  -> Morning
#   [12, 18) -> Afternoon
#   [18, 24) -> Evening
# The hour is passed to pd.cut directly, so no temporary 'hour' column has to
# be added to the frame and dropped again.
bins = [0, 6, 12, 18, 24]
labels = ['Early Morning', 'Morning', 'Afternoon', 'Evening']
ConversationData['timeCategory'] = pd.cut(
    ConversationData['start_timestamp'].dt.hour,
    bins=bins,
    labels=labels,
    right=False,
)

print(ConversationData)

         start_timestamp       end_timestamp       date      time  week  \
0    2013-03-27 04:46:40 2013-03-27 04:50:12 2013-03-27  04:46:40    13   
1    2013-03-27 11:10:21 2013-03-27 11:17:45 2013-03-27  11:10:21    13   
2    2013-03-27 11:25:16 2013-03-27 11:49:53 2013-03-27  11:25:16    13   
3    2013-03-27 11:50:33 2013-03-27 11:51:34 2013-03-27  11:50:33    13   
4    2013-03-27 12:03:06 2013-03-27 12:04:26 2013-03-27  12:03:06    13   
...                  ...                 ...        ...       ...   ...   
2106 2013-05-31 21:57:23 2013-05-31 22:00:04 2013-05-31  21:57:23    22   
2107 2013-05-31 22:04:34 2013-05-31 22:18:30 2013-05-31  22:04:34    22   
2108 2013-05-31 22:22:11 2013-05-31 23:54:18 2013-05-31  22:22:11    22   
2109 2013-05-31 23:58:08 2013-06-01 00:53:46 2013-05-31  23:58:08    22   
2110 2013-06-01 00:57:17 2013-06-01 01:03:30 2013-06-01  00:57:17    22   

     conversation_length  conversation_length_seconds   timeCategory  
0        0 days 00:03:32    

In [19]:
# Order the rows by ISO week and tag every row with the participant id.
ConversationData = ConversationData.sort_values(by='week').assign(userId='u00')

In [38]:
# Build one feature row per sliding two-week window: every week number present
# in the data is paired with the week number that follows it. The final week
# has no successor, so it is only ever used as the second half of a window.
# Sorting before unique() keeps the windows chronological regardless of the
# current row order of ConversationData.
# NOTE(review): week numbers come from isocalendar() without the year, so this
# assumes the data does not cross a year boundary - confirm for other datasets.
weeks = ConversationData['week'].sort_values().unique()
dfs = []

for week in weeks[:-1]:
    twoWeekData = ConversationData[(ConversationData['week'] == week) | (ConversationData['week'] == week+1)]

    # Total and average conversation length per user and time-of-day bucket.
    # observed=True restricts the groups to buckets that actually occur in
    # this window instead of every category of the categorical column, and
    # makes the behaviour explicit rather than relying on the pandas default.
    summary_df = (
        twoWeekData
        .groupby(['userId', 'timeCategory'], observed=True)['conversation_length_seconds']
        .agg(['sum', 'mean'])
        .reset_index()
    )
    summary_df.columns = ['userId', 'timeCategory', 'Sum', 'Mean']

    # Reshape to a single row per user with one column per (statistic, bucket),
    # named e.g. "ConversationMorningMean".
    pivot_df = summary_df.pivot_table(
        index='userId',
        columns=['timeCategory'],
        values=['Sum', 'Mean'],
        observed=True,
    ).fillna(0)
    pivot_df.columns = [f"Conversation{category}{calc}" for calc, category in pivot_df.columns]

    # Index labels of the shortest and longest conversation in each bucket.
    # Because only observed buckets are grouped, there are no empty groups
    # whose "label" would be missing when looked up with .loc below.
    lengths_by_category = twoWeekData.groupby('timeCategory', observed=True)['conversation_length_seconds']
    shortest_indices = lengths_by_category.idxmin()
    longest_indices = lengths_by_category.idxmax()

    # Retrieve the rows corresponding to the shortest and longest conversations
    shortest_conversations = twoWeekData.loc[shortest_indices]
    longest_conversations = twoWeekData.loc[longest_indices]

    ShortestRestructured = shortest_conversations.pivot_table(
        index='userId',
        columns=['timeCategory'],
        values='conversation_length_seconds',
        observed=True,
    ).fillna(0)
    ShortestRestructured.columns = [f"ConversationShortest{category}" for category in ShortestRestructured.columns]

    LongestRestructured = longest_conversations.pivot_table(
        index='userId',
        columns=['timeCategory'],
        values='conversation_length_seconds',
        observed=True,
    ).fillna(0)
    LongestRestructured.columns = [f"ConversationLongest{category}" for category in LongestRestructured.columns]

    # Join the three wide frames on the shared userId index.
    longestShortest = pd.merge(ShortestRestructured, LongestRestructured, on='userId')
    merged = pd.merge(pivot_df, longestShortest, on='userId')

    # Window number relative to the first week in the data (0, 1, 2, ...
    # when the week numbers are consecutive).
    merged['WeekId'] = week - weeks[0]
    print(merged)
    dfs.append(merged)

# Stack the per-window rows; reset_index() turns the userId index into a
# regular column. fillna(0) only has an effect when some window lacked a
# bucket (its columns are then absent from that window's row) and keeps the
# "missing means 0" convention used for the individual pivots above.
result_df = pd.concat(dfs).fillna(0).reset_index()

        ConversationEarly MorningMean  ConversationMorningMean  \
userId                                                           
u00                        707.455696               597.030303   

        ConversationAfternoonMean  ConversationEveningMean  \
userId                                                       
u00                      1194.125               967.605839   

        ConversationEarly MorningSum  ConversationMorningSum  \
userId                                                         
u00                          55889.0                 19702.0   

        ConversationAfternoonSum  ConversationEveningSum  \
userId                                                     
u00                     114636.0                132562.0   

        ConversationShortestEarly Morning  ConversationShortestMorning  \
userId                                                                   
u00                                  40.0                         50.0   

        Conversat

In [39]:
# Show the combined table of per-window features built in the previous cell.
print(result_df)

  userId  ConversationEarly MorningMean  ConversationMorningMean  \
0    u00                     707.455696               597.030303   
1    u00                     481.548673               539.687500   
2    u00                     401.300885               561.200000   
3    u00                     438.800000               585.229167   
4    u00                     380.000000               447.438776   
5    u00                     365.471154               548.370690   
6    u00                     403.369231               903.015385   
7    u00                     526.919355               936.500000   
8    u00                     546.541176               334.782609   

   ConversationAfternoonMean  ConversationEveningMean  \
0                1194.125000               967.605839   
1                1021.666667               764.549133   
2                1027.721429               839.536145   
3                1132.341270               855.354286   
4                1051.284483      