### Creating a pandas DataFrame from a text file

#### Importing required libraries

In [1]:
import re
import pandas as pd

#### Reading the file

In [2]:
## reading the whole chat export as UTF-8 text; the `with` block closes the
## file handle as soon as the content has been read, so it is not leaked
with open('dummyChat.txt', 'r', encoding='utf-8') as file:
    data = file.read()

## displaying data
print(data)

15/04/2016, 15:04 - You created group “Sample Group”
06/08/2016, 13:18 - Messages you send to this group are now secured with end-to-end encryption. Tap for more info.
06/08/2016, 13:23 - Ash Ketchum: Hey guys!
06/08/2016, 13:25 - Brock: Hey Ash, good to have a common group!
06/08/2016, 13:30 - Misty: Hey guys! Long time haven't heard anything from you
06/08/2016, 13:45 - Ash Ketchum: Indeed. I think having a whatsapp group nowadays is a good idea
06/08/2016, 14:30 - Misty: Definetly
06/08/2016, 17:25 - Brock: I totally agree
07/08/2016, 11:45 - Prof. Oak: Kids, shall I design a smart poke-ball?
07/08/2016, 18:45 - Ash Ketchum: I don't mind Prof. I quitted capturing pokemon.
07/08/2016, 18:56 - You added Wobbuffet
07/08/2016, 19:30 - Misty: Was a great time, but had enough also.
07/08/2016, 23:25 - Brock: Guys, I am still in the first gym. No one is playing pokemon, they went crazy with pokemon Go.
07/08/2016, 23:56 - You added Jessie
10/08/2016, 09:45 - Jessie: Hey, thanks for adding 

#### Cleaning and processing the data

In [4]:
## pattern matching the "dd/mm/yyyy, hh:mm - " prefix that starts every chat entry;
## used to separate the date and time from the rest of the entry.
## Written as a raw string so that \d and \s reach the regex engine unchanged
## instead of being treated as (invalid) string escape sequences.
pattern = r'\d{1,2}/\d{1,2}/\d{2,4},\s\d{1,2}:\d{2}\s-\s'

In [5]:
## cutting the export at every timestamp prefix; the piece in front of the
## first timestamp is not a chat entry, so it is left out
chunks = re.split(pattern, data)
messages = chunks[1:]
len(messages)

24

In [6]:
## collecting the timestamp prefixes themselves, in file order
dates = [match.group(0) for match in re.finditer(pattern, data)]
len(dates)

24

In [7]:
## one row per chat entry: the raw entry text and its timestamp prefix,
## with the prefix parsed from text into a datetime value
df = pd.DataFrame({'Message': messages, 'Date': dates}).assign(
    Date=lambda frame: pd.to_datetime(frame['Date'], format='%d/%m/%Y, %H:%M - ')
)

## displaying data frame
df.head()

Unnamed: 0,Message,Date
0,You created group “Sample Group”\n,2016-04-15 15:04:00
1,Messages you send to this group are now secure...,2016-08-06 13:18:00
2,Ash Ketchum: Hey guys!\n,2016-08-06 13:23:00
3,"Brock: Hey Ash, good to have a common group!\n",2016-08-06 13:25:00
4,Misty: Hey guys! Long time haven't heard anyth...,2016-08-06 13:30:00


In [8]:
## Separating user name and user message

def split_user_and_text(raw_message):
    """Split one raw chat entry into a ``(user, text)`` pair.

    An entry shaped like ``"<name>: <text>"`` is attributed to ``<name>``.
    An entry with no ``":"`` followed by whitespace is labelled
    ``'Notification'`` and its whole content is kept as the text.

    Only the first separator is used, so text that itself contains ``": "``
    is kept intact instead of being cut into extra pieces.
    """
    ## maxsplit=1 gives ['', name, text] when a separator exists and
    ## [raw_message] when it does not
    parts = re.split(r'([\w\W]+?):\s', raw_message, maxsplit=1)

    ## if it has a value before ':' it is a user message
    if len(parts) == 3:
        user, text = parts[1], parts[2]
    ## if it does not have a user name
    else:
        user, text = 'Notification', parts[0]

    ## remove only the line break that terminates the entry; the last entry
    ## of the file may have none, and its final character must be kept
    if text.endswith('\n'):
        text = text[:-1]
    return user, text


## list for user
users = []
## list for messages
messages = []

## iterating in messages
for raw_message in df['Message']:
    user, text = split_user_and_text(raw_message)
    users.append(user)
    messages.append(text)

## NOTE: this cell consumes the raw 'Message' column, so it is meant to be
## run once on a freshly built df (re-create df before running it again)
## dropping previous message column
df.drop('Message', axis=1, inplace=True)

## adding user column with user names
df['User'] = users
## adding message column with just message
df['Message'] = messages

## displaying dataframe
df.head()

Unnamed: 0,Date,User,Message
0,2016-04-15 15:04:00,Notification,You created group “Sample Group”
1,2016-08-06 13:18:00,Notification,Messages you send to this group are now secure...
2,2016-08-06 13:23:00,Ash Ketchum,Hey guys!
3,2016-08-06 13:25:00,Brock,"Hey Ash, good to have a common group!"
4,2016-08-06 13:30:00,Misty,Hey guys! Long time haven't heard anything fro...


In [9]:
## Deriving one column per calendar/clock component of Date
## (Month holds the month name, the others hold numbers)
date_accessor = df['Date'].dt
date_parts = {
    'Year': date_accessor.year,
    'Month': date_accessor.month_name(),
    'Day': date_accessor.day,
    'Hour': date_accessor.hour,
    'Minute': date_accessor.minute,
}
## dict order is insertion order, so the columns are appended in this order
for column, values in date_parts.items():
    df[column] = values

In [10]:
## showing the first five rows, now including the derived date columns
df.head(n=5)

Unnamed: 0,Date,User,Message,Year,Month,Day,Hour,Minute
0,2016-04-15 15:04:00,Notification,You created group “Sample Group”,2016,April,15,15,4
1,2016-08-06 13:18:00,Notification,Messages you send to this group are now secure...,2016,August,6,13,18
2,2016-08-06 13:23:00,Ash Ketchum,Hey guys!,2016,August,6,13,23
3,2016-08-06 13:25:00,Brock,"Hey Ash, good to have a common group!",2016,August,6,13,25
4,2016-08-06 13:30:00,Misty,Hey guys! Long time haven't heard anything fro...,2016,August,6,13,30
