In [6]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import os
import io

# Upload Data

In [9]:
from google.colab import files

def upload(num_files):
    """Prompt for Colab uploads and return their contents as in-memory buffers.

    One upload dialog is shown per requested file. If several files are
    selected in a single dialog, only the first entry of the returned
    mapping is kept.

    Args:
        num_files: Number of upload dialogs to show.

    Returns:
        A list of ``io.BytesIO`` buffers, one per dialog, in upload order.

    Raises:
        ValueError: If a dialog returns no file (e.g. it was dismissed).
    """
    file_paths = []
    print("Uploaded:")
    for _ in range(num_files):
        file_dict = files.upload()
        if not file_dict:
            # Without this guard an empty result surfaces as a bare
            # StopIteration from next(), which is hard to interpret.
            raise ValueError("No file was uploaded.")
        name, content = next(iter(file_dict.items()))
        file_paths.append(io.BytesIO(content))
        # Print the mapping key (presumably the stored file name) rather than
        # the BytesIO repr, which only shows a memory address.
        print(name)

    return file_paths

In [35]:
# Upload order matters: later cells read index 0 as train, 1 as val, 2 as test.
txt_files = upload(3)

Uploaded:


Saving train.txt to train (3).txt
<_io.BytesIO object at 0x7f77ecf170b0>


Saving val.txt to val (2).txt
<_io.BytesIO object at 0x7f77ecf17230>


Saving test.txt to test (2).txt
<_io.BytesIO object at 0x7f77ecf17350>


In [27]:
# Single CSV upload; csv_files[0] is read into text_emotion_detection below.
csv_files = upload(1)

Uploaded:


Saving all_tweets.csv to all_tweets (5).csv
<_io.BytesIO object at 0x7f77ecff7c50>


## Tweet Sentiment and Emotion Analysis Dataset

In [28]:
# Rewind the uploaded buffer first: read_csv leaves it at end-of-stream, so
# re-running this cell without seek(0) would fail on an empty read.
csv_files[0].seek(0)
# The first CSV column holds the row labels, so use it as the index.
text_emotion_detection = pd.read_csv(csv_files[0], index_col=0)

In [29]:
# Check row count, column dtypes and non-null counts of the tweets frame.
text_emotion_detection.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6032 entries, 0 to 97
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   sentiment  6032 non-null   object
 1   text       6032 non-null   object
 2   user       6032 non-null   object
 3   label      6032 non-null   object
dtypes: object(4)
memory usage: 235.6+ KB


In [32]:
# Preview the first 10 rows of the tweets frame.
text_emotion_detection.head(10)

Unnamed: 0,sentiment,text,user,label
0,positive,RT @SchudioTv: Want to know more about #autism...,beyondbehaviour,anxiety
1,negative,We blame ourselves and feel worse. Start with ...,cherie7c,anxiety
2,positive,"RT @PsychiatristCNS: 130,000 patient years and...",ThinkNeha,anxiety
3,neutral,RT @SkypeTherapist: See a therapist online ove...,FrankCoulson7,anxiety
4,positive,"RT @PsychiatristCNS: 130,000 patient years and...",Phcourtet,anxiety
5,neutral,The onset of the #pandemic &amp; #WFH has led ...,ajhospitalmng,anxiety
6,neutral,#Climate change is concerning. \n\nThese can c...,585Mentalhealth,anxiety
7,positive,"130,000 patient years and the diagnostic stabi...",PsychiatristCNS,anxiety
8,neutral,Looking for online counseling via Skype? Skype...,SkypeTherapist,anxiety
9,neutral,RT @JonnyCleanPick: Just sharing my thoughts😊 ...,TrioMinutes,anxiety


## Text Emotion Dataset

In [36]:
# Rewind the uploaded buffer first: read_csv leaves it at end-of-stream, so
# re-running this cell without seek(0) would fail on an empty read.
txt_files[0].seek(0)
# The file has no header row; fields are separated by ';'.
train = pd.read_csv(txt_files[0], sep=";", header=None)

In [38]:
# Give the two positional columns descriptive names.
train = train.rename(columns={0: 'text', 1: 'label'})

In [40]:
# Check row count, column dtypes and non-null counts of the training split.
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16000 entries, 0 to 15999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    16000 non-null  object
 1   label   16000 non-null  object
dtypes: object(2)
memory usage: 250.1+ KB


In [41]:
# Preview the first 10 rows of the training split.
train.head(10)

Unnamed: 0,text,label
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger
5,ive been feeling a little burdened lately wasn...,sadness
6,ive been taking or milligrams or times recomme...,surprise
7,i feel as confused about life as a teenager or...,fear
8,i have been with petronas for years i feel tha...,joy
9,i feel romantic too,love


In [42]:
# Rewind both uploaded buffers first: read_csv leaves them at end-of-stream,
# so re-running this cell without seek(0) would fail on an empty read.
txt_files[1].seek(0)
txt_files[2].seek(0)
# Same layout as the training file: no header row, ';'-separated fields.
val = pd.read_csv(txt_files[1], sep=";", header=None)
test = pd.read_csv(txt_files[2], sep=";", header=None)

In [43]:
# Apply the same column names used for the training split.
val = val.rename(columns={0: 'text', 1: 'label'})
test = test.rename(columns={0: 'text', 1: 'label'})

In [45]:
# Summarize the validation and test splits; the bare print() separates the
# two reports with a blank line.
val.info()
print()
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    2000 non-null   object
 1   label   2000 non-null   object
dtypes: object(2)
memory usage: 31.4+ KB

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    2000 non-null   object
 1   label   2000 non-null   object
dtypes: object(2)
memory usage: 31.4+ KB


In [46]:
# Preview the first 5 rows of the validation split.
val.head(5)

Unnamed: 0,text,label
0,im feeling quite sad and sorry for myself but ...,sadness
1,i feel like i am still looking at a blank canv...,sadness
2,i feel like a faithful servant,love
3,i am just feeling cranky and blue,anger
4,i can have for a treat or if i am feeling festive,joy


In [47]:
# Preview the first 5 rows of the test split.
test.head(5)

Unnamed: 0,text,label
0,im feeling rather rotten so im not very ambiti...,sadness
1,im updating my blog because i feel shitty,sadness
2,i never make her separate from me because i do...,sadness
3,i left with my bouquet of red and yellow tulip...,joy
4,i was feeling a little vain when i did this one,sadness


#### Combine datasets

In [48]:
# Stack train, val and test row-wise in that order. DataFrame.append was
# deprecated in pandas 1.4 and removed in 2.0; pd.concat is the supported
# replacement and builds the result in a single step.
# Each split keeps its own index labels, so labels repeat in the result.
dataset = pd.concat([train, val, test])

In [49]:
# Confirm the combined frame holds the rows of all three splits.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 20000 entries, 0 to 1999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    20000 non-null  object
 1   label   20000 non-null  object
dtypes: object(2)
memory usage: 468.8+ KB


In [50]:
# Preview the first 10 rows of the combined frame.
dataset.head(10)

Unnamed: 0,text,label
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger
5,ive been feeling a little burdened lately wasn...,sadness
6,ive been taking or milligrams or times recomme...,surprise
7,i feel as confused about life as a teenager or...,fear
8,i have been with petronas for years i feel tha...,joy
9,i feel romantic too,love


In [51]:
# Write the combined frame to "dataset.csv" (relative path). to_csv includes
# the index by default, and the index labels here repeat across the splits.
dataset.to_csv("dataset.csv")