# Importing Packages

In [None]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D
from keras.layers.convolutional import Convolution2D
from keras.layers.convolutional import MaxPooling2D
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from keras import backend as k 
import nltk
nltk.download('punkt')
nltk.download('stopwords')
from nltk.tokenize import word_tokenize
from nltk.tokenize import sent_tokenize, word_tokenize
import gensim
from gensim.models import Word2Vec

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


# **Loading Dataset**

In [None]:
# Mount Google Drive into the Colab runtime so the CSV files under
# /content/drive can be read by the loading cell below.
# NOTE(review): this import only resolves inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# `df` holds the full corpus; `short_data` is the small sample that the
# preprocessing cells below operate on.
df = pd.read_csv(
    "/content/drive/My Drive/Colab Notebooks/datasets/cse499.csv"
)
short_data = pd.read_csv(
    "/content/drive/My Drive/Colab Notebooks/datasets/short_data.csv"
)
# TODO: replace every remaining use of `df` with `short_data`

In [None]:
len(short_data)

999

In [None]:
print(type(df))

<class 'pandas.core.frame.DataFrame'>


In [None]:
short_data.head()

Unnamed: 0,Title,Text,Subreddit
0,exposure does not work!,I have struggled with social anxiety from chil...,Anxiety
1,Panic attack? derealization? can't go to docto...,"Back in March (I know, a while ago D:), I woke...",Anxiety
2,How long can a panic attack last?!,I've been withdrawing from medicines lately (e...,Anxiety
3,Stepping stones,"First time poster, long time lurker. \n\nI've ...",Anxiety
4,"Coping with anxiety over climate change, on th...","Hi all,\n\nI made a throwaway account as my ma...",Anxiety


In [None]:
short_data.shape

(999, 3)

In [None]:
len(short_data)

999

In [None]:
# Positional split of the full frame: every column except the last becomes
# the feature frame X, the last column becomes the label series y.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

In [None]:
len(X), len(y)

(488472, 488472)

# **Removing empty entries**

In [None]:
# Row count before deleting rows that contain empty columns
df_size_before = len(short_data)
# Delete every row that has at least one empty column.
# axis=0 -> drop rows, how="any" -> drop if any value in the row is missing.
# dropna returns a new frame, so the result must be bound back to
# `short_data`; otherwise nothing is removed and "After" always equals "Before".
short_data = short_data.dropna(axis=0, how="any")
# Row count after deleting rows that contain empty columns
df_size_after = len(short_data)
print("Before: ", df_size_before)
print("After: ", df_size_after)

Before:  999
After:  999


# **Merging Title and Text**

In [None]:
# Join title and body into one text column. The explicit space keeps the last
# word of the title from fusing with the first word of the body
# (e.g. "Stepping stones" + "First time ..." -> "stonesFirst").
short_data["Merge_data"] = short_data["Title"] + " " + short_data["Text"]
short_data.head()

Unnamed: 0,Title,Text,Subreddit,Merge_data
0,exposure does not work!,I have struggled with social anxiety from chil...,Anxiety,exposure does not work!I have struggled with s...
1,Panic attack? derealization? can't go to docto...,"Back in March (I know, a while ago D:), I woke...",Anxiety,Panic attack? derealization? can't go to docto...
2,How long can a panic attack last?!,I've been withdrawing from medicines lately (e...,Anxiety,How long can a panic attack last?!I've been wi...
3,Stepping stones,"First time poster, long time lurker. \n\nI've ...",Anxiety,"Stepping stonesFirst time poster, long time lu..."
4,"Coping with anxiety over climate change, on th...","Hi all,\n\nI made a throwaway account as my ma...",Anxiety,"Coping with anxiety over climate change, on th..."


In [None]:
# Title and Text are now contained in Merge_data, so remove both columns.
#   axis=1       -> the labels refer to columns (axis=0 would mean rows)
#   inplace=True -> modify `short_data` itself instead of returning a copy
short_data.drop(["Title", "Text"], axis=1, inplace=True)
short_data.shape

(999, 2)

# **Convert Merge_data column into lower case**

In [None]:
short_data.head()

Unnamed: 0,Subreddit,Merge_data
0,Anxiety,exposure does not work!I have struggled with s...
1,Anxiety,Panic attack? derealization? can't go to docto...
2,Anxiety,How long can a panic attack last?!I've been wi...
3,Anxiety,"Stepping stonesFirst time poster, long time lu..."
4,Anxiety,"Coping with anxiety over climate change, on th..."


In [None]:
# Lower-case the merged text in place (the Merge_data column is overwritten)
# so that later token comparisons are case-insensitive.
short_data["Merge_data"] = short_data["Merge_data"].str.lower()
# Preview the first rows to confirm the conversion.
short_data.head()

Unnamed: 0,Subreddit,Merge_data
0,Anxiety,exposure does not work!i have struggled with s...
1,Anxiety,panic attack? derealization? can't go to docto...
2,Anxiety,how long can a panic attack last?!i've been wi...
3,Anxiety,"stepping stonesfirst time poster, long time lu..."
4,Anxiety,"coping with anxiety over climate change, on th..."


# **Replacing punctuation and other non-word characters with spaces**

In [None]:
# Replace every non-word character (anything outside [A-Za-z0-9_]) with a
# space. regex=True is passed explicitly: r'\W' is a regular expression, and
# relying on the default emits a FutureWarning on older pandas and performs a
# literal (non-regex) replacement on pandas >= 2.0.
short_data["text_only"] = short_data["Merge_data"].str.replace(r'\W', " ", regex=True)

  """Entry point for launching an IPython kernel.


In [None]:
short_data.head()

Unnamed: 0,Subreddit,Merge_data,text_only
0,Anxiety,exposure does not work!i have struggled with s...,exposure does not work i have struggled with s...
1,Anxiety,panic attack? derealization? can't go to docto...,panic attack derealization can t go to docto...
2,Anxiety,how long can a panic attack last?!i've been wi...,how long can a panic attack last i ve been wi...
3,Anxiety,"stepping stonesfirst time poster, long time lu...",stepping stonesfirst time poster long time lu...
4,Anxiety,"coping with anxiety over climate change, on th...",coping with anxiety over climate change on th...


In [None]:
# Tokenizing based on words: each cleaned string in text_only is replaced by
# the list of tokens returned by nltk's word_tokenize.
# NOTE(review): the column is overwritten with its own tokenized form, so
# running this cell a second time would hand token lists (not strings) to
# word_tokenize -- presumably that fails; confirm or write to a new column.

short_data["text_only"] = short_data["text_only"].apply(word_tokenize)
short_data.head()

Unnamed: 0,Subreddit,Merge_data,text_only
0,Anxiety,exposure does not work!i have struggled with s...,"[exposure, does, not, work, i, have, struggled..."
1,Anxiety,panic attack? derealization? can't go to docto...,"[panic, attack, derealization, can, t, go, to,..."
2,Anxiety,how long can a panic attack last?!i've been wi...,"[how, long, can, a, panic, attack, last, i, ve..."
3,Anxiety,"stepping stonesfirst time poster, long time lu...","[stepping, stonesfirst, time, poster, long, ti..."
4,Anxiety,"coping with anxiety over climate change, on th...","[coping, with, anxiety, over, climate, change,..."


# **Removing stop words**

In [None]:
# English stop-word list from the NLTK corpus; read as a module-level name by
# remove_stopwords below.
# NOTE(review): the no_stopwords preview shows "does not work" reduced to
# "work", i.e. negations are filtered out too -- confirm this is acceptable
# for the classification task.
stopwords = nltk.corpus.stopwords.words("english")
# stopwords[0:10]

In [None]:
def remove_stopwords(data_to_remove_stopwords, stop_words=None):
  """Return the tokens of one document with stop words filtered out.

  Args:
    data_to_remove_stopwords: iterable of token strings for one document.
    stop_words: optional iterable of words to remove. When omitted, the
      module-level `stopwords` list is used, which matches the previous
      single-argument behaviour.

  Returns:
    A new list containing, in the original order, every token that is not a
    stop word.
  """
  if stop_words is None:
    stop_words = stopwords
  # Set membership is O(1) per token; scanning the list for every token made
  # each lookup proportional to the size of the stop-word list.
  stop_word_set = set(stop_words)
  return [word for word in data_to_remove_stopwords if word not in stop_word_set]

# Filter the stop words out of every token list and keep the result in a new
# column, leaving text_only untouched.
tokens_without_stopwords = short_data["text_only"].apply(remove_stopwords)
short_data["no_stopwords"] = tokens_without_stopwords
short_data.head()

Unnamed: 0,Subreddit,Merge_data,text_only,no_stopwords
0,Anxiety,exposure does not work!i have struggled with s...,"[exposure, does, not, work, i, have, struggled...","[exposure, work, struggled, social, anxiety, c..."
1,Anxiety,panic attack? derealization? can't go to docto...,"[panic, attack, derealization, can, t, go, to,...","[panic, attack, derealization, go, doctors, ad..."
2,Anxiety,how long can a panic attack last?!i've been wi...,"[how, long, can, a, panic, attack, last, i, ve...","[long, panic, attack, last, withdrawing, medic..."
3,Anxiety,"stepping stonesfirst time poster, long time lu...","[stepping, stonesfirst, time, poster, long, ti...","[stepping, stonesfirst, time, poster, long, ti..."
4,Anxiety,"coping with anxiety over climate change, on th...","[coping, with, anxiety, over, climate, change,...","[coping, anxiety, climate, change, verge, cons..."


In [None]:
# creating word2vec
# Word2Vec expects an iterable of token lists (one list per document).
# Passing the DataFrame itself iterates over its column *names*, so the model
# was being trained on the characters of the column labels instead of the
# corpus. Train on the stop-word-filtered token lists instead.
# NOTE: `size` is the gensim < 4.0 spelling used by this notebook; gensim >= 4.0
# renamed it to `vector_size`.

model = gensim.models.Word2Vec(short_data["no_stopwords"].tolist(), min_count = 1,size = 100, window = 5, sg=0) 

In [None]:
type(model)

gensim.models.word2vec.Word2Vec

# **Splitting the data into train and test sets**

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
len(X_train), len(X_test)

(390777, 97695)

In [None]:
len(y_train), len(y_test)

(390777, 97695)

In [None]:
# X_train is a DataFrame at this point, so NumPy-style tuple indexing
# (X_train[:, 1:]) raises; positional slicing has to go through .iloc.
# Shows every row, all columns except the first.
X_train.iloc[:, 1:]

In [None]:
# Reshaping the training data (the test split is not reshaped in this cell).
# The CSV rows are converted into image-shaped arrays because the Conv2D
# layers used below take image-like inputs.
# NOTE(review): the reshape needs every row of X_train.iloc[:, 1:] to hold
# 1*28*28 = 784 numeric values -- verify that the columns of `df` satisfy this.
# NOTE(review): the array layout produced here is (N, 1, 28, 28), while the
# first Conv2D layer declares input_shape=(28, 28, 1) -- confirm the intended
# channel order.
trainX = X_train.iloc[:, 1:].values.reshape(X_train.shape[0], 1, 28, 28).astype( 'float32' )
# Scale the values by 1/255; X_train is rebound from a DataFrame to this array.
X_train = trainX/255.0
# NOTE(review): this replaces the labels returned by train_test_split with a
# slice of the scaled feature array (index 0 along axis 1) -- presumably
# unintended; confirm.
y_train = X_train[:, 0]

In [None]:
# CNN architecture. Note that this rebinds `model`, which previously referred
# to the Word2Vec model created earlier in the notebook.
model = Sequential([
    # two stacked 3x3 convolutions, then 2x2 max-pooling and dropout
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # collapse the feature maps into one vector for the dense layers
    Flatten(),
    # fully connected hidden layer
    Dense(128, activation='relu'),
    # softmax output over 10 units
    # (presumably one per target class -- TODO confirm the class count)
    Dense(10, activation='softmax'),
])

In [None]:
# Configure training: categorical cross-entropy loss, Adam optimizer, and
# accuracy reported as the metric; then print the layer-by-layer summary.
model.compile(
    loss='categorical_crossentropy',
    optimizer='Adam',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Materialize the training features and labels as NumPy arrays.
train_x, train_y = np.asarray(X_train), np.asarray(y_train)

In [None]:
#training the model
# fit() needs the training inputs and targets; calling it with no arguments
# raises. batch_size and epochs are written out at the Keras default values
# so they are visible and easy to tune.
model.fit(train_x, train_y, batch_size=32, epochs=1)