### 1. eBay Dataset
-----

In [102]:
import pandas as pd
import numpy as np

# ---- Read the five raw eBay CSV files, one frame per file.

raw1 = pd.read_csv('Raw/ebay-1.csv')
raw2 = pd.read_csv('Raw/ebay-2.csv')
raw3 = pd.read_csv('Raw/ebay-3.csv')
raw4 = pd.read_csv('Raw/ebay-4.csv')
raw5 = pd.read_csv('Raw/ebay-5.csv')

# Report the (rows, columns) shape of each frame that was just loaded.
for message, frame in (
    ('First raw dataset information above {}\n', raw1),
    ('Second raw dataset information above {}\n', raw2),
    ('Third raw dataset information above {}\n', raw3),
    ('Fourth raw dataset information above {}\n', raw4),
    ('Fifth raw dataset information above {}\n', raw5),
):
    print(message.format(frame.shape))

First raw dataset information above (222, 1)

Second raw dataset information above (502, 1)

Third raw dataset information above (458, 1)

Fourth raw dataset information above (538, 1)

Fifth raw dataset information above (448, 1)



In [103]:
# Rows whose content starts with one of these characters are discarded.
ignore_str = [',', '.', ';', '{', '}', '#', '/', '(', ')', '?', '$']

# .str[0] takes the first character of every content string.
df1 = raw1.loc[~raw1['content'].str[0].isin(ignore_str)]
df2 = raw2.loc[~raw2['content'].str[0].isin(ignore_str)]
df3 = raw3.loc[~raw3['content'].str[0].isin(ignore_str)]
df4 = raw4.loc[~raw4['content'].str[0].isin(ignore_str)]
df5 = raw5.loc[~raw5['content'].str[0].isin(ignore_str)]

# Shapes after the first-character filter.
for message, frame in (
    ('First filtered dataset information above {}\n', df1),
    ('Second filtered dataset information above {}\n', df2),
    ('Third filtered dataset information above {}\n', df3),
    ('Fourth filtered dataset information above {}\n', df4),
    ('Fifth filtered dataset information above {}\n', df5),
):
    print(message.format(frame.shape))

First filtered dataset information above (222, 1)

Second filtered dataset information above (502, 1)

Third filtered dataset information above (458, 1)

Fourth filtered dataset information above (538, 1)

Fifth filtered dataset information above (448, 1)



In [106]:
# Remove the disturbing content: any row whose text contains one of the
# symbols below, at any position.
# The entries are joined into one regular expression, so the opening
# parenthesis must be escaped. A raw string keeps that backslash literally;
# the plain literal '\(' is an invalid escape sequence that newer Python
# versions warn about.
str_list = ['{', '®', '℗', '©', '=', r'\(']
pattern = '|'.join(str_list)

df1 = df1[~df1.content.str.lower().str.contains(pattern)]
df2 = df2[~df2.content.str.lower().str.contains(pattern)]
df3 = df3[~df3.content.str.lower().str.contains(pattern)]
df4 = df4[~df4.content.str.lower().str.contains(pattern)]
df5 = df5[~df5.content.str.lower().str.contains(pattern)]

# Shapes after the symbol filter.
print('First filtered dataset information above {}\n'.format(df1.shape))
print('Second filtered dataset information above {}\n'.format(df2.shape))
print('Third filtered dataset information above {}\n'.format(df3.shape))
print('Fourth filtered dataset information above {}\n'.format(df4.shape))
print('Fifth filtered dataset information above {}\n'.format(df5.shape))

First filtered dataset information above (219, 1)

Second filtered dataset information above (490, 1)

Third filtered dataset information above (451, 1)

Fourth filtered dataset information above (523, 1)

Fifth filtered dataset information above (436, 1)



In [107]:
# Select the dark-pattern candidates: rows whose lower-cased content
# contains at least one of the keywords below (joined into one regex).

str_list = ['left', 'sold', 'watching', 'watchers']
pattern = '|'.join(str_list)

dp1 = df1.loc[df1.content.str.lower().str.contains(pattern)]
dp2 = df2.loc[df2.content.str.lower().str.contains(pattern)]
dp3 = df3.loc[df3.content.str.lower().str.contains(pattern)]
dp4 = df4.loc[df4.content.str.lower().str.contains(pattern)]
dp5 = df5.loc[df5.content.str.lower().str.contains(pattern)]

# Number of matching rows per dataset.
for message, frame in (
    ('First filtered dataset information above {}\n', dp1),
    ('Second filtered dataset information above {}\n', dp2),
    ('Third filtered dataset information above {}\n', dp3),
    ('Fourth filtered dataset information above {}\n', dp4),
    ('Fifth filtered dataset information above {}\n', dp5),
):
    print(message.format(frame.shape))

First filtered dataset information above (0, 1)

Second filtered dataset information above (38, 1)

Third filtered dataset information above (0, 1)

Fourth filtered dataset information above (36, 1)

Fifth filtered dataset information above (3, 1)



In [108]:
# Manual correction after inspecting the matches: rows of dp4 whose
# lower-cased content contains 'anti' are not dark patterns, so they are
# taken out of the selection. The other frames are left untouched.

dp4 = dp4.loc[~dp4.content.str.lower().str.contains('anti')]

# Shapes of all five selections after the correction.
for message, frame in (
    ('First filtered dataset information above {}\n', dp1),
    ('Second filtered dataset information above {}\n', dp2),
    ('Third filtered dataset information above {}\n', dp3),
    ('Fourth filtered dataset information above {}\n', dp4),
    ('Fifth filtered dataset information above {}\n', dp5),
):
    print(message.format(frame.shape))

First filtered dataset information above (0, 1)

Second filtered dataset information above (38, 1)

Third filtered dataset information above (0, 1)

Fourth filtered dataset information above (35, 1)

Fifth filtered dataset information above (3, 1)



In [109]:
# Normal content = every cleaned row that is not in the dark-pattern
# selection. isin() with a DataFrame compares cells at matching index and
# column labels, so a row is flagged when it also appears in dpN.

nor1 = df1.loc[~df1.isin(dp1).any(axis=1)]
nor2 = df2.loc[~df2.isin(dp2).any(axis=1)]
nor3 = df3.loc[~df3.isin(dp3).any(axis=1)]
nor4 = df4.loc[~df4.isin(dp4).any(axis=1)]
nor5 = df5.loc[~df5.isin(dp5).any(axis=1)]

# Shapes of the normal-content frames.
for message, frame in (
    ('First normal dataset information above {}\n', nor1),
    ('Second normal dataset information above {}\n', nor2),
    ('Third normal dataset information above {}\n', nor3),
    ('Fourth normal dataset information above {}\n', nor4),
    ('Fifth normal dataset information above {}\n', nor5),
):
    print(message.format(frame.shape))

First normal dataset information above (219, 1)

Second normal dataset information above (452, 1)

Third normal dataset information above (451, 1)

Fourth normal dataset information above (488, 1)

Fifth normal dataset information above (433, 1)



In [110]:
# Write the dark-pattern rows and the normal rows to CSV files
# (header row included, index column omitted).

# dark patterns: only dp2, dp4 and dp5 are written; the output files are
# numbered consecutively, not after the raw file each selection came from
for frame, path in (
    (dp2, 'DP/ebay1.csv'),
    (dp4, 'DP/ebay2.csv'),
    (dp5, 'DP/ebay3.csv'),
):
    frame.to_csv(path, index=False, header=True)

# normal content: one file per dataset
for frame, path in (
    (nor1, 'Normal/ebay1.csv'),
    (nor2, 'Normal/ebay2.csv'),
    (nor3, 'Normal/ebay3.csv'),
    (nor4, 'Normal/ebay4.csv'),
    (nor5, 'Normal/ebay5.csv'),
):
    frame.to_csv(path, index=False, header=True)

### 2. Amazon Dataset
-----

In [111]:
import pandas as pd
import numpy as np

# ---- Read the four raw Amazon CSV files, one frame per file.

raw1 = pd.read_csv('Raw/amazon-1.csv')
raw2 = pd.read_csv('Raw/amazon-2.csv')
raw3 = pd.read_csv('Raw/amazon-3.csv')
raw4 = pd.read_csv('Raw/amazon-4.csv')

# Report the (rows, columns) shape of each frame that was just loaded.
for message, frame in (
    ('First raw dataset information above {}\n', raw1),
    ('Second raw dataset information above {}\n', raw2),
    ('Third raw dataset information above {}\n', raw3),
    ('Fourth raw dataset information above {}\n', raw4),
):
    print(message.format(frame.shape))

First raw dataset information above (279, 1)

Second raw dataset information above (515, 1)

Third raw dataset information above (423, 1)

Fourth raw dataset information above (429, 1)



In [112]:
# Rows whose content starts with one of these characters are discarded.
ignore_str = [',', '.', ';', '{', '}', '#', '/', '(', ')', '?', '$']

# .str[0] takes the first character of every content string.
df1 = raw1.loc[~raw1['content'].str[0].isin(ignore_str)]
df2 = raw2.loc[~raw2['content'].str[0].isin(ignore_str)]
df3 = raw3.loc[~raw3['content'].str[0].isin(ignore_str)]
df4 = raw4.loc[~raw4['content'].str[0].isin(ignore_str)]

# Shapes after the first-character filter.
for message, frame in (
    ('First filtered dataset information above {}\n', df1),
    ('Second filtered dataset information above {}\n', df2),
    ('Third filtered dataset information above {}\n', df3),
    ('Fourth filtered dataset information above {}\n', df4),
):
    print(message.format(frame.shape))

First filtered dataset information above (278, 1)

Second filtered dataset information above (510, 1)

Third filtered dataset information above (422, 1)

Fourth filtered dataset information above (424, 1)



In [113]:
# Remove the disturbing content: any row whose text contains one of the
# symbols below, at any position.
# The entries are joined into one regular expression, so the opening
# parenthesis must be escaped. A raw string keeps that backslash literally;
# the plain literal '\(' is an invalid escape sequence that newer Python
# versions warn about.
str_list = ['{', '®', '℗', '©', '=', r'\(']
pattern = '|'.join(str_list)

df1 = df1[~df1.content.str.lower().str.contains(pattern)]
df2 = df2[~df2.content.str.lower().str.contains(pattern)]
df3 = df3[~df3.content.str.lower().str.contains(pattern)]
df4 = df4[~df4.content.str.lower().str.contains(pattern)]

# Shapes after the symbol filter.
print('First filtered dataset information above {}\n'.format(df1.shape))
print('Second filtered dataset information above {}\n'.format(df2.shape))
print('Third filtered dataset information above {}\n'.format(df3.shape))
print('Fourth filtered dataset information above {}\n'.format(df4.shape))

First filtered dataset information above (219, 1)

Second filtered dataset information above (433, 1)

Third filtered dataset information above (390, 1)

Fourth filtered dataset information above (373, 1)



In [114]:
# Select the dark-pattern candidates: rows whose lower-cased content
# contains at least one of the phrases below (joined into one regex).

str_list = ['left in stock', 'ends in', 'limited-time deal']
pattern = '|'.join(str_list)

dp1 = df1.loc[df1.content.str.lower().str.contains(pattern)]
dp2 = df2.loc[df2.content.str.lower().str.contains(pattern)]
dp3 = df3.loc[df3.content.str.lower().str.contains(pattern)]
dp4 = df4.loc[df4.content.str.lower().str.contains(pattern)]

# Number of matching rows per dataset.
for message, frame in (
    ('First filtered dataset information above {}\n', dp1),
    ('Second filtered dataset information above {}\n', dp2),
    ('Third filtered dataset information above {}\n', dp3),
    ('Fourth filtered dataset information above {}\n', dp4),
):
    print(message.format(frame.shape))

First filtered dataset information above (0, 1)

Second filtered dataset information above (11, 1)

Third filtered dataset information above (0, 1)

Fourth filtered dataset information above (3, 1)



In [115]:
# Display the rows of the second Amazon frame that matched the dark-pattern phrases.
dp2

Unnamed: 0,content
248,Ends in 07:42:09
251,Ends in 07:37:10
263,Ends in 02:27:10
271,Ends in 04:17:10
273,Ends in 01:57:10
284,Ends in 02:32:10
286,Ends in 05:22:10
330,Ends in 01:17:09
337,"Limited-time deal: Up to 80% off, select top r..."
350,Ends in 57:09


In [116]:
# Display the rows of the fourth Amazon frame that matched the dark-pattern phrases.
dp4

Unnamed: 0,content
105,Only 16 left in stock - order soon.
118,Only 8 left in stock - order soon.
241,Only 15 left in stock - order soon.


In [117]:
# Normal content = every cleaned row that is not in the dark-pattern
# selection. isin() with a DataFrame compares cells at matching index and
# column labels, so a row is flagged when it also appears in dpN.

nor1 = df1.loc[~df1.isin(dp1).any(axis=1)]
nor2 = df2.loc[~df2.isin(dp2).any(axis=1)]
nor3 = df3.loc[~df3.isin(dp3).any(axis=1)]
nor4 = df4.loc[~df4.isin(dp4).any(axis=1)]

# Shapes of the normal-content frames.
for message, frame in (
    ('First normal dataset information above {}\n', nor1),
    ('Second normal dataset information above {}\n', nor2),
    ('Third normal dataset information above {}\n', nor3),
    ('Fourth normal dataset information above {}\n', nor4),
):
    print(message.format(frame.shape))

First normal dataset information above (219, 1)

Second normal dataset information above (422, 1)

Third normal dataset information above (390, 1)

Fourth normal dataset information above (370, 1)



In [118]:
# Write the dark-pattern rows and the normal rows to CSV files
# (header row included, index column omitted).

# dark patterns: only dp2 and dp4 are written; the output files are
# numbered consecutively, not after the raw file each selection came from
for frame, path in (
    (dp2, 'DP/amazon1.csv'),
    (dp4, 'DP/amazon2.csv'),
):
    frame.to_csv(path, index=False, header=True)

# normal content: one file per dataset
for frame, path in (
    (nor1, 'Normal/amazon1.csv'),
    (nor2, 'Normal/amazon2.csv'),
    (nor3, 'Normal/amazon3.csv'),
    (nor4, 'Normal/amazon4.csv'),
):
    frame.to_csv(path, index=False, header=True)

### 3. Wish Dataset
-----

In [119]:
import pandas as pd
import numpy as np

# ---- Read the three raw Wish CSV files, one frame per file.

raw1 = pd.read_csv('Raw/wish-1.csv')
raw2 = pd.read_csv('Raw/wish-2.csv')
raw3 = pd.read_csv('Raw/wish-3.csv')

# Report the (rows, columns) shape of each frame that was just loaded.
for message, frame in (
    ('First raw dataset information above {}\n', raw1),
    ('Second raw dataset information above {}\n', raw2),
    ('Third raw dataset information above {}\n', raw3),
):
    print(message.format(frame.shape))

First raw dataset information above (68, 1)

Second raw dataset information above (69, 1)

Third raw dataset information above (65, 1)



In [120]:
# Rows whose content starts with one of these characters are discarded.
ignore_str = [',', '.', ';', '{', '}', '#', '/', '(', ')', '?', '$']

# .str[0] takes the first character of every content string.
df1 = raw1.loc[~raw1['content'].str[0].isin(ignore_str)]
df2 = raw2.loc[~raw2['content'].str[0].isin(ignore_str)]
df3 = raw3.loc[~raw3['content'].str[0].isin(ignore_str)]

# Shapes after the first-character filter.
for message, frame in (
    ('First filtered dataset information above {}\n', df1),
    ('Second filtered dataset information above {}\n', df2),
    ('Third filtered dataset information above {}\n', df3),
):
    print(message.format(frame.shape))

First filtered dataset information above (68, 1)

Second filtered dataset information above (69, 1)

Third filtered dataset information above (65, 1)



In [121]:
# Remove the disturbing content: any row whose text contains one of the
# symbols below, at any position.
# The entries are joined into one regular expression, so the opening
# parenthesis must be escaped. A raw string keeps that backslash literally;
# the plain literal '\(' is an invalid escape sequence that newer Python
# versions warn about.
str_list = ['{', '®', '℗', '©', '=', r'\(']
pattern = '|'.join(str_list)

df1 = df1[~df1.content.str.lower().str.contains(pattern)]
df2 = df2[~df2.content.str.lower().str.contains(pattern)]
df3 = df3[~df3.content.str.lower().str.contains(pattern)]

# Shapes after the symbol filter.
print('First filtered dataset information above {}\n'.format(df1.shape))
print('Second filtered dataset information above {}\n'.format(df2.shape))
print('Third filtered dataset information above {}\n'.format(df3.shape))

First filtered dataset information above (68, 1)

Second filtered dataset information above (69, 1)

Third filtered dataset information above (65, 1)



In [122]:
# Select the dark-pattern candidates: rows whose lower-cased content
# contains at least one of the phrases below (joined into one regex).

str_list = ['bought this', 'almost gone', 'invites sent', 'like you bought']
pattern = '|'.join(str_list)

dp1 = df1.loc[df1.content.str.lower().str.contains(pattern)]
dp2 = df2.loc[df2.content.str.lower().str.contains(pattern)]
dp3 = df3.loc[df3.content.str.lower().str.contains(pattern)]

# Number of matching rows per dataset.
for message, frame in (
    ('First filtered dataset information above {}\n', dp1),
    ('Second filtered dataset information above {}\n', dp2),
    ('Third filtered dataset information above {}\n', dp3),
):
    print(message.format(frame.shape))

First filtered dataset information above (43, 1)

Second filtered dataset information above (40, 1)

Third filtered dataset information above (39, 1)



In [123]:
# Display the rows of the first Wish frame that matched the dark-pattern phrases.
dp1

Unnamed: 0,content
11,"1,000+ bought this"
12,"100,000+ bought this"
13,Almost Gone!
14,"10,000+ bought this"
15,100+ bought this
16,Almost Gone!
17,"5,000+ bought this"
18,"20,000+ bought this"
19,"20,000+ bought this"
20,"1,000+ bought this"


In [124]:
# Display the rows of the second Wish frame that matched the dark-pattern phrases.
dp2

Unnamed: 0,content
11,100+ bought this
12,Almost Gone!
13,"1,000+ bought this"
14,"10,000+ bought this"
15,"1,000+ bought this"
17,"5,000+ bought this"
18,"5,000+ bought this"
19,100+ bought this
20,"10,000+ bought this"
21,Almost Gone!


In [125]:
# Display the rows of the third Wish frame that matched the dark-pattern phrases.
dp3

Unnamed: 0,content
11,"10,000+ bought this"
12,"1,000+ bought this"
13,Almost Gone!
14,"1,000+ bought this"
15,"10,000+ bought this"
16,Almost Gone!
17,100+ bought this
18,"20,000+ bought this"
19,100+ bought this
20,"10,000+ bought this"


In [126]:
# Normal content = every cleaned row that is not in the dark-pattern
# selection. isin() with a DataFrame compares cells at matching index and
# column labels, so a row is flagged when it also appears in dpN.

nor1 = df1.loc[~df1.isin(dp1).any(axis=1)]
nor2 = df2.loc[~df2.isin(dp2).any(axis=1)]
nor3 = df3.loc[~df3.isin(dp3).any(axis=1)]

# Shapes of the normal-content frames.
for message, frame in (
    ('First normal dataset information above {}\n', nor1),
    ('Second normal dataset information above {}\n', nor2),
    ('Third normal dataset information above {}\n', nor3),
):
    print(message.format(frame.shape))

First normal dataset information above (25, 1)

Second normal dataset information above (29, 1)

Third normal dataset information above (26, 1)



In [127]:
# Write the dark-pattern rows and the normal rows to CSV files
# (header row included, index column omitted).

# dark patterns: one file per dataset
for frame, path in (
    (dp1, 'DP/wish1.csv'),
    (dp2, 'DP/wish2.csv'),
    (dp3, 'DP/wish3.csv'),
):
    frame.to_csv(path, index=False, header=True)

# normal content: one file per dataset
for frame, path in (
    (nor1, 'Normal/wish1.csv'),
    (nor2, 'Normal/wish2.csv'),
    (nor3, 'Normal/wish3.csv'),
):
    frame.to_csv(path, index=False, header=True)

### 4. Shein Dataset
-----

In [128]:
import pandas as pd
import numpy as np

# ---- Read the four raw Shein CSV files, one frame per file.

raw1 = pd.read_csv('Raw/shein-1.csv')
raw2 = pd.read_csv('Raw/shein-2.csv')
raw3 = pd.read_csv('Raw/shein-3.csv')
raw4 = pd.read_csv('Raw/shein-4.csv')

# Report the (rows, columns) shape of each frame that was just loaded.
for message, frame in (
    ('First raw dataset information above {}\n', raw1),
    ('Second raw dataset information above {}\n', raw2),
    ('Third raw dataset information above {}\n', raw3),
    ('Fourth raw dataset information above {}\n', raw4),
):
    print(message.format(frame.shape))

First raw dataset information above (510, 1)

Second raw dataset information above (1158, 1)

Third raw dataset information above (1056, 1)

Fourth raw dataset information above (1091, 1)



In [129]:
# Rows whose content starts with one of these characters are discarded.
ignore_str = [',', '.', ';', '{', '}', '#', '/', '(', ')', '?', '$']

# .str[0] takes the first character of every content string.
df1 = raw1.loc[~raw1['content'].str[0].isin(ignore_str)]
df2 = raw2.loc[~raw2['content'].str[0].isin(ignore_str)]
df3 = raw3.loc[~raw3['content'].str[0].isin(ignore_str)]
df4 = raw4.loc[~raw4['content'].str[0].isin(ignore_str)]

# Shapes after the first-character filter.
for message, frame in (
    ('First filtered dataset information above {}\n', df1),
    ('Second filtered dataset information above {}\n', df2),
    ('Third filtered dataset information above {}\n', df3),
    ('Fourth filtered dataset information above {}\n', df4),
):
    print(message.format(frame.shape))

First filtered dataset information above (510, 1)

Second filtered dataset information above (1158, 1)

Third filtered dataset information above (1056, 1)

Fourth filtered dataset information above (1091, 1)



In [130]:
# Remove the disturbing content: any row whose text contains one of the
# symbols below, at any position.
# The entries are joined into one regular expression, so the opening
# parenthesis must be escaped. A raw string keeps that backslash literally;
# the plain literal '\(' is an invalid escape sequence that newer Python
# versions warn about.
str_list = ['{', '®', '℗', '©', '=', r'\(']
pattern = '|'.join(str_list)

df1 = df1[~df1.content.str.lower().str.contains(pattern)]
df2 = df2[~df2.content.str.lower().str.contains(pattern)]
df3 = df3[~df3.content.str.lower().str.contains(pattern)]
df4 = df4[~df4.content.str.lower().str.contains(pattern)]

# Shapes after the symbol filter.
print('First filtered dataset information above {}\n'.format(df1.shape))
print('Second filtered dataset information above {}\n'.format(df2.shape))
print('Third filtered dataset information above {}\n'.format(df3.shape))
print('Fourth filtered dataset information above {}\n'.format(df4.shape))

First filtered dataset information above (505, 1)

Second filtered dataset information above (1152, 1)

Third filtered dataset information above (1050, 1)

Fourth filtered dataset information above (1085, 1)



In [131]:
# Select the dark-pattern candidates: rows whose lower-cased content
# contains at least one of the keywords below (joined into one regex).

str_list = ['sold', 'ends in']
pattern = '|'.join(str_list)

dp1 = df1.loc[df1.content.str.lower().str.contains(pattern)]
dp2 = df2.loc[df2.content.str.lower().str.contains(pattern)]
dp3 = df3.loc[df3.content.str.lower().str.contains(pattern)]
dp4 = df4.loc[df4.content.str.lower().str.contains(pattern)]

# Number of matching rows per dataset.
for message, frame in (
    ('First filtered dataset information above {}\n', dp1),
    ('Second filtered dataset information above {}\n', dp2),
    ('Third filtered dataset information above {}\n', dp3),
    ('Fourth filtered dataset information above {}\n', dp4),
):
    print(message.format(frame.shape))

First filtered dataset information above (41, 1)

Second filtered dataset information above (0, 1)

Third filtered dataset information above (0, 1)

Fourth filtered dataset information above (0, 1)



In [132]:
# Display the rows of the first Shein frame that matched the dark-pattern keywords.
dp1

Unnamed: 0,content
305,Ends in
307,88 Sold 22%
308,88 Sold
310,5 Sold 3%
311,5 Sold
313,20 Sold 10%
314,20 Sold
316,10 Sold 5%
317,10 Sold
319,3 Sold 2%


In [133]:
# Normal content = every cleaned row that is not in the dark-pattern
# selection. isin() with a DataFrame compares cells at matching index and
# column labels, so a row is flagged when it also appears in dpN.

nor1 = df1.loc[~df1.isin(dp1).any(axis=1)]
nor2 = df2.loc[~df2.isin(dp2).any(axis=1)]
nor3 = df3.loc[~df3.isin(dp3).any(axis=1)]
nor4 = df4.loc[~df4.isin(dp4).any(axis=1)]

# Shapes of the normal-content frames.
for message, frame in (
    ('First normal dataset information above {}\n', nor1),
    ('Second normal dataset information above {}\n', nor2),
    ('Third normal dataset information above {}\n', nor3),
    ('Fourth normal dataset information above {}\n', nor4),
):
    print(message.format(frame.shape))

First normal dataset information above (464, 1)

Second normal dataset information above (1152, 1)

Third normal dataset information above (1050, 1)

Fourth normal dataset information above (1085, 1)



In [134]:
# Write the dark-pattern rows and the normal rows to CSV files
# (header row included, index column omitted).

# dark patterns: only dp1 is written
dp1.to_csv('DP/shein1.csv', index=False, header=True)

# normal content: one file per dataset
for frame, path in (
    (nor1, 'Normal/shein1.csv'),
    (nor2, 'Normal/shein2.csv'),
    (nor3, 'Normal/shein3.csv'),
    (nor4, 'Normal/shein4.csv'),
):
    frame.to_csv(path, index=False, header=True)

### 5. Remaining Datasets (No Dark Patterns Detected)
-----

In [135]:
import pandas as pd
import numpy as np

# ---- Read the raw CSV files of the remaining sources into individual frames.

# alibaba
raw1 = pd.read_csv('Raw/alibaba-1.csv')
raw2 = pd.read_csv('Raw/alibaba-2.csv')
raw3 = pd.read_csv('Raw/alibaba-3.csv')
raw4 = pd.read_csv('Raw/alibaba-4.csv')

# boohoo
raw5 = pd.read_csv('Raw/boohoo-1.csv')
raw6 = pd.read_csv('Raw/boohoo-2.csv')
raw7 = pd.read_csv('Raw/boohoo-3.csv')
raw8 = pd.read_csv('Raw/boohoo-4.csv')
raw9 = pd.read_csv('Raw/boohoo-5.csv')

# PC
raw10 = pd.read_csv('Raw/PC-1.csv')
raw11 = pd.read_csv('Raw/PC-2.csv')
raw12 = pd.read_csv('Raw/PC-3.csv')
raw13 = pd.read_csv('Raw/PC-4.csv')
raw14 = pd.read_csv('Raw/PC-5.csv')

# All frames in load order, so the following cells can loop over them.
raw_list = [
    raw1, raw2, raw3, raw4,
    raw5, raw6, raw7, raw8, raw9,
    raw10, raw11, raw12, raw13, raw14,
]

# Report the shape of every frame, numbered from 1.
for position, frame in enumerate(raw_list, start=1):
    print('The {}th of the raw datasets information is: {}\n'.format(position, frame.shape))

The 1th of the raw datasets information is: (925, 1)

The 2th of the raw datasets information is: (859, 1)

The 3th of the raw datasets information is: (943, 1)

The 4th of the raw datasets information is: (928, 1)

The 5th of the raw datasets information is: (662, 1)

The 6th of the raw datasets information is: (1224, 1)

The 7th of the raw datasets information is: (1029, 1)

The 8th of the raw datasets information is: (809, 1)

The 9th of the raw datasets information is: (771, 1)

The 10th of the raw datasets information is: (806, 1)

The 11th of the raw datasets information is: (869, 1)

The 12th of the raw datasets information is: (897, 1)

The 13th of the raw datasets information is: (628, 1)

The 14th of the raw datasets information is: (938, 1)



In [136]:
# Rows whose content starts with one of these characters are discarded.
ignore_str = [',', '.', ';', '{', '}', '#', '/', '(', ')', '?', '$']

# One filtered frame per raw frame, in the same order as raw_list.
df_list = [
    frame.loc[~frame['content'].str[0].isin(ignore_str)]
    for frame in raw_list
]

# Report the shape of every filtered frame, numbered from 1.
for position, frame in enumerate(df_list, start=1):
    print('The {}th of the filtered datasets information is: {}\n'.format(position, frame.shape))

The 1th of the filtered datasets information is: (925, 1)

The 2th of the filtered datasets information is: (818, 1)

The 3th of the filtered datasets information is: (875, 1)

The 4th of the filtered datasets information is: (928, 1)

The 5th of the filtered datasets information is: (661, 1)

The 6th of the filtered datasets information is: (1223, 1)

The 7th of the filtered datasets information is: (1029, 1)

The 8th of the filtered datasets information is: (809, 1)

The 9th of the filtered datasets information is: (771, 1)

The 10th of the filtered datasets information is: (806, 1)

The 11th of the filtered datasets information is: (869, 1)

The 12th of the filtered datasets information is: (897, 1)

The 13th of the filtered datasets information is: (628, 1)

The 14th of the filtered datasets information is: (938, 1)



In [137]:
# Remove the disturbing content: any row whose text contains one of the
# symbols below, at any position.
# The entries are joined into one regular expression, so the opening
# parenthesis must be escaped. A raw string keeps that backslash literally;
# the plain literal '\(' is an invalid escape sequence that newer Python
# versions warn about.
str_list = ['{', '®', '℗', '©', '=', r'\(']
pattern = '|'.join(str_list)

# One further-filtered frame per entry of df_list, in the same order.
df_further_filter_list = [
    frame[~frame.content.str.lower().str.contains(pattern)]
    for frame in df_list
]

# Report the shape of every further-filtered frame, numbered from 1.
for position, frame in enumerate(df_further_filter_list, start=1):
    print('The {}th of the further filtered datasets information is: {}\n'.format(position, frame.shape))

The 1th of the further filtered datasets information is: (889, 1)

The 2th of the further filtered datasets information is: (807, 1)

The 3th of the further filtered datasets information is: (863, 1)

The 4th of the further filtered datasets information is: (909, 1)

The 5th of the further filtered datasets information is: (655, 1)

The 6th of the further filtered datasets information is: (1212, 1)

The 7th of the further filtered datasets information is: (1019, 1)

The 8th of the further filtered datasets information is: (799, 1)

The 9th of the further filtered datasets information is: (761, 1)

The 10th of the further filtered datasets information is: (798, 1)

The 11th of the further filtered datasets information is: (827, 1)

The 12th of the further filtered datasets information is: (870, 1)

The 13th of the further filtered datasets information is: (625, 1)

The 14th of the further filtered datasets information is: (902, 1)



In [138]:
# Save every further-filtered frame as normal content; no dark-pattern
# files are written in this section.

# normal content: file names carry the zero-based position in the list
# (norDF0.csv, norDF1.csv, ...)
for position, frame in enumerate(df_further_filter_list):
    filename = 'Normal/' + 'norDF' + str(position) + '.csv'
    frame.to_csv(filename, index=False, header=True)

In [None]:
# Find rows that are not common to two dataframes: stack both frames and
# drop every row that occurs more than once (keep=False retains no copy).
# Rows duplicated inside a single frame are dropped by this as well.
# NOTE(review): sub1 and sub2 are not defined in any cell shown above, so
# this cell presumably raises NameError on a fresh kernel - confirm where
# they are meant to come from, or remove the cell.

pd.concat([sub1,sub2]).drop_duplicates(keep=False)