In [1]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split, KFold
from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer
import matplotlib
from matplotlib import pyplot as plt
# import seaborn as sns
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [2]:
stop_words = set(stopwords.words("english"))

In [3]:
def stopwords(text):
    '''a function for removing stopwords'''
    text = [word.lower() for word in text.split() if word not in stop_words]
    return(text)

In [4]:
def remove_punctuation(text):
    '''a function for removing punctuation'''
    import string
    translator = str.maketrans('', '', string.punctuation)
    return text.translate(translator)

In [5]:
data = pd.read_csv('JLAllPages.csv')
print ("Original 'JLAllPages.csv' CSV Data: \n")
data

Original 'JLAllPages.csv' CSV Data: 



Unnamed: 0,Date,Name,Review
0,1 month ago,Lucy,I am currently a plusnet broadband customer w...
1,7 months ago,BJ,Avoid. Avoid. Avoid. First John Lewis Broadba...
2,7 months ago,Nic,They've taken my money and won't refund. I ca...
3,1 year ago,David7667,I ordered broadband with telephone for a rece...
4,1 year ago,clifford,Poor internet phone line went down a few times
5,1 year ago,Ken,I ordered broadband 5 weeks ago still not hea...
6,1 year ago,Shirley chandler,Have waited 3 weeks since I applied for John ...
7,1 year ago,Moe,If I could give zero or negative ratings I wo...
8,2 years ago,Christopher J,John Lewis broadband deserve a positive revie...
9,2 years ago,Mart,Avoid. Avoid. Avoid. The worst broadband qual...


In [339]:
data.pop ('Date')
data

Unnamed: 0,Name,Review
0,Lucy,I am currently a plusnet broadband customer w...
1,BJ,Avoid. Avoid. Avoid. First John Lewis Broadba...
2,Nic,They've taken my money and won't refund. I ca...
3,David7667,I ordered broadband with telephone for a rece...
4,clifford,Poor internet phone line went down a few times
5,Ken,I ordered broadband 5 weeks ago still not hea...
6,Shirley chandler,Have waited 3 weeks since I applied for John ...
7,Moe,If I could give zero or negative ratings I wo...
8,Christopher J,John Lewis broadband deserve a positive revie...
9,Mart,Avoid. Avoid. Avoid. The worst broadband qual...


In [340]:
data.pop(" Name")
data

Unnamed: 0,Review
0,I am currently a plusnet broadband customer w...
1,Avoid. Avoid. Avoid. First John Lewis Broadba...
2,They've taken my money and won't refund. I ca...
3,I ordered broadband with telephone for a rece...
4,Poor internet phone line went down a few times
5,I ordered broadband 5 weeks ago still not hea...
6,Have waited 3 weeks since I applied for John ...
7,If I could give zero or negative ratings I wo...
8,John Lewis broadband deserve a positive revie...
9,Avoid. Avoid. Avoid. The worst broadband qual...


In [325]:
text = [word.lower() for word in data if word not in stop_words]
print (text)

[' review']


In [301]:
data[' Review] = data[' Review'].apply(stopwords) 
data

Unnamed: 0,Review
0,"[i, currently, plusnet, broadband, customer, i..."
1,"[avoid., avoid., avoid., first, john, lewis, b..."
2,"[they've, taken, money, refund., i, cancelled,..."
3,"[i, ordered, broadband, telephone, recently, r..."
4,"[poor, internet, phone, line, went, times]"
5,"[i, ordered, broadband, 5, weeks, ago, still, ..."
6,"[have, waited, 3, weeks, since, i, applied, jo..."
7,"[if, i, could, give, zero, negative, ratings, ..."
8,"[john, lewis, broadband, deserve, positive, re..."
9,"[avoid., avoid., avoid., the, worst, broadband..."


In [270]:
for col in data.columns:
    print (col, data[col].isnull().sum())

 Review 0


In [302]:
count = data[' Review'].value_counts()
print (count)

[i, currently, plusnet, broadband, customer, i, happy, continue, using., however, contract, ended, plusnet, give, "new, customer", cashback, incentive, i, considering, switching, john, lewis, effectively, still, plusnet, knowledge., in, theory, i, switch, john, lewis, service, i, get, current, £75, black, friday, deal, new, customer., has, anyone, used, providers, before?, i, presuming, would, fairly, straightforward, switch, considering, effectively, isp.]                                                                                                                                                                                                                                                                                        1
[avoid., avoid., avoid., the, worst, broadband, quality, poorest, service, i’ve, ever, encountered., service, cut, endlessly, every, day.., spent, 4, months, trying, rectify, speaking, technical, support., 8, different, people, suggested, 8, alternative, ‘f

In [303]:
words= []
with open('JLAllPages.csv', 'r') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
         csv_words = row[2].split(" ")
         for i in csv_words:
              words.append(i)

words_counted = []
for i in words:
    x = words.count(i)
    words_counted.append((i,x))

In [304]:
set(words_counted)

{('', 18),
 ('"new', 1),
 ("'customer", 1),
 ("'on'", 1),
 ('(after', 1),
 ('(every', 1),
 ('(no', 1),
 ('(or', 1),
 ('-', 4),
 ('-itâ€™s', 1),
 ('0.2mbs.', 1),
 ('10', 1),
 ('103', 1),
 ('14', 1),
 ('19th', 1),
 ('24', 1),
 ('24hrs.', 1),
 ('2mbs', 1),
 ('3', 3),
 ('3hours', 1),
 ('4', 3),
 ('40', 1),
 ('48', 2),
 ('5', 2),
 ('6', 2),
 ('8', 2),
 ('81', 1),
 ('A', 1),
 ('A/L.', 2),
 ('After', 2),
 ('Am', 1),
 ('Apart', 1),
 ('Apparently', 1),
 ('As', 1),
 ('At', 1),
 ('Avoid.', 6),
 ('BT', 2),
 ('Broadband', 3),
 ('By', 1),
 ('Customer', 2),
 ('Dire', 1),
 ('Do', 1),
 ('Ended', 1),
 ('First', 1),
 ('Friday', 1),
 ('Has', 1),
 ('Have', 2),
 ('However', 1),
 ('I', 44),
 ("I'd", 1),
 ('If', 1),
 ('In', 3),
 ('Iâ€™m', 1),
 ('Iâ€™ve', 2),
 ('JL', 4),
 ("JL's", 1),
 ('John', 12),
 ('Just', 1),
 ('Lewis', 11),
 ('Lewis/Waitrose', 1),
 ('Losing', 1),
 ('Many', 1),
 ('NOT', 1),
 ('Needless', 1),
 ('Never', 1),
 ('No', 3),
 ('Nobody', 1),
 ('None', 1),
 ('Not', 1),
 ('One', 1),
 ('Our', 1),
 ('

In [263]:
norm = [word for word in words_counted if word.isalpha()]

AttributeError: 'tuple' object has no attribute 'isalpha'

In [276]:
words_counted.plot(15, cumulative=False)

AttributeError: 'list' object has no attribute 'plot'

In [184]:
with open ("JLAllPages.csv") as f:
    JLdf = pd.read_csv(f)
f.close

<function TextIOWrapper.close()>

In [185]:
JLdf

Unnamed: 0,Date,Name,Review
0,1 month ago,Lucy,I am currently a plusnet broadband customer w...
1,7 months ago,BJ,Avoid. Avoid. Avoid. First John Lewis Broadba...
2,7 months ago,Nic,They've taken my money and won't refund. I ca...
3,1 year ago,David7667,I ordered broadband with telephone for a rece...
4,1 year ago,clifford,Poor internet phone line went down a few times
5,1 year ago,Ken,I ordered broadband 5 weeks ago still not hea...
6,1 year ago,Shirley chandler,Have waited 3 weeks since I applied for John ...
7,1 year ago,Moe,If I could give zero or negative ratings I wo...
8,2 years ago,Christopher J,John Lewis broadband deserve a positive revie...
9,2 years ago,Mart,Avoid. Avoid. Avoid. The worst broadband qual...


In [134]:
from csv import reader
with open ('JLAllPages.csv','r') as source:
    reader = csv.reader(source, delimiter=',')
    for lines in reader:
        print (lines[2])

 Review
 I am currently a plusnet broadband customer which I am happy to continue using. However as my contract has now ended and plusnet will not give me the "new customer" cashback incentive I am considering switching to John Lewis who are effectively still plusnet to my knowledge. In theory if I switch to John Lewis it should be the same service but I will get the current Â£75 black Friday deal as a new customer. Has anyone used both providers before? I am presuming it would be a fairly straightforward switch over considering they are effectively the same isp.
 Avoid. Avoid. Avoid. First John Lewis Broadband failed to process my order and I incurred increased charges from my old supplier. Then John Lewis Broadband never actually got around to pressing the 'on' button to connect my broadband when they were supposed to. Just when I thought I'd escaped these incompetents by moving to a new supplier when they jacked their prices up - it now turns out that John Lewis Broadband haven't ac

In [145]:
review = [word.lower()for lines in reader]

ValueError: I/O operation on closed file.

In [133]:
review = [word.lower()for word in reader if word.isalpha()]
print (review)

ValueError: I/O operation on closed file.