# Data Cleaning / Pre-Processing

In [82]:
import pandas as pd

# Load the raw review posts for the Baguio tourist destinations.
data = pd.read_csv("../csv/tourist_destinations_review_posts_baguio.csv")
# DataFrame.dropna() returns a new frame instead of modifying `data` in
# place, so the result has to be re-bound; calling it bare keeps every row
# with a missing value in the dataset.
data = data.dropna()
data

Unnamed: 0,tourist_destination,review_post
0,Burnham Park,An excellent place for family and friends to v...
1,Burnham Park,A lovely place to be on a nice day.We were ple...
2,Burnham Park,Definitely one of the best places to visit in ...
3,Burnham Park,I have always been looking at this park ever s...
4,Burnham Park,Very beautiful park with boating and cycling a...
...,...,...
7818,Teacher's Camp,Place is quite old but still works very well. ...
7819,Teacher's Camp,Not crowded. Lots of open space.
7820,Teacher's Camp,Some staff are not approachable when i went to...
7821,Teacher's Camp,Very very affordable place. Nice and cozy. Kak...


In [83]:
# Flag every row whose review text already appeared in an earlier row, then
# keep only the unflagged rows (i.e. the first occurrence of each review).
is_repeated_post = data.duplicated(subset=['review_post'])
data = data[~is_repeated_post]
data

Unnamed: 0,tourist_destination,review_post
0,Burnham Park,An excellent place for family and friends to v...
1,Burnham Park,A lovely place to be on a nice day.We were ple...
2,Burnham Park,Definitely one of the best places to visit in ...
3,Burnham Park,I have always been looking at this park ever s...
4,Burnham Park,Very beautiful park with boating and cycling a...
...,...,...
7818,Teacher's Camp,Place is quite old but still works very well. ...
7819,Teacher's Camp,Not crowded. Lots of open space.
7820,Teacher's Camp,Some staff are not approachable when i went to...
7821,Teacher's Camp,Very very affordable place. Nice and cozy. Kak...


In [84]:
import re
import nltk
from cleantext import clean

from sklearn.feature_extraction import text
# Materialise scikit-learn's built-in English stop-word collection as a list
# so that it can be sliced; the second line previews the first five entries.
stop_words = [*text.ENGLISH_STOP_WORDS]
stop_words[:5]

['although', 'whereupon', 'de', 'my', 'these']

In [85]:
# Work on a copy so the de-duplicated `data` frame stays untouched.
_data = data.copy()

# One or more consecutive characters that are neither ASCII letters nor spaces.
_non_letters = re.compile('[^A-Za-z ]+')


def _strip_special_characters(post):
    """Replace each run of non-letter, non-space characters with one space."""
    # str() keeps non-string cell values from raising inside the regex call.
    return _non_letters.sub(' ', str(post))


_data['removed_special_charas_review_post'] = _data['review_post'].map(_strip_special_characters)
_data


Unnamed: 0,tourist_destination,review_post,removed_special_charas_review_post
0,Burnham Park,An excellent place for family and friends to v...,An excellent place for family and friends to v...
1,Burnham Park,A lovely place to be on a nice day.We were ple...,A lovely place to be on a nice day We were ple...
2,Burnham Park,Definitely one of the best places to visit in ...,Definitely one of the best places to visit in ...
3,Burnham Park,I have always been looking at this park ever s...,I have always been looking at this park ever s...
4,Burnham Park,Very beautiful park with boating and cycling a...,Very beautiful park with boating and cycling a...
...,...,...,...
7818,Teacher's Camp,Place is quite old but still works very well. ...,Place is quite old but still works very well ...
7819,Teacher's Camp,Not crowded. Lots of open space.,Not crowded Lots of open space
7820,Teacher's Camp,Some staff are not approachable when i went to...,Some staff are not approachable when i went to...
7821,Teacher's Camp,Very very affordable place. Nice and cozy. Kak...,Very very affordable place Nice and cozy Kak...


In [86]:
# Lower-case every review after special characters have been stripped.
special_chars_removed = _data['removed_special_charas_review_post']
_data['to_lower_case_review_post'] = special_chars_removed.map(str.lower)
_data

Unnamed: 0,tourist_destination,review_post,removed_special_charas_review_post,to_lower_case_review_post
0,Burnham Park,An excellent place for family and friends to v...,An excellent place for family and friends to v...,an excellent place for family and friends to v...
1,Burnham Park,A lovely place to be on a nice day.We were ple...,A lovely place to be on a nice day We were ple...,a lovely place to be on a nice day we were ple...
2,Burnham Park,Definitely one of the best places to visit in ...,Definitely one of the best places to visit in ...,definitely one of the best places to visit in ...
3,Burnham Park,I have always been looking at this park ever s...,I have always been looking at this park ever s...,i have always been looking at this park ever s...
4,Burnham Park,Very beautiful park with boating and cycling a...,Very beautiful park with boating and cycling a...,very beautiful park with boating and cycling a...
...,...,...,...,...
7818,Teacher's Camp,Place is quite old but still works very well. ...,Place is quite old but still works very well ...,place is quite old but still works very well ...
7819,Teacher's Camp,Not crowded. Lots of open space.,Not crowded Lots of open space,not crowded lots of open space
7820,Teacher's Camp,Some staff are not approachable when i went to...,Some staff are not approachable when i went to...,some staff are not approachable when i went to...
7821,Teacher's Camp,Very very affordable place. Nice and cozy. Kak...,Very very affordable place Nice and cozy Kak...,very very affordable place nice and cozy kak...


In [87]:
# Membership is tested once per token across every review, so look words up
# in a hashed set instead of scanning the `stop_words` list each time.
_stop_word_lookup = frozenset(stop_words)


def _drop_stop_words(post):
    """Return `post` with stop words removed, tokens re-joined by single spaces."""
    # split() with no argument also collapses the repeated spaces left behind
    # by the special-character removal step.
    return " ".join(token for token in post.split() if token not in _stop_word_lookup)


_data['removed_stop_words_review_post'] = _data['to_lower_case_review_post'].map(_drop_stop_words)
_data

Unnamed: 0,tourist_destination,review_post,removed_special_charas_review_post,to_lower_case_review_post,removed_stop_words_review_post
0,Burnham Park,An excellent place for family and friends to v...,An excellent place for family and friends to v...,an excellent place for family and friends to v...,excellent place family friends visit entrance ...
1,Burnham Park,A lovely place to be on a nice day.We were ple...,A lovely place to be on a nice day We were ple...,a lovely place to be on a nice day we were ple...,lovely place nice day pleasantly surprised ple...
2,Burnham Park,Definitely one of the best places to visit in ...,Definitely one of the best places to visit in ...,definitely one of the best places to visit in ...,definitely best places visit baguio burnham pa...
3,Burnham Park,I have always been looking at this park ever s...,I have always been looking at this park ever s...,i have always been looking at this park ever s...,looking park chance review park flexed view pa...
4,Burnham Park,Very beautiful park with boating and cycling a...,Very beautiful park with boating and cycling a...,very beautiful park with boating and cycling a...,beautiful park boating cycling available insid...
...,...,...,...,...,...
7818,Teacher's Camp,Place is quite old but still works very well. ...,Place is quite old but still works very well ...,place is quite old but still works very well ...,place quite old works parking problem good pro...
7819,Teacher's Camp,Not crowded. Lots of open space.,Not crowded Lots of open space,not crowded lots of open space,crowded lots open space
7820,Teacher's Camp,Some staff are not approachable when i went to...,Some staff are not approachable when i went to...,some staff are not approachable when i went to...,staff approachable went make reservations nice...
7821,Teacher's Camp,Very very affordable place. Nice and cozy. Kak...,Very very affordable place Nice and cozy Kak...,very very affordable place nice and cozy kak...,affordable place nice cozy kakatakot lang ng k...


In [88]:
from nltk.stem.wordnet import WordNetLemmatizer

lem = WordNetLemmatizer()


def _lemmatize_post(post):
    """Lemmatize each whitespace-separated token of `post` and re-join them."""
    # lemmatize() is called without a part-of-speech argument, so the
    # library's default setting applies to every token.
    lemmas = [lem.lemmatize(str(token)) for token in post.split()]
    return " ".join(lemmas)


_data['lemmatized_words_review_post'] = _data['removed_stop_words_review_post'].map(_lemmatize_post)
_data

Unnamed: 0,tourist_destination,review_post,removed_special_charas_review_post,to_lower_case_review_post,removed_stop_words_review_post,lemmatized_words_review_post
0,Burnham Park,An excellent place for family and friends to v...,An excellent place for family and friends to v...,an excellent place for family and friends to v...,excellent place family friends visit entrance ...,excellent place family friend visit entrance f...
1,Burnham Park,A lovely place to be on a nice day.We were ple...,A lovely place to be on a nice day We were ple...,a lovely place to be on a nice day we were ple...,lovely place nice day pleasantly surprised ple...,lovely place nice day pleasantly surprised ple...
2,Burnham Park,Definitely one of the best places to visit in ...,Definitely one of the best places to visit in ...,definitely one of the best places to visit in ...,definitely best places visit baguio burnham pa...,definitely best place visit baguio burnham par...
3,Burnham Park,I have always been looking at this park ever s...,I have always been looking at this park ever s...,i have always been looking at this park ever s...,looking park chance review park flexed view pa...,looking park chance review park flexed view pa...
4,Burnham Park,Very beautiful park with boating and cycling a...,Very beautiful park with boating and cycling a...,very beautiful park with boating and cycling a...,beautiful park boating cycling available insid...,beautiful park boating cycling available insid...
...,...,...,...,...,...,...
7818,Teacher's Camp,Place is quite old but still works very well. ...,Place is quite old but still works very well ...,place is quite old but still works very well ...,place quite old works parking problem good pro...,place quite old work parking problem good prov...
7819,Teacher's Camp,Not crowded. Lots of open space.,Not crowded Lots of open space,not crowded lots of open space,crowded lots open space,crowded lot open space
7820,Teacher's Camp,Some staff are not approachable when i went to...,Some staff are not approachable when i went to...,some staff are not approachable when i went to...,staff approachable went make reservations nice...,staff approachable went make reservation nice ...
7821,Teacher's Camp,Very very affordable place. Nice and cozy. Kak...,Very very affordable place Nice and cozy Kak...,very very affordable place nice and cozy kak...,affordable place nice cozy kakatakot lang ng k...,affordable place nice cozy kakatakot lang ng k...


In [90]:
# Persist the original text together with every intermediate cleaning column.
# NOTE(review): the DataFrame index is written as well (to_csv default), so it
# will presumably come back as an unnamed first column on re-read - confirm
# that downstream readers expect that.
cleaned_variations_path = '../csv/cleaned/cleaned_variations_review_posts.csv'
_data.to_csv(cleaned_variations_path)