In [32]:
import pandas as pd

In [33]:
#importing the dataframe
df = pd.read_csv("./Data/Data.csv")

In [34]:
df.head()

Unnamed: 0,Privacy Text,Score
0,We are unable to respond to Do Not Track signa...,1
1,These tracking technologies collect informatio...,1
2,We and our third-party partners may also use c...,1
3,Company may share data collected from or about...,1
4,In the event that the company is involved in a...,1


In [35]:
df.shape

(227, 2)

In [36]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 227 entries, 0 to 226
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Privacy Text  227 non-null    object
 1   Score         227 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 3.7+ KB


### Text preprocessing

#### Removing Punctuations and converting all word to lowercase

In [37]:
import string
import nltk


def remove_punctuation(text):
    no_punctuation_text = ''.join([i for i in str(text) if i not in string.punctuation])
    return no_punctuation_text.lower()

In [38]:
df['Privacy Text'] = df['Privacy Text'].apply(remove_punctuation)

In [39]:
df.head()

Unnamed: 0,Privacy Text,Score
0,we are unable to respond to do not track signa...,1
1,these tracking technologies collect informatio...,1
2,we and our thirdparty partners may also use co...,1
3,company may share data collected from or about...,1
4,in the event that the company is involved in a...,1


#### Removing nonwords and reducing word to it's lemma

In [40]:
import spacy
nlp = spacy.load("en_core_web_sm")

In [41]:
import re 

def remove_nonwords(str_):
    return re.sub("[^A-Za-z ]\w+[^A-Za-z]*", ' ', str_)

# Lemmatization and Removing stop words and non words
def text_preprocessing(text):
    text = remove_nonwords(text)
    tokenized_text = [token.lemma_ for token in nlp(text)]
    no_stopwords_list = [i.lower() for i in tokenized_text if i not in nlp.Defaults.stop_words]
    lemma_text = ' '.join(no_stopwords_list)
    return lemma_text

In [42]:
# Preprocessing the Headline text
df['Privacy Text'] = df['Privacy Text'].apply(text_preprocessing)

In [43]:
df.head()

Unnamed: 0,Privacy Text,Score
0,unable respond track signal set browser time,1
1,track technology collect information use servi...,1
2,thirdparty partner use cookie tracking technol...,1
3,company share datum collect party partner faci...,1
4,event company involve merger acquisition bankr...,1


In [44]:
# dropping ALL duplicte values
df = df.drop_duplicates()

### Building Model

#### Split data into train and test sets

In [45]:
from sklearn.model_selection import train_test_split

X_train_nv, X_test_nv, y_train, y_test = train_test_split(df['Privacy Text'], df['Score'], 
                                                    train_size=0.8, 
                                                    random_state=42)

#### Vectorization

In [46]:
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.feature_selection import SelectKBest, chi2

vectorizer = TfidfVectorizer(max_features=500,stop_words=stopwords.words('english'))
X_train = vectorizer.fit_transform(X_train_nv).toarray()
X_test = vectorizer.transform(X_test_nv)

In [48]:
#vectorizer = CountVectorizer(ngram_range = (3,3))
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(X_train_nv) 
X_test = vectorizer.transform(X_test_nv) 
features = (vectorizer.get_feature_names())

In [49]:
#Save the vectorizer and feature Selector
import pickle

pickle.dump(vectorizer, open("./Model/vectorizer.pkl", "wb"))

#### Training the model

In [50]:
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import *
from sklearn.svm import SVC

LR = LogisticRegression()
SGDC = SGDClassifier()
RFC = RandomForestClassifier(n_estimators=300, random_state=0)

# Logistic Regression
LR.fit(X_train, y_train)
LR_Model = LR.predict(X_test)
print("\nLogistic Regression Algorithm\n")
print("Confusion Matrix: \n",confusion_matrix(y_test,LR_Model))
print("Classification Report: \n",classification_report(y_test,LR_Model))
print("Accuracy Score: \n",accuracy_score(y_test, LR_Model))

# Stochastic Gradient Descent
SGDC.fit(X_train, y_train)
SGDC_Model = SGDC.predict(X_test)
print("\nStochastic Gradient Descent Algorithm\n")
print("Confusion Matrix: \n",confusion_matrix(y_test,SGDC_Model))
print("Classification Report: \n",classification_report(y_test,SGDC_Model))
print("Accuracy Score: \n",accuracy_score(y_test, SGDC_Model))

# Random Forest Classifier 
RFC.fit(X_train, y_train)
RFC_Model = RFC.predict(X_test)
print("\nRandom Forest Classifier  Algorithm\n")
print("Confusion Matrix: \n",confusion_matrix(y_test,RFC_Model))
print("Classification Report: \n",classification_report(y_test,RFC_Model))
print("Accuracy Score: \n",accuracy_score(y_test, RFC_Model))



Logistic Regression Algorithm

Confusion Matrix: 
 [[16 10]
 [ 8 12]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.67      0.62      0.64        26
           1       0.55      0.60      0.57        20

    accuracy                           0.61        46
   macro avg       0.61      0.61      0.61        46
weighted avg       0.61      0.61      0.61        46

Accuracy Score: 
 0.6086956521739131

Stochastic Gradient Descent Algorithm

Confusion Matrix: 
 [[19  7]
 [ 8 12]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.70      0.73      0.72        26
           1       0.63      0.60      0.62        20

    accuracy                           0.67        46
   macro avg       0.67      0.67      0.67        46
weighted avg       0.67      0.67      0.67        46

Accuracy Score: 
 0.6739130434782609

Random Forest Classifier  Algorithm

Confusion Matrix: 
 [[22  4]
 [ 7 

In [51]:
# Saving the model
pickle.dump(RFC, open("./Model/Random_Forest_Classifier.pkl", "wb"))

### Trying the model in custom data

In [20]:
sim_sentence = ['To provide the Facebook Products , we must process information about you .',
 'The types of information we collect depend on how you use our Products .',
 'Things you and others do and provide Information and content you provide .',
 'We collect the content , communications and other information you provide when you use our Products , including when you sign up for an account , create or share content , and message or communicate with others .',
 'This can include information in or about the content you provide ( like metadata ) , such as the location of a photo or the date a file was created .',
 'Data with special protections : You can choose to provide information in your Facebook profile fields or Life Events about your religious views , political views , who you are `` interested in , `` or your health .',
 'We collect information about the people , Pages , accounts , hash tags and groups you are connected to and how you interact with them across our Products , such as people you communicate with the most or groups you are part of .',
 'We also collect contact information if you choose to upload , sync or import it from a device ( such as an address book or call log or SMS log history ) , which we use for things like helping you and others find people you may know and for the other purposes listed below Your usage .',
 'We collect information about how you use our Products , such as the types of content you view or engage with ; the features you use ; the actions you take ; the people or accounts you interact with ; and the time , frequency and duration of your activities .',
 "For example , we log when you 're using and have last used our Products , and what posts , videos and other content you view on our Products .",
 'We also collect information about how you use features like our camera Information about transactions made on our Products .',
 'If you use our Products for purchases or other financial transactions ( such as when you make a purchase in a game or make a donation ) , we collect information about the purchase or transaction .',
 'This includes payment information , such as your credit or debit card number and other card information ; other account and authentication information ; and billing , shipping and contact details Things others do and information they provide about you .',
 'We also receive and analyze content , communications and information that other people provide when they use our Products .',
 'For example , we use information collected about your use of our Products on your phone to better personalize the content ( including ads ) or features you see when you use our Products on another device , such as your laptop or tablet , or to measure whether you took an action in response to an ad we showed you on your phone on a different device .',
 'Learn more about how we use cookies in the Facebook Cookies Policy and Insta gram Cookies Policy Information from partners .',
 'These partners provide information about your activities off Facebook including information about your device , websites you visit , purchases you make , the ads you see , and how you use their services whether or not you have a Facebook account or are logged into Facebook .',
 'Partners receive your data when you visit or use their services or through third parties they work with .',
 'We require each of these partners to have lawful rights to collect , use and share your data before providing any data to us .',
 'We use the information we have ( subject to choices you make ) as described below and to provide and support the Facebook Products and related services described in the Facebook Terms and Insta gram Terms .',
 'Learn more about how we use information about you to personalize your Facebook and Insta gram experience , including features , content and recommendations in Facebook Products ; you can also learn more about how we choose the ads that you see .',
 'Information across Facebook Products and devices : We connect information about your activities on different Facebook Products and devices to provide a more tailored and consistent experience on all Facebook Products you use , wherever you use them .',
 "We can also make your experience more seamless , for example , by automatically filling in your registration information ( such as your phone number ) from one Facebook Product when you sign up for an account on a different Product Location related information : We use location related information such as your current location , where you live , the places you like to go , and the businesses and people you 're near to provide , personalize and improve our Products , including ads , for you and others .",
 'If we introduce face recognition technology to your Insta gram experience , we will let you know first , and you will have control over whether we use this technology for you Ads and other sponsored content : We use the information we have about you including information about your interests , actions and connections to select and personalize ads , offers and other sponsored content that we show you .',
 'We use the information we have ( including your activity off our Products , such as the websites you visit and ads you see ) to help advertisers and other partners measure the effectiveness and distribution of their ads and services , and understand the types of people who use their services and how people interact with their websites , apps , and services .',
 'Learn how we share information with these partners .',
 'Communicate with you .',
 'We also use your information to respond to you when you contact us .',
 'Your information is shared with others in the following ways : Sharing on Facebook Products People and accounts you share and communicate with When you share and communicate using our Products , you choose the audience for what you share .',
 'Similarly , when you use Messenger or Insta gram to communicate with people or businesses , those people and businesses can see the content you send .',
 'Your network can also see actions you have taken on our Products , including engagement with ads and sponsored content .',
 "Public information can be seen by anyone , on or off our Products , including if they do n't have an account .",
 'Content others share or re share about you You should consider who you choose to share with , because people who can see your activity on our Products can choose to share it with others on and off our Products , including people and businesses outside the audience you shared with .',
 'People can also use our Products to create and share content about you with the audience they choose .',
 'For example , people can share a photo of you in a Story , mention or tag you at a location in a post , or share information about you in their posts or messages .',
 'Apps , websites , and third party integration s on or using our Products .',
 'When you choose to use third party apps , websites , or other services that use , or are integrated with , our Products , they can receive information about what you post or share .',
 'Also , when you download or use such third party services , they can access your public profile on Facebook , and any information that you share with them .',
 'Information collected by these third party services is subject to their own terms and policies , not this one .',
 'Requesting any other data will require our approval New owner .',
 'If the ownership or control of all or part of our Products or their assets changes , we may transfer your information to the new owner .',
 'Sharing with Third Party Partners We work with third party partners who help us and improve our Products or who use Facebook Business Tools to grow their businesses , which makes it possible to operate our companies and provide free services to people around the world .',
 "We do n't sell any of your information to anyone , and we never will .",
 'We also impose strict restrictions on how our partners can use and disclose the data we provide .',
 'Here are the types of third parties we share information with : Partners who use our analytics services .',
 'Advertisers .',
 "We provide advertisers with reports about the kinds of people seeing their ads and how their ads are performing , but we do n't share information that personally identifies you ( information such as your name or email address that by itself can be used to contact you or identifies who you are ) unless you give us permission .",
 'We also confirm which Facebook ads led you to make a purchase or take an action with an advertiser .',
 'Measurement partners .',
 'Partners offering goods and services in our Products .',
 "When you delete your account , we delete things you have posted , such as your photos and status updates , and you wo n't be able to recover that information later .",
 "Information that others have shared about you is n't part of your account and wo n't be deleted .",
 'To delete your account at any time , please visit the Facebook Settings and Insta gram Settings .',
 'Your information may , for example , be transferred or transmitted to , or stored and processed in the United States or other countries outside of where you live for the purposes as described in this policy .',
 "We 'll notify you before we make changes to this policy and give you the opportunity to review the revised policy before you choose to continue using our Products ."]

In [21]:
sim_sentence = list(set(sim_sentence))

In [22]:
sim_sentence_lower = list(map(remove_punctuation, sim_sentence))

In [23]:
sim_sentence_lemma = list(map(text_preprocessing, sim_sentence_lower))

In [24]:
sim_sentence_vector = vectorizer.transform(sim_sentence_lemma)

In [26]:
predicted = RFC.predict(sim_sentence_vector)

In [27]:
predicted

array([0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0,
       0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0,
       0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1], dtype=int64)

#### Good Privacy Text

In [31]:
for i,sentence in enumerate(sim_sentence):
    if predicted[i] == 0:
        if len(sentence.split(" ")) > 5:
            print(sentence )
        

Requesting any other data will require our approval New owner .
Your information is shared with others in the following ways : Sharing on Facebook Products People and accounts you share and communicate with When you share and communicate using our Products , you choose the audience for what you share .
We also use your information to respond to you when you contact us .
When you choose to use third party apps , websites , or other services that use , or are integrated with , our Products , they can receive information about what you post or share .
Data with special protections : You can choose to provide information in your Facebook profile fields or Life Events about your religious views , political views , who you are `` interested in , `` or your health .
Learn how we share information with these partners .
We 'll notify you before we make changes to this policy and give you the opportunity to review the revised policy before you choose to continue using our Products .
This include