# Import and Clean DS

In [1]:
# connect to s3 instance
import boto3
YOUR_ACCESS_KEY = 
YOUR_SECRET_KEY = 

session = boto3.Session(aws_access_key_id= YOUR_ACCESS_KEY, 
                        aws_secret_access_key= YOUR_SECRET_KEY)

s3 = session.resource("s3")
client = session.client("s3")

In [2]:
import pandas as pd
import glob
import io

In [3]:
#full DS
obj = s3.Object('jedha-fake-reviews-project', "datasets/full_dataset.csv")
dataset = pd.read_csv(io.BytesIO(obj.get()['Body'].read()), low_memory = False, index_col=0)

In [4]:
#_____________________________________________________________________
######### Cleaning the dataset and adding new columns #########
#_____________________________________________________________________

#we drop rows in which restaurant infos are not available (miss scraped)
dataset = dataset.dropna(subset = ['restaurant_average_rating', 'restaurant_reviews_count', 'restaurant_expensiveness', 'restaurant_name'])

#adding a column with the length of the text review
dataset['text_length'] = dataset['text_review'].apply(lambda x : len(x))

#_____________________________________________________________________
######### Fixing existing columns values and types #########
#_____________________________________________________________________

#for the user_total_image_posted column, if user_total_image_posted is NA it means there is there's no image
    # so we set the value to 0
dataset.loc[dataset['user_total_image_posted'].isna(), 'user_total_image_posted'] = 0

#for the date column,  there is some miss scraps that we want to fix
    # a correct data must have a length of 10 , if it is smaller than 10 it's becasue we scrapped the number of images of the user instead
    # we may have to scrap again those lines to fix it
    # we keep only the rows where the date is correct 
mask_not_date = dataset['date'].apply(lambda x: len(x)) < 10
dataset = dataset.loc[mask_not_date == False, :]
    # if te length is greater than 10 is it is beacause we scraped the date + somme additional words ('Avis mis à jour') so we will keep only the part with the date
mask_date_to_fix = dataset['date'].apply(lambda x: len(x)) > 10
dataset.loc[mask_date_to_fix, 'date' ] = dataset.loc[mask_date_to_fix, 'date' ].str.split('\n').str[0]
    #finally we can convert the date column to a datetime format
dataset['date'] = pd.to_datetime(dataset['date'])

#for the photos_for_review column, 
    # value -1 is in fact 0 (no photos found by the scraper)
dataset.loc[dataset['photos_for_review'] == '-1.0', 'photos_for_review' ] = '0'
    # value L is in fact 0 (no photos found by the scraper but scraped the first letter of "L'avis du jour" which happens when the reviews was updated by the user)
dataset.loc[dataset['photos_for_review'] == 'L', 'photos_for_review' ] = '0'
    # finally we can convert the photos_for_review column to an int format
dataset['photos_for_review'] = dataset['photos_for_review'].astype('int')

#for the photos_for_review column, 
    # when there's no info about the expensiveness we set it to -1
dataset.loc[dataset['restaurant_expensiveness'] == 'N/C', 'restaurant_expensiveness']  = -1
    # we can convert the restaurant_expensiveness column to an int format
dataset['restaurant_expensiveness'] = dataset['restaurant_expensiveness'].astype('int')

# change is real review for is fake review as it's better for sklearn 
dataset["is_fake_review"] = dataset["is_real_review"].apply(lambda x: '1' if x == 0 else '0')
dataset["is_fake_review"] = dataset["is_fake_review"].astype(int)
dataset = dataset.drop(columns="is_real_review")

# reset index 
dataset = dataset.reset_index(drop = True)


In [5]:
french_reviews = dataset.loc[dataset['language'] =='fr',['text_review', 'is_fake_review']].reset_index(drop=True)

In [6]:
french_reviews

Unnamed: 0,text_review,is_fake_review
0,Bon retour !\nJe suis revenue dans ce resto ap...,0
1,A optimiser...\nCuisine très traditionnelle da...,0
2,Brasserie chic\nUne brasserie authentiquement ...,0
3,Tres bien\nPetit diner entre amis. Les plats e...,0
4,Un bistrot bien sympathique\nNous avons mangé ...,0
...,...,...
87046,"Du choix, un service extrêmement rapide, le re...",1
87047,"Vraiment un des meilleur kebab du coin, servic...",1
87048,Très déçu!!!\nCe soir j'ai eu envie de manger ...,1
87049,J'y vais depuis le début mais j'avoue qu'avec ...,1


# Preprocessing for NLP

In [7]:
import pandas as pd
import numpy as np 
import spacy
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import DBSCAN
import fr_core_news_md

In [8]:
data = french_reviews.copy()

In [9]:
# strip
data["text_review_clean"] = data["text_review"].str.strip()

#lower
data["text_review_clean"] = data["text_review_clean"].str.lower()

data["text_review_clean"] = data["text_review_clean"].str.replace('\n', ' ')


In [10]:
data["text_review_clean"] = data["text_review_clean"].str.replace(r"<[a-z/]+>", " ")


In [11]:
data["text_review_clean"] = data["text_review_clean"].str.replace(r"[^A-zÀ-ÿ0-9' ]+", " ").astype(str)

# Tokenizing, lemmatizing and deleteing stopwords from doc with Spacy


In [12]:

# first let's find the count of all words and return them in the form of dict items
from collections import Counter

word_count = Counter(' '.join(data["text_review_clean"]).split()).items() #
print(len(word_count))

89422


In [13]:
# create df with all words and their count
word_count = pd.DataFrame({'word': [item[0] for item in list(word_count)], 
             'count' : [item[1] for item in list (word_count)]})

# format
word_count = word_count.sort_values('count', ascending = False)

In [14]:
# take all words that occur more than 500 times
commonwords = word_count.loc[word_count["count"]>=2000, :]
commonwords

Unnamed: 0,word,count
12,de,258291
29,et,219008
17,le,189740
21,la,169772
60,un,144444
...,...,...
1921,porc,2033
3607,bière,2029
1013,vue,2028
641,délice,2016


In [15]:
# create nlp instance
nlp =  fr_core_news_md.load()

In [16]:

# lemmatize common words 
commonwords["word"] = commonwords["word"].apply(lambda x: nlp(x))
commonwords["word"] = commonwords["word"].apply(lambda x: [token.lemma_ for token in x])
commonwords.head(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  commonwords["word"] = commonwords["word"].apply(lambda x: nlp(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  commonwords["word"] = commonwords["word"].apply(lambda x: [token.lemma_ for token in x])


Unnamed: 0,word,count
12,[de],258291
29,[et],219008
17,[le],189740
21,[le],169772
60,[un],144444


In [17]:
# join
commonwords["word"] = commonwords["word"].str.join("")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  commonwords["word"] = commonwords["word"].str.join("")


In [18]:
# make list
common_words = commonwords.word
common_words

12          de
29          et
17          le
21          le
60          un
         ...  
1921      porc
3607     bière
1013       vue
641     délice
628         30
Name: word, Length: 435, dtype: object

In [19]:
# append to stopwords 
from spacy.lang.fr.stop_words import STOP_WORDS
print(len(STOP_WORDS))
STOP_WORDS_MAX = STOP_WORDS.union(common_words)

# also add the lemmatizer for pronouns as we won't need them
STOP_WORDS_MAX.add("-PRON-")
print(len(STOP_WORDS_MAX))

600
852


In [20]:

#  apply nlp to transform into doc
data["clean_tokens"] = data["text_review_clean"].apply(lambda x: nlp(x))

In [21]:
data.head(5)

Unnamed: 0,text_review,is_fake_review,text_review_clean,clean_tokens
0,Bon retour !\nJe suis revenue dans ce resto ap...,0,bon retour je suis revenue dans ce resto apr...,"(bon, retour, , je, suis, revenue, dans, ce,..."
1,A optimiser...\nCuisine très traditionnelle da...,0,a optimiser cuisine très traditionnelle dans ...,"(a, optimiser, , cuisine, très, traditionnell..."
2,Brasserie chic\nUne brasserie authentiquement ...,0,brasserie chic une brasserie authentiquement p...,"(brasserie, chic, une, brasserie, authentiquem..."
3,Tres bien\nPetit diner entre amis. Les plats e...,0,tres bien petit diner entre amis les plats et...,"(tres, bien, petit, diner, entre, amis, , les..."
4,Un bistrot bien sympathique\nNous avons mangé ...,0,un bistrot bien sympathique nous avons mangé e...,"(un, bistrot, bien, sympathique, nous, avons, ..."


In [22]:
# lemmatize each token and remove stop words --> could be done in two steps but we do it in one
data['clean_tokens_lemmatized'] = data['clean_tokens'].apply(lambda doc: [token.lemma_ for token in doc if token.lemma_ not in STOP_WORDS_MAX])
data.head(5)

Unnamed: 0,text_review,is_fake_review,text_review_clean,clean_tokens,clean_tokens_lemmatized
0,Bon retour !\nJe suis revenue dans ce resto ap...,0,bon retour je suis revenue dans ce resto apr...,"(bon, retour, , je, suis, revenue, dans, ce,...","[ , revenir, long, absence, an, , , change..."
1,A optimiser...\nCuisine très traditionnelle da...,0,a optimiser cuisine très traditionnelle dans ...,"(a, optimiser, , cuisine, très, traditionnell...","[optimiser, , traditionnel, , soigner, , ,..."
2,Brasserie chic\nUne brasserie authentiquement ...,0,brasserie chic une brasserie authentiquement p...,"(brasserie, chic, une, brasserie, authentiquem...","[chic, authentiquement, parisien, pouce, raffi..."
3,Tres bien\nPetit diner entre amis. Les plats e...,0,tres bien petit diner entre amis les plats et...,"(tres, bien, petit, diner, entre, amis, , les...","[diner, , , tarte, framboise, exquis, , , ..."
4,Un bistrot bien sympathique\nNous avons mangé ...,0,un bistrot bien sympathique nous avons mangé e...,"(un, bistrot, bien, sympathique, nous, avons, ...","[famille, type, , , fort, aimable, , additi..."


In [23]:
### join all of them into new df column
# method 1
data["clean_review"] = data["clean_tokens_lemmatized"].str.join(" ")

In [24]:

print(data.shape)
data.sample(5)

(87051, 6)


Unnamed: 0,text_review,is_fake_review,text_review_clean,clean_tokens,clean_tokens_lemmatized,clean_review
68648,"Pour les gourmants !\nAvec mon ami, nous somme...",0,pour les gourmants avec mon ami nous sommes...,"(pour, les, gourmants, , avec, mon, ami, , ...","[gourmant, , , hasard, , fête, , d, ', ,...",gourmant hasard fête d ' charme lov...
7066,Un adresse incontournable\nBien que venu décou...,0,un adresse incontournable bien que venu découv...,"(un, adresse, incontournable, bien, que, venu,...","[incontournable, péruvien, typique, payer, min...",incontournable péruvien typique payer mine vei...
14444,"Très bien, déjà deux fois qui j'y vais.\nUne f...",0,très bien déjà deux fois qui j'y vais une fo...,"(très, bien, , déjà, deux, fois, qui, j', y, ...","[ , , week, end, proposer, , quantitée, géné...",week end proposer quantitée généreux wee...
45240,Comme dans un paradis\nJe suis très contente d...,0,comme dans un paradis je suis très contente de...,"(comme, dans, un, paradis, je, suis, très, con...","[paradis, content, connaître, , zen, romantiq...",paradis content connaître zen romantique v...
51,1 heure d'attente pour le plat principal\nPas ...,0,1 heure d'attente pour le plat principal pas m...,"(1, heure, d', attente, pour, le, plat, princi...","[heure, attente, principal, , lamentable, , ...",heure attente principal lamentable 1h prin...


# Creating a TFIDF Matrix


In [25]:
from sklearn.feature_extraction.text import TfidfVectorizer


In [26]:
# apply vectorizer to the review column
vectorizer = TfidfVectorizer(smooth_idf=True, min_df=200)
X = vectorizer.fit_transform(data['clean_review'])

In [27]:
len(vectorizer.vocabulary_)

1752

In [28]:
# transform this sparse matrix into a numpy array 
X_dense = X.toarray()
print(X_dense.shape)

(87051, 1752)


In [29]:
# Let's put the matrix into a DF with the feature name (ie word) as column title and the document number as ID
# this is easily doable because the get_feature_names method of vectorizer returns the feature names 
# with the same index as their values in the X_dense matrix
X_df = pd.DataFrame(X_dense, 
             columns=[x for x in vectorizer.get_feature_names()], 
             index=["review_{}".format(i) for i in range (1,87052)])

In [30]:
X_df

Unnamed: 0,100,11,13,13h,14,16,17,18,19,19h,...,étonnant,étonner,étrange,étranger,étroit,étudiant,évidemment,évident,éviter,île
review_1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
review_2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
review_3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
review_4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
review_5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
review_87047,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
review_87048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
review_87049,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
review_87050,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Topic Extraction

In [31]:
# import from sklearn
from sklearn.decomposition import TruncatedSVD

In [137]:
# set it to 12 different topics 
svd = TruncatedSVD(n_components= 70)

# fit to our matrix --> last two columns are those with the previous cluster_values
lsa = svd.fit_transform(X_df)

In [173]:
print(svd.explained_variance_ratio_.sum())

0.1557903741267197


In [34]:
topic_encoded_df = pd.DataFrame(lsa, columns = ["topic_{}".format(i) \
                                                for i in range(1,(lsa.shape[1]+1))]\
                               )
topic_encoded_df.head()

Unnamed: 0,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,topic_10,...,topic_61,topic_62,topic_63,topic_64,topic_65,topic_66,topic_67,topic_68,topic_69,topic_70
0,0.182937,-0.037876,-0.021556,-0.031425,-0.015129,-0.019524,0.037902,0.042148,-0.059362,0.178819,...,0.000717,0.080641,-0.141749,0.005719,-0.024154,-0.010394,0.04028,0.048726,-0.035746,-0.012122
1,0.081317,-0.005818,-0.003718,0.037146,0.031008,0.012801,-0.033361,0.013321,-0.000854,0.008429,...,-0.011674,0.031722,0.004676,0.002275,0.017245,-0.103015,0.037871,-0.067699,0.031082,0.021013
2,0.072941,-0.001708,0.006482,0.032298,0.029774,-0.022186,-0.071312,0.007261,-0.030092,0.052803,...,-0.086015,0.053606,-0.079024,-0.144618,0.064571,-0.039876,0.137298,0.015482,-0.097854,0.007026
3,0.094846,-0.000364,0.005569,0.033342,0.028184,0.005248,-0.047884,0.041983,-0.050951,0.016491,...,0.096816,-0.023996,0.004972,0.010744,0.060896,-0.045629,0.01904,0.082028,0.021663,0.085922
4,0.114763,-0.012122,-0.005755,-0.001325,-0.017749,-0.008861,0.011973,0.004776,0.010265,0.024023,...,0.073497,-0.016962,-0.020629,0.054399,-0.023851,0.007332,0.012216,-0.115549,0.082958,-0.088175


# Clean Data For Classifier

In [62]:
data_cl = topic_encoded_df.copy()

In [60]:
data["len_review"] = data["text_review"].apply(lambda x : len(str(x)))

In [56]:
data['upper_word_count'] = data['text_review'].apply(lambda x : sum(map(str.isupper, x.split())) )
data['upper_word_count'] = pd.qcut(data['upper_word_count'].rank(method = 'first'), 3, labels = ['low', 'mid', 'high'])

In [57]:
data['exclam_count'] = data['text_review'].apply(lambda x : len(''.join(ch for ch in x if ch =='!')))
data['exclam_count'] = pd.qcut(data['exclam_count'].rank(method = 'first'), 3, labels = ['low', 'high', 'very_high'])






In [59]:
data.groupby('exclam_count')["is_fake_review"].value_counts(normalize=True)


exclam_count  is_fake_review
low           0                 1.000000
high          0                 0.674605
              1                 0.325395
very_high     0                 0.848675
              1                 0.151325
Name: is_fake_review, dtype: float64

In [63]:
# append review length 
data_cl["len_review"] = list(data.len_review)

In [64]:
# append fake review class
data_cl["is_fake_review"] = list(data["is_fake_review"])

In [65]:
# append uppercase
data_cl["upper_word_count"] = list(data["upper_word_count"])

In [66]:
#append exclam
data_cl["exclam_count"] = list(data["exclam_count"])

In [67]:
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import  OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

In [68]:
data_cl

Unnamed: 0,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,topic_10,...,topic_65,topic_66,topic_67,topic_68,topic_69,topic_70,len_review,is_fake_review,upper_word_count,exclam_count
0,0.182937,-0.037876,-0.021556,-0.031425,-0.015129,-0.019524,0.037902,0.042148,-0.059362,0.178819,...,-0.024154,-0.010394,0.040280,0.048726,-0.035746,-0.012122,359,0,low,high
1,0.081317,-0.005818,-0.003718,0.037146,0.031008,0.012801,-0.033361,0.013321,-0.000854,0.008429,...,0.017245,-0.103015,0.037871,-0.067699,0.031082,0.021013,256,0,high,high
2,0.072941,-0.001708,0.006482,0.032298,0.029774,-0.022186,-0.071312,0.007261,-0.030092,0.052803,...,0.064571,-0.039876,0.137298,0.015482,-0.097854,0.007026,323,0,low,high
3,0.094846,-0.000364,0.005569,0.033342,0.028184,0.005248,-0.047884,0.041983,-0.050951,0.016491,...,0.060896,-0.045629,0.019040,0.082028,0.021663,0.085922,247,0,high,low
4,0.114763,-0.012122,-0.005755,-0.001325,-0.017749,-0.008861,0.011973,0.004776,0.010265,0.024023,...,-0.023851,0.007332,0.012216,-0.115549,0.082958,-0.088175,280,0,low,low
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87046,0.056939,-0.014500,-0.009422,-0.012278,-0.012643,-0.000716,-0.001827,-0.006371,-0.001868,-0.014049,...,-0.061815,-0.002758,-0.020445,0.017416,0.016560,-0.022601,195,1,high,very_high
87047,0.015847,-0.000076,-0.005971,-0.006066,0.000145,-0.003124,-0.012159,0.003439,-0.002020,-0.035379,...,0.034951,-0.005332,0.003603,0.042685,0.046822,-0.034121,83,1,high,very_high
87048,0.067937,-0.010810,-0.015722,-0.019105,-0.027498,-0.002027,0.095356,0.143631,-0.026087,-0.087081,...,-0.032202,-0.040765,0.007114,0.000368,0.003463,-0.022455,391,1,high,very_high
87049,0.055320,-0.006079,-0.009600,-0.000292,0.005679,-0.009261,-0.034570,-0.008223,-0.030499,-0.065781,...,-0.000565,-0.005678,-0.039495,-0.002250,0.040420,0.016029,315,1,high,high


In [69]:
# split X
X_cl = data_cl.drop(columns="is_fake_review")
X_cl.head()

Unnamed: 0,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,topic_10,...,topic_64,topic_65,topic_66,topic_67,topic_68,topic_69,topic_70,len_review,upper_word_count,exclam_count
0,0.182937,-0.037876,-0.021556,-0.031425,-0.015129,-0.019524,0.037902,0.042148,-0.059362,0.178819,...,0.005719,-0.024154,-0.010394,0.04028,0.048726,-0.035746,-0.012122,359,low,high
1,0.081317,-0.005818,-0.003718,0.037146,0.031008,0.012801,-0.033361,0.013321,-0.000854,0.008429,...,0.002275,0.017245,-0.103015,0.037871,-0.067699,0.031082,0.021013,256,high,high
2,0.072941,-0.001708,0.006482,0.032298,0.029774,-0.022186,-0.071312,0.007261,-0.030092,0.052803,...,-0.144618,0.064571,-0.039876,0.137298,0.015482,-0.097854,0.007026,323,low,high
3,0.094846,-0.000364,0.005569,0.033342,0.028184,0.005248,-0.047884,0.041983,-0.050951,0.016491,...,0.010744,0.060896,-0.045629,0.01904,0.082028,0.021663,0.085922,247,high,low
4,0.114763,-0.012122,-0.005755,-0.001325,-0.017749,-0.008861,0.011973,0.004776,0.010265,0.024023,...,0.054399,-0.023851,0.007332,0.012216,-0.115549,0.082958,-0.088175,280,low,low


In [77]:
X_cl.iloc[: , -3]

0        359
1        256
2        323
3        247
4        280
        ... 
87046    195
87047     83
87048    391
87049    315
87050     87
Name: len_review, Length: 87051, dtype: int64

In [70]:
# split y 
y = data_cl["is_fake_review"]

In [71]:
X_train, X_test, y_train, y_test = train_test_split(X_cl,y,
                                                    test_size = 0.2,
                                                    stratify = y , ## Statify splitting when you're training a classification model !
                                                    random_state = 19)

In [78]:
# Create pipeline for numeric features
numeric_features = [-3] # Positions of numeric columns in X_train/X_test
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())
])

In [79]:
# Create pipeline for categorical features
categorical_features = [-1,-2] # Positions of categorical columns in X_train/X_test
categorical_transformer = Pipeline(
    steps=[
    ('encoder', OneHotEncoder(drop='first')) # first column will be dropped to avoid creating correlations between features
    ])

In [80]:
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

In [81]:
# Preprocessings on train set

X_train = preprocessor.fit_transform(X_train)
print(X_train[0:5,:])

# Preprocessings on test set
X_test = preprocessor.transform(X_test) 
print(X_test[0:5,:])

[[-0.663947    1.          0.          1.          0.        ]
 [-0.27339474  1.          0.          1.          0.        ]
 [-0.75477311  0.          1.          0.          0.        ]
 [-0.88647097  1.          0.          0.          1.        ]
 [-0.78656225  0.          0.          0.          0.        ]]
[[ 0.13078144  0.          1.          1.          0.        ]
 [ 3.57763224  0.          0.          0.          0.        ]
 [-0.34832628  1.          0.          0.          1.        ]
 [ 0.17846515  0.          0.          0.          1.        ]
 [ 0.87782618  0.          0.          0.          0.        ]]


In [81]:
#scaler = StandardScaler()
#X_train["len_review"] = scaler.fit_transform(X_train[["len_review"]])
#X_test["len_review"] = scaler.transform(X_test[["len_review"]])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train["len_review"] = scaler.fit_transform(X_train[["len_review"]])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["len_review"] = scaler.transform(X_test[["len_review"]])


In [82]:
from sklearn.svm import SVC


In [88]:
from sklearn.model_selection import GridSearchCV

In [89]:
from sklearn.model_selection import StratifiedKFold

In [90]:
kfold = StratifiedKFold(n_splits = 5, shuffle=True, random_state=0) 

parameters= {'C': [10], \
            'gamma': [1] ,
             "class_weight": [{1:0.67, 0:0.33}, {1:0.75, 0:0.25}, {1:0.8, 0:0.2}, "balanced"] \
           }

model = SVC()
model_svc =GridSearchCV(model, parameters, cv=kfold, verbose=2, scoring="f1")
model_svc.fit(X_train,y_train)

Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV] C=10, class_weight={1: 0.67, 0: 0.33}, gamma=1 ..................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] ... C=10, class_weight={1: 0.67, 0: 0.33}, gamma=1, total= 1.9min
[CV] C=10, class_weight={1: 0.67, 0: 0.33}, gamma=1 ..................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.9min remaining:    0.0s


[CV] ... C=10, class_weight={1: 0.67, 0: 0.33}, gamma=1, total= 1.6min
[CV] C=10, class_weight={1: 0.67, 0: 0.33}, gamma=1 ..................
[CV] ... C=10, class_weight={1: 0.67, 0: 0.33}, gamma=1, total= 1.7min
[CV] C=10, class_weight={1: 0.67, 0: 0.33}, gamma=1 ..................
[CV] ... C=10, class_weight={1: 0.67, 0: 0.33}, gamma=1, total= 1.8min
[CV] C=10, class_weight={1: 0.67, 0: 0.33}, gamma=1 ..................
[CV] ... C=10, class_weight={1: 0.67, 0: 0.33}, gamma=1, total= 1.7min
[CV] C=10, class_weight={1: 0.75, 0: 0.25}, gamma=1 ..................
[CV] ... C=10, class_weight={1: 0.75, 0: 0.25}, gamma=1, total= 1.9min
[CV] C=10, class_weight={1: 0.75, 0: 0.25}, gamma=1 ..................
[CV] ... C=10, class_weight={1: 0.75, 0: 0.25}, gamma=1, total= 1.8min
[CV] C=10, class_weight={1: 0.75, 0: 0.25}, gamma=1 ..................
[CV] ... C=10, class_weight={1: 0.75, 0: 0.25}, gamma=1, total= 1.6min
[CV] C=10, class_weight={1: 0.75, 0: 0.25}, gamma=1 ..................
[CV] .

[Parallel(n_jobs=1)]: Done  20 out of  20 | elapsed: 43.1min finished


GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=0, shuffle=True),
             estimator=SVC(),
             param_grid={'C': [10],
                         'class_weight': [{0: 0.33, 1: 0.67},
                                          {0: 0.25, 1: 0.75}, {0: 0.2, 1: 0.8},
                                          'balanced'],
                         'gamma': [1]},
             scoring='f1', verbose=2)

In [91]:
svc_clf = model_svc.best_estimator_


In [92]:
svc_clf

SVC(C=10, class_weight={0: 0.33, 1: 0.67}, gamma=1)

In [93]:
test_pred = svc_clf.predict(X_test)
train_pred = svc_clf.predict(X_train)

In [95]:
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score
    
print("Scores for model on test set")
print("")
print('Accuracy Score : {}'.format(str(accuracy_score(y_test,test_pred))))
print('Precision Score : {}'.format(str(precision_score(y_test,test_pred))))
print('Recall Score : {}' .format(str(recall_score(y_test,test_pred ))))
print('F1 Score : {}'.format(str(f1_score(y_test,test_pred))))
    
print("")
print("")
print("Scores for model on train set")
print("")
print('Accuracy Score : {}'.format(str(accuracy_score(y_train,train_pred))))
print('Precision Score : {}'.format(str(precision_score(y_train,train_pred))))
print('Recall Score : {}' .format(str(recall_score(y_train,train_pred))))
print('F1 Score : {}'.format(str(f1_score(y_train,train_pred))))

Scores for model on test set

Accuracy Score : 0.8770317615300672
Precision Score : 0.5947910357359176
Recall Score : 0.7097940007228045
F1 Score : 0.6472235953204811


Scores for model on train set

Accuracy Score : 0.8775703618609995
Precision Score : 0.5962121212121212
Recall Score : 0.7111874209289716
F1 Score : 0.6486441935218


In [97]:
svc_clf2 = SVC(C=10, class_weight={0: 0.33, 1: 0.67}, gamma=1,  probability=True)

In [98]:
svc_clf2.fit(X_train,y_train)

SVC(C=10, class_weight={0: 0.33, 1: 0.67}, gamma=1, probability=True)

In [99]:
X_final_pred = X_cl.copy()

In [100]:
X_final_pred

Unnamed: 0,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,topic_10,...,topic_64,topic_65,topic_66,topic_67,topic_68,topic_69,topic_70,len_review,upper_word_count,exclam_count
0,0.182937,-0.037876,-0.021556,-0.031425,-0.015129,-0.019524,0.037902,0.042148,-0.059362,0.178819,...,0.005719,-0.024154,-0.010394,0.040280,0.048726,-0.035746,-0.012122,359,low,high
1,0.081317,-0.005818,-0.003718,0.037146,0.031008,0.012801,-0.033361,0.013321,-0.000854,0.008429,...,0.002275,0.017245,-0.103015,0.037871,-0.067699,0.031082,0.021013,256,high,high
2,0.072941,-0.001708,0.006482,0.032298,0.029774,-0.022186,-0.071312,0.007261,-0.030092,0.052803,...,-0.144618,0.064571,-0.039876,0.137298,0.015482,-0.097854,0.007026,323,low,high
3,0.094846,-0.000364,0.005569,0.033342,0.028184,0.005248,-0.047884,0.041983,-0.050951,0.016491,...,0.010744,0.060896,-0.045629,0.019040,0.082028,0.021663,0.085922,247,high,low
4,0.114763,-0.012122,-0.005755,-0.001325,-0.017749,-0.008861,0.011973,0.004776,0.010265,0.024023,...,0.054399,-0.023851,0.007332,0.012216,-0.115549,0.082958,-0.088175,280,low,low
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87046,0.056939,-0.014500,-0.009422,-0.012278,-0.012643,-0.000716,-0.001827,-0.006371,-0.001868,-0.014049,...,-0.043188,-0.061815,-0.002758,-0.020445,0.017416,0.016560,-0.022601,195,high,very_high
87047,0.015847,-0.000076,-0.005971,-0.006066,0.000145,-0.003124,-0.012159,0.003439,-0.002020,-0.035379,...,-0.034558,0.034951,-0.005332,0.003603,0.042685,0.046822,-0.034121,83,high,very_high
87048,0.067937,-0.010810,-0.015722,-0.019105,-0.027498,-0.002027,0.095356,0.143631,-0.026087,-0.087081,...,0.019538,-0.032202,-0.040765,0.007114,0.000368,0.003463,-0.022455,391,high,very_high
87049,0.055320,-0.006079,-0.009600,-0.000292,0.005679,-0.009261,-0.034570,-0.008223,-0.030499,-0.065781,...,-0.025253,-0.000565,-0.005678,-0.039495,-0.002250,0.040420,0.016029,315,high,high


In [101]:
X_final_pred = preprocessor.transform(X_final_pred) 

In [102]:
predictions = svc_clf2.predict_proba(X_final_pred)

In [129]:
data_cl.is_fake_review

0        0
1        0
2        0
3        0
4        0
        ..
87046    1
87047    1
87048    1
87049    1
87050    1
Name: is_fake_review, Length: 87051, dtype: int64

In [105]:
predictions_svm_nlp = pd.DataFrame(predictions)

In [130]:
predictions_svm_nlp[3] = data_cl.is_fake_review

In [133]:
predictions_svm_nlp.sample(20)

Unnamed: 0,0,1,3
61455,0.95815,0.04185,0
15044,0.958839,0.041161,0
4216,0.957377,0.042623,0
71363,0.760686,0.239314,0
224,0.939691,0.060309,0
45542,0.975652,0.024348,0
79822,0.341989,0.658011,1
51552,0.763849,0.236151,0
67639,0.94026,0.05974,0
61252,0.938742,0.061258,0


In [110]:
# set path and bucket name
PATH = "datasets/predictions_svm_nlp.csv"
bucket = s3.Bucket(name = "jedha-fake-reviews-project")
# export dataset as csv
data = predictions_svm_nlp.to_csv()

#upload to bucket
put_object = bucket.put_object(ACL='private', Key= PATH, Body=data)
#check 
for obj in bucket.objects.all():
    print(obj.key)

datasets/fake_reviews_raw.csv
datasets/full_dataset.csv
datasets/full_dataset_reworked.csv
datasets/predictions_svm_nlp.csv
datasets/real_reviews_raw.csv


In [160]:
dataset.to_csv("/Users/personal/Dropbox/dataset_tableau.csv", sep="}")

In [153]:
dataset.to_excel("/Users/personal/Dropbox/dataset_tableau.xlsx")  

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("

  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "
  warn("Ignoring URL '%s' since it exceeds Excel's limit of "


KeyboardInterrupt: 

In [161]:
ls

Meta_data_analysis.ipynb     Untitled.ipynb
Neural_network.ipynb         nlp_classifier.ipynb
Stacking_NLP_METADATA.ipynb


In [162]:
cd ../

/Users/personal/Dropbox/Jehda/nlp_project/git/Fake_reviews_detection


In [163]:
ls

[34m0_Scraping[m[m/        [34m2_Deployment[m[m/      [34mScraping[m[m/
[34m1_Training_models[m[m/ README.md          [34mTraining_models[m[m/


In [164]:
cd 1_Training_models

/Users/personal/Dropbox/Jehda/nlp_project/git/Fake_reviews_detection/1_Training_models


In [166]:
mkdir text_only_models

In [167]:
cd text_only_models

/Users/personal/Dropbox/Jehda/nlp_project/git/Fake_reviews_detection/1_Training_models/text_only_models


In [171]:
import joblib

In [172]:

# Save to file in the current working directory
joblib_file = "text_vectorizer.pkl"
joblib.dump(vectorizer, joblib_file)




['text_vectorizer.pkl']

In [174]:
# Save to file in the current working directory
joblib_file = "topic_extractor.pkl"
joblib.dump(lsa, joblib_file)


['topic_extractor.pkl']

In [175]:
# Save to file in the current working directory
joblib_file = "main_model.pkl"
joblib.dump(svc_clf2, joblib_file)

['main_model.pkl']

In [176]:
# Save to file in the current working directory
joblib_file = "preprocessor.pkl"
joblib.dump(preprocessor, joblib_file)

['preprocessor.pkl']