# Imports

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

#Models
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, ExtraTreesClassifier, AdaBoostClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV, KFold, cross_validate
from sklearn.metrics import f1_score

#NLP
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer, HashingVectorizer
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import RegexpTokenizer
import regex as re
from nltk.corpus import stopwords # Import the stop word list
from sklearn.metrics import confusion_matrix

# PCA
from sklearn.decomposition import PCA 

# Resampling Methods
from sklearn.utils import resample
from imblearn.over_sampling import SMOTE
from imblearn.combine import SMOTEENN, SMOTETomek
from imblearn.ensemble import BalancedBaggingClassifier


import warnings
warnings.simplefilter(action = 'ignore', category=FutureWarning)

In [11]:
df = pd.read_csv('./Data/cleaned_reviews.csv')

In [12]:
df.columns

Index(['address', 'cost', 'cuisine', 'name', 'rating', 'review'], dtype='object')

In [89]:
#Change columns to have underscores to not confused with vectorized column
df.rename({'cuisine': 'cuisine_type', 
           'address': 'rest_address', 
           'cost': 'rest_cost', 
           'name': 'rest_name', 
           'rating': 'rest_rating'}, axis =1, inplace=True)

In [17]:
df.loc[0, 'name']

'Omakase Room By Tatsu'

In [21]:
if df.name[0] in df.review[0]:
    print(df.review[0].replace(df.name[0], ''))

Tolerance for group trips can vary widely. Some people have an internal meltdown when a company bowling outing is so much as mentioned, while others might be the enthusiastic ringleaders of a family reunion taking place on a cruise ship. We’re skeptical of those people on the cruise ship wavelength, but we also know that doing things with other human beings in an organized manner can sometimes be a good thing.
 is a sushi place that’s sort of like a group trip, but without flip flops and nametags and with absolutely zero screaming. At this quiet, below-ground spot in the West Village, you and nine other people will be asked to arrive at precisely 6, 7:30, or 9pm. In the 90 minutes that follow, the 10 of you will receive, almost in unison, 18 pieces of top-quality, simply prepared sushi. You’ll order some sake or wine of your own choosing, but otherwise, this is a sushi meal that follows a close script. Fortunately, the sushi is very good, and you won’t be disappointed in the dozen and 

In [20]:
df.review[0]

'Tolerance for group trips can vary widely. Some people have an internal meltdown when a company bowling outing is so much as mentioned, while others might be the enthusiastic ringleaders of a family reunion taking place on a cruise ship. We’re skeptical of those people on the cruise ship wavelength, but we also know that doing things with other human beings in an organized manner can sometimes be a good thing.\nOmakase Room By Tatsu is a sushi place that’s sort of like a group trip, but without flip flops and nametags and with absolutely zero screaming. At this quiet, below-ground spot in the West Village, you and nine other people will be asked to arrive at precisely 6, 7:30, or 9pm. In the 90 minutes that follow, the 10 of you will receive, almost in unison, 18 pieces of top-quality, simply prepared sushi. You’ll order some sake or wine of your own choosing, but otherwise, this is a sushi meal that follows a close script. Fortunately, the sushi is very good, and you won’t be disappo

# Cleaning Text

In [145]:
#lemmatization and stemming
lemmatizer = WordNetLemmatizer()
def lemmatized_column(df, col):
    lemmatized_col = []
    for i in list(range(len(col))):
        text = col[i]
            
        #Lowercase 
        text = text.lower()
            
        # Remove non-letters        
        letters_only = re.sub("[^a-zA-Z1-9]", " ", text)
        #
                
        #remove name of restaurant:
        if str(df.loc[i, 'rest_name']) in text:
            text.replace('rest_name', '')
            
        # split into individual words
        words = letters_only.split()
        #
        #convert the stop words to a set
        stops = set(stopwords.words('english')) 
        # 
        # Remove stop words
        meaningful_words = [w for w in words if not w in stops]
        #

        #
        lemmed_words = [lemmatizer.lemmatize(i) for i in meaningful_words]
        #Join the words back into one string separated by space, 
        # 
        joined_words = ' '.join(lemmed_words)
        lemmatized_col.append(joined_words)
        
        
        
    df['lemmatized'] = lemmatized_col
    return lemmatized_col


In [25]:
lemmed_df = lemmatized_column(df, df['review'])

In [33]:
df.head()

Unnamed: 0,address,cost,cuisine,name,rating,review,lemmatized
0,"\n14 Christopher St\nNew York, NY 10014\n(212)...",4.0,Japanese,Omakase Room By Tatsu,7.7,Tolerance for group trips can vary widely. Som...,Tolerance group trip vary widely Some people i...
1,"\n428 Greenwich St.\nNew York, NY 10013\n212-2...",4.0,Japanese,Sushi Azabu,8.5,When the apocalypse eventually comes for New Y...,When apocalypse eventually come New York City ...
2,"\n264 Clinton St\nNew York, NY 11201\n(347) 98...",3.0,Seafood,Saint Julivert Fisherie,7.7,"When you’re young, you don’t have to think muc...",When young think much decision You know attemp...
3,"\n498 9th Ave\nNew York, NY 10018\n(646) 863-2...",2.0,Russian,Farida,8.0,"There’s a bleak, sweaty place on 34th Street w...",There bleak sweaty place 34th Street adult run...
4,"\n541-B Myrtle Avenue\nNew York, NY 11205\n(71...",2.0,Sushi,U-Gu,7.7,New York is near an ocean with plenty of free ...,New York near ocean plenty free fish asking ca...


### Vectorize Tweets

In [28]:
def vectorize_tweets(method, col, df):
    #Instantiate
    vect = method()
    
    #fit and transform X_train
    model = vect.fit(df[col])
    #model is the fitted model that we can use to transform any upcoming datasets to predict on.
    columns_vect = model.transform(df[col])
    
    #turn both datasets into newly vectorized dataframes
    vect_df = pd.DataFrame(columns_vect.todense(), columns= model.get_feature_names())
    
    #return the newly vectorized df as well as the vector model
    return vect_df, model

In [31]:
count_vect_df, model = vectorize_tweets(CountVectorizer, 'lemmatized', df)

In [32]:
count_vect_df.head()

Unnamed: 0,11,115,11am,11pm,11th,12,125,125th,126th,127th,...,zogsports,zoloft,zombie,zone,zoned,zoo,zucchini,zuck,zuckerberg,zz
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Drop Unnecessary  Columns

In [50]:
def drop_cols(df, n):
    #create a list of unnecessary columns
    extra_cols = []
    for col in df.columns:
        if df[col].sum() < n: #Where n is the number of times we want a word to appear a minumum number of times
            
            extra_cols.append(col)
            
    df.drop(extra_cols, axis = 1, inplace = True)
            
    return df

In [51]:
len(count_vect_df.columns)*.5

1896.5

In [52]:
short_df = drop_cols(count_vect_df, 5)

In [54]:
short_df.head()

Unnamed: 0,11,11th,12,13,14,14th,15,16,17,18,...,yorker,yorkers,you,young,younger,your,yuji,zadie,zero,zoo
0,0,0,0,0,0,0,0,0,0,1,...,0,0,1,0,0,0,0,0,1,0
1,0,0,0,0,0,0,1,0,1,0,...,0,0,3,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,3,1,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,2,1,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,1,...,0,0,2,0,0,0,0,0,0,0


In [91]:
joined = pd.concat([df, short_df], axis =1)

In [92]:
joined.head()

Unnamed: 0,rest_address,rest_cost,cuisine_type,rest_name,rest_rating,review,lemmatized,11,11th,12,...,yorker,yorkers,you,young,younger,your,yuji,zadie,zero,zoo
0,"\n14 Christopher St\nNew York, NY 10014\n(212)...",4.0,Japanese,Omakase Room By Tatsu,7.7,Tolerance for group trips can vary widely. Som...,Tolerance group trip vary widely Some people i...,0,0,0,...,0,0,1,0,0,0,0,0,1,0
1,"\n428 Greenwich St.\nNew York, NY 10013\n212-2...",4.0,Japanese,Sushi Azabu,8.5,When the apocalypse eventually comes for New Y...,When apocalypse eventually come New York City ...,0,0,0,...,0,0,3,0,0,0,0,0,0,0
2,"\n264 Clinton St\nNew York, NY 11201\n(347) 98...",3.0,Seafood,Saint Julivert Fisherie,7.7,"When you’re young, you don’t have to think muc...",When young think much decision You know attemp...,0,0,0,...,0,0,3,1,0,0,0,0,0,0
3,"\n498 9th Ave\nNew York, NY 10018\n(646) 863-2...",2.0,Russian,Farida,8.0,"There’s a bleak, sweaty place on 34th Street w...",There bleak sweaty place 34th Street adult run...,0,0,0,...,0,0,2,1,0,0,0,0,0,0
4,"\n541-B Myrtle Avenue\nNew York, NY 11205\n(71...",2.0,Sushi,U-Gu,7.7,New York is near an ocean with plenty of free ...,New York near ocean plenty free fish asking ca...,0,0,0,...,0,0,2,0,0,0,0,0,0,0


### One hot encode the cuisine columns

In [93]:
#get dummies for cuisine
joined = pd.get_dummies(joined, columns=['cuisine_type'], drop_first=True)

In [94]:
joined.drop(['rest_address'], axis=1, inplace=True)

In [95]:
joined.head()

Unnamed: 0,rest_cost,rest_name,rest_rating,review,lemmatized,11,11th,12,13,14,...,cuisine_type_Spanish,cuisine_type_Steaks,cuisine_type_Sushi,cuisine_type_Tacos,cuisine_type_Taiwanese,cuisine_type_Tex-Mex,cuisine_type_Thai,cuisine_type_Vegetarian,cuisine_type_Vietnamese,cuisine_type_Wine Bar
0,4.0,Omakase Room By Tatsu,7.7,Tolerance for group trips can vary widely. Som...,Tolerance group trip vary widely Some people i...,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,4.0,Sushi Azabu,8.5,When the apocalypse eventually comes for New Y...,When apocalypse eventually come New York City ...,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,3.0,Saint Julivert Fisherie,7.7,"When you’re young, you don’t have to think muc...",When young think much decision You know attemp...,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2.0,Farida,8.0,"There’s a bleak, sweaty place on 34th Street w...",There bleak sweaty place 34th Street adult run...,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2.0,U-Gu,7.7,New York is near an ocean with plenty of free ...,New York near ocean plenty free fish asking ca...,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0


In [96]:
df_to_use = joined.drop(['review', 'lemmatized'], axis = 1)

In [98]:
df_to_use.head()

Unnamed: 0,rest_cost,rest_name,rest_rating,11,11th,12,13,14,14th,15,...,cuisine_type_Spanish,cuisine_type_Steaks,cuisine_type_Sushi,cuisine_type_Tacos,cuisine_type_Taiwanese,cuisine_type_Tex-Mex,cuisine_type_Thai,cuisine_type_Vegetarian,cuisine_type_Vietnamese,cuisine_type_Wine Bar
0,4.0,Omakase Room By Tatsu,7.7,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,4.0,Sushi Azabu,8.5,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
2,3.0,Saint Julivert Fisherie,7.7,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2.0,Farida,8.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2.0,U-Gu,7.7,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0


In [100]:
df_to_use.to_csv('./Data/data_for_reccommender_1.csv', index = False)

# Clean and Vectorize the newer dataset

In [171]:
df = pd.read_csv('./Data/reviews_for_model.csv')

In [172]:
df.head()

Unnamed: 0,address,cost,cuisine,name,rating,review,address_only,rest_borough,rest_zip_code
0,"\n2750 Broadway\nNew York, NY 10025\n(212) 510...",2.0,Pizza,Mama’s Too,8.3,A typical NYC slice shop has a few basic eleme...,"2750 Broadway, New York, NY 10025",Manhattan,10025
1,"\n14 Christopher St\nNew York, NY 10014\n(212)...",4.0,Japanese,Omakase Room By Tatsu,7.7,Tolerance for group trips can vary widely. Som...,"14 Christopher St, New York, NY 10014",Manhattan,10014
2,"\n428 Greenwich St.\nNew York, NY 10013\n212-2...",4.0,Japanese,Sushi Azabu,8.5,When the apocalypse eventually comes for New Y...,"428 Greenwich St., New York, NY 10013",Manhattan,10013
3,"\n264 Clinton St\nNew York, NY 11201\n(347) 98...",3.0,Seafood,Saint Julivert Fisherie,7.7,"When you’re young, you don’t have to think muc...","264 Clinton St, New York, NY 11201",Brooklyn,11201
4,"\n498 9th Ave\nNew York, NY 10018\n(646) 863-2...",2.0,Russian,Farida,8.0,"There’s a bleak, sweaty place on 34th Street w...","498 9th Ave, New York, NY 10018",Manhattan,10018


In [173]:
df.rename({'cost': 'rest_cost', 'name':'rest_name', 'rating': 'rest_rating', 'review': 'rest_review', 'cuisine': 'cuisine_type'}, axis = 1, inplace=True)

In [174]:
df.columns

Index(['address', 'rest_cost', 'cuisine_type', 'rest_name', 'rest_rating',
       'rest_review', 'address_only', 'rest_borough', 'rest_zip_code'],
      dtype='object')

In [175]:
#drop address column
df.drop('address', axis = 1, inplace=True)

In [176]:
col_lemmatized = lemmatized_column(df, df['rest_review'])

In [177]:
df.head()

Unnamed: 0,rest_cost,cuisine_type,rest_name,rest_rating,rest_review,address_only,rest_borough,rest_zip_code,lemmatized
0,2.0,Pizza,Mama’s Too,8.3,A typical NYC slice shop has a few basic eleme...,"2750 Broadway, New York, NY 10025",Manhattan,10025,typical nyc slice shop basic element counter g...
1,4.0,Japanese,Omakase Room By Tatsu,7.7,Tolerance for group trips can vary widely. Som...,"14 Christopher St, New York, NY 10014",Manhattan,10014,tolerance group trip vary widely people intern...
2,4.0,Japanese,Sushi Azabu,8.5,When the apocalypse eventually comes for New Y...,"428 Greenwich St., New York, NY 10013",Manhattan,10013,apocalypse eventually come new york city going...
3,3.0,Seafood,Saint Julivert Fisherie,7.7,"When you’re young, you don’t have to think muc...","264 Clinton St, New York, NY 11201",Brooklyn,11201,young think much decision know attempt deep en...
4,2.0,Russian,Farida,8.0,"There’s a bleak, sweaty place on 34th Street w...","498 9th Ave, New York, NY 10018",Manhattan,10018,bleak sweaty place 34th street adult run 6 yar...


In [178]:
#Try with TFIDF Vectorizer
tfidf_df, tfidf_model = vectorize_tweets(TfidfVectorizer, 'lemmatized', df)

In [195]:
tfidf_df.head()

Unnamed: 0,11,115,11am,11pm,11th,12,125,125th,126th,127th,...,zogsports,zoloft,zombie,zone,zoned,zoo,zucchini,zuck,zuckerberg,zz
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [180]:
tfidf_model

TfidfVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.float64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 1), norm='l2', preprocessor=None, smooth_idf=True,
        stop_words=None, strip_accents=None, sublinear_tf=False,
        token_pattern='(?u)\\b\\w\\w+\\b', tokenizer=None, use_idf=True,
        vocabulary=None)

Index(['11', '115', '11am', '11pm', '11th', '12', '125', '125th', '126th',
       '127th',
       ...
       'zogsports', 'zoloft', 'zombie', 'zone', 'zoned', 'zoo', 'zucchini',
       'zuck', 'zuckerberg', 'zz'],
      dtype='object', length=12204)

In [211]:
df.to_csv('./Data/Lemmatized_df.csv', index = False)

In [167]:
#Merge the TFIDF DF with the original df:

In [196]:
joined = pd.concat([df, tfidf_df], axis =1)

In [197]:
joined.head()

Unnamed: 0,rest_cost,cuisine_type,rest_name,rest_rating,rest_review,address_only,rest_borough,rest_zip_code,lemmatized,11,...,zogsports,zoloft,zombie,zone,zoned,zoo,zucchini,zuck,zuckerberg,zz
0,2.0,Pizza,Mama’s Too,8.3,A typical NYC slice shop has a few basic eleme...,"2750 Broadway, New York, NY 10025",Manhattan,10025,typical nyc slice shop basic element counter g...,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,4.0,Japanese,Omakase Room By Tatsu,7.7,Tolerance for group trips can vary widely. Som...,"14 Christopher St, New York, NY 10014",Manhattan,10014,tolerance group trip vary widely people intern...,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,4.0,Japanese,Sushi Azabu,8.5,When the apocalypse eventually comes for New Y...,"428 Greenwich St., New York, NY 10013",Manhattan,10013,apocalypse eventually come new york city going...,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3.0,Seafood,Saint Julivert Fisherie,7.7,"When you’re young, you don’t have to think muc...","264 Clinton St, New York, NY 11201",Brooklyn,11201,young think much decision know attempt deep en...,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2.0,Russian,Farida,8.0,"There’s a bleak, sweaty place on 34th Street w...","498 9th Ave, New York, NY 10018",Manhattan,10018,bleak sweaty place 34th street adult run 6 yar...,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [207]:
#save the joined df to use in recommender
joined.to_csv('./Data/undummied_df.csv')

In [201]:
#get dummies for cuisine, but keep the first column
dummied = pd.get_dummies(joined, columns=['cuisine_type'])

In [202]:
dummied = pd.get_dummies(dummied, columns = ['rest_borough'])

In [203]:
dummied.head()

Unnamed: 0,rest_cost,rest_name,rest_rating,rest_review,address_only,rest_zip_code,lemmatized,11,115,11am,...,cuisine_type_Vegetarian,cuisine_type_Vietnamese,cuisine_type_Wine Bar,rest_borough_Bronx,rest_borough_Brooklyn,rest_borough_Jersey City,rest_borough_Manhattan,rest_borough_Queens,rest_borough_Staten Island,rest_borough_Westchester
0,2.0,Mama’s Too,8.3,A typical NYC slice shop has a few basic eleme...,"2750 Broadway, New York, NY 10025",10025,typical nyc slice shop basic element counter g...,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,0
1,4.0,Omakase Room By Tatsu,7.7,Tolerance for group trips can vary widely. Som...,"14 Christopher St, New York, NY 10014",10014,tolerance group trip vary widely people intern...,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,0
2,4.0,Sushi Azabu,8.5,When the apocalypse eventually comes for New Y...,"428 Greenwich St., New York, NY 10013",10013,apocalypse eventually come new york city going...,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,0
3,3.0,Saint Julivert Fisherie,7.7,"When you’re young, you don’t have to think muc...","264 Clinton St, New York, NY 11201",11201,young think much decision know attempt deep en...,0.0,0.0,0.0,...,0,0,0,0,1,0,0,0,0,0
4,2.0,Farida,8.0,"There’s a bleak, sweaty place on 34th Street w...","498 9th Ave, New York, NY 10018",10018,bleak sweaty place 34th street adult run 6 yar...,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,0


In [204]:
#Final DF
final = dummied.drop(['rest_review', 'lemmatized'], axis = 1)

In [205]:
final.head()

Unnamed: 0,rest_cost,rest_name,rest_rating,address_only,rest_zip_code,11,115,11am,11pm,11th,...,cuisine_type_Vegetarian,cuisine_type_Vietnamese,cuisine_type_Wine Bar,rest_borough_Bronx,rest_borough_Brooklyn,rest_borough_Jersey City,rest_borough_Manhattan,rest_borough_Queens,rest_borough_Staten Island,rest_borough_Westchester
0,2.0,Mama’s Too,8.3,"2750 Broadway, New York, NY 10025",10025,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,0
1,4.0,Omakase Room By Tatsu,7.7,"14 Christopher St, New York, NY 10014",10014,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,0
2,4.0,Sushi Azabu,8.5,"428 Greenwich St., New York, NY 10013",10013,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,0
3,3.0,Saint Julivert Fisherie,7.7,"264 Clinton St, New York, NY 11201",11201,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,1,0,0,0,0,0
4,2.0,Farida,8.0,"498 9th Ave, New York, NY 10018",10018,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,1,0,0,0


In [206]:
final.to_csv('./Data/tfidf_df.csv', index=False)

In [186]:
# Pickle the TFIDF transformer:
import pickle

pickle.dump(tfidf_model, open('tfidf_model.sav', 'wb'))