In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import numpy as np
import scipy as sp
import pandas as pd
import geopandas as gpd
import geoplot as gplt
from shapely.geometry import Point
import shapely
import reverse_geocoder as rg

import sklearn as sk
import matplotlib as mpl
import matplotlib.pylab as plt
import matplotlib.font_manager as fm
from mpl_toolkits.mplot3d import Axes3D

import seaborn as sns
# Notebook-wide seaborn defaults: figure size, grid style and colour codes.
sns.set(rc={'figure.figsize':(13.7,10.27)})
sns.set_style("whitegrid")
sns.set_color_codes()

In [2]:
from dask.distributed import Client
import dask.bag as db
import dask.dataframe as dd
import dask.array as da
import dask

from ast import literal_eval
from collections.abc import MutableMapping
from collections import Counter
from functools import lru_cache
# import h5py
import io
from math import sin, cos, sqrt, atan2, radians
import os

from rake_nltk import Rake
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

In [3]:
# Start a Dask distributed client with default arguments. The returned client
# is not bound to a name; its summary (scheduler, workers, memory) is the cell output.
Client()

0,1
Client  Scheduler: tcp://127.0.0.1:38365  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 4  Cores: 4  Memory: 20.70 GB


In [4]:
# client = Client(n_workers=8)
# client

# Load all datasets

## Respective schemas

In [5]:
# Column -> dtype mappings handed to dd.read_csv for each raw dataset.
# Columns typed `object` are left for later parsing.
users = {
    'userName': str,
    'jobs': object,
    'currentPlace': object,
    'previousPlaces': object,
    'education': object,
    'gPlusUserId': str,
}

reviews = {
    'rating': float,
    'reviewerName': str,
    'reviewText': str,
    'categories': object,
    'gPlusPlaceId': str,
    'unixReviewTime': object,
    'reviewTime': str,
    'gPlusUserId': str,
}

places = {
    'name': str,
    'price': str,
    'address': str,
    'hours': object,
    'phone': str,
    'closed': bool,
    'gPlusPlaceId': str,
    'gps': object,
}

# del users, reviews, places

## Fastload of smaller files

In [9]:
%%time

# 1 min

def _read_csv_eagerly(pattern, schema, blocksize):
    """Read every CSV matching `pattern` with Dask and materialise it as a pandas frame."""
    lazy_frame = dd.read_csv(pattern,
                             blocksize=blocksize,
                             dtype=schema,
                             encoding='utf-8',
                             lineterminator='\n')
    return lazy_frame.compute()


# Each dataset uses its own schema and partition block size.
users_df = _read_csv_eagerly('users/*.csv', users, '128MiB')
reviews_df = _read_csv_eagerly('reviews_restaurants/*.csv', reviews, '32MiB')
places_df = _read_csv_eagerly('places/*.csv', places, '64MiB')
# del users_df, reviews_df, places_df

CPU times: user 11.2 s, sys: 4.96 s, total: 16.2 s
Wall time: 42.6 s


# Merge datasets

- Users: currentPlace => user_lat, user_long
- Places: gps => places_lat, places_long

In [13]:
# Columns available before pruning.
print('Users: ', users_df.columns.to_list())
print('Places: ', places_df.columns.to_list())
print('Reviews: ', reviews_df.columns.to_list(), '\n')

# users: keep userName, currentPlace, gPlusUserId
users_df2 = users_df.drop(columns=['jobs', 'previousPlaces', 'education'])

# places: keep name, price, address, phone, gPlusPlaceId, gps
places_df2 = places_df.drop(columns=['hours', 'closed'])

# reviews: keep rating, reviewerName, categories, gPlusPlaceId, gPlusUserId
reviews_df2 = reviews_df.drop(columns=['reviewText', 'unixReviewTime', 'reviewTime'])

print('After dropping...')
for label, frame in (('Users: ', users_df2),
                     ('Places: ', places_df2),
                     ('Reviews: ', reviews_df2)):
    print(label, frame.columns.to_list())

reviews_df2.head()
# del users_df2, places_df2, reviews_df2

Users:  ['userName', 'jobs', 'currentPlace', 'previousPlaces', 'education', 'gPlusUserId']
Places:  ['name', 'price', 'address', 'hours', 'phone', 'closed', 'gPlusPlaceId', 'gps']
Reviews:  ['rating', 'reviewerName', 'reviewText', 'categories', 'gPlusPlaceId', 'unixReviewTime', 'reviewTime', 'gPlusUserId'] 

After dropping...
Users:  ['userName', 'currentPlace', 'gPlusUserId']
Places:  ['name', 'price', 'address', 'phone', 'gPlusPlaceId', 'gps']
Reviews:  ['rating', 'reviewerName', 'categories', 'gPlusPlaceId', 'gPlusUserId']


Unnamed: 0,rating,reviewerName,categories,gPlusPlaceId,gPlusUserId
0,4.0,william spindler,"['Asian Restaurant', 'Chinese Restaurant']",106591714648856494903,100000032416892623125
1,5.0,william spindler,"['European Restaurant', 'Italian Restaurant', ...",109420033090810328045,100000032416892623125
2,5.0,william spindler,['Barbecue Restaurant'],111623070919810985923,100000032416892623125
3,4.0,william spindler,['Restaurant'],113854191152597312098,100000032416892623125
4,5.0,william spindler,['Mexican Restaurant'],115827996910815192564,100000032416892623125


## Users: currentPlace column => Lat, Long columns

In [14]:
%%time

def string_to_coord(currentPlace):
    """Extract a ``(lat, long)`` pair in decimal degrees from a raw ``currentPlace`` value.

    The value is expected to be the string form of a list such as
    ``['Adana', [[], 370000000, 353213330, 1]]``: element 1 carries the
    latitude and longitude at positions 1 and 2, stored as integers scaled
    by 10,000,000.

    Parameters
    ----------
    currentPlace : str or any
        Raw cell value. Non-string values (e.g. NaN for a missing place) are
        treated as "no location".

    Returns
    -------
    tuple
        ``(lat, long)`` as floats, or ``(None, None)`` when the value is
        missing, is not a plain Python literal, or lacks the expected layout.
    """
    if not isinstance(currentPlace, str):
        return (None, None)
    try:
        # literal_eval deliberately replaces eval here: the text comes from an
        # external CSV, and eval would execute any expression embedded in it.
        parsed = literal_eval(currentPlace)
        raw_lat, raw_long = parsed[1][1:3]
        return (raw_lat / 10000000, raw_long / 10000000)
    except (ValueError, SyntaxError, TypeError, IndexError, KeyError,
            RecursionError, MemoryError):
        # Best-effort parse: malformed or unexpectedly shaped values simply
        # have no coordinates. Only parse/shape errors are swallowed.
        return (None, None)

# Parse each currentPlace once, then split the resulting pair into two columns.
users_df2['Coordinates'] = users_df2['currentPlace'].map(string_to_coord)
users_df2['user_lat'] = users_df2['Coordinates'].apply(
    lambda pair: pair[0] if pair else None
)
users_df2['user_long'] = users_df2['Coordinates'].apply(
    lambda pair: pair[1] if pair else None
)

# userName becomes reviewerName, the column name used by the reviews frame.
users_df2 = users_df2.rename(columns={'userName': 'reviewerName'})
users_df2.head()

CPU times: user 12.9 s, sys: 196 ms, total: 13.1 s
Wall time: 13 s


Unnamed: 0,reviewerName,currentPlace,gPlusUserId,Coordinates,user_lat,user_long
0,an lam,"['Thành phố Hồ Chí Minh, Việt Nam', [[], 10823...",100000010817154263736,"(10.823099, 106.629664)",10.823099,106.629664
1,HALİL TURGUT,"['Adana', [[], 370000000, 353213330, 1]]",100000013500285534661,"(37.0, 35.321333)",37.0,35.321333
2,森田さとこ,,100000021336848867366,"(None, None)",,
3,amey kore,,100000030557048145331,"(None, None)",,
4,william spindler,,100000032416892623125,"(None, None)",,


#### Make sure the lat, long coordinates are OK

In [16]:
# Sanity check: latitudes should stay within [-90, 90] and longitudes within [-180, 180].
users_df2.describe()

Unnamed: 0,user_lat,user_long
count,737639.0,737639.0
mean,32.130619,-31.866851
std,19.997143,75.8566
min,-90.0,-176.476908
25%,27.949436,-90.511764
50%,37.339386,-73.561504
75%,43.557952,17.10731
max,90.0,179.472656


## Places: Gps column => Lat, Long columns

In [17]:
%%time

def string_to_coord2(gps):
    """Extract a ``(lat, long)`` pair in decimal degrees from a raw ``gps`` value.

    The value is expected to be the string form of a two-element list such as
    ``[33.703804, -117.003209]``. When either component is outside the valid
    range (|lat| > 90 or |long| > 180), both are divided by 1,000,000.

    Parameters
    ----------
    gps : str or any
        Raw cell value. Non-string values (e.g. NaN) mean "no coordinates".

    Returns
    -------
    tuple
        ``(lat, long)``, or ``(None, None)`` when the value is missing, is not
        a plain Python literal, or does not hold two numbers.
    """
    if not isinstance(gps, str):
        return None, None
    try:
        # literal_eval deliberately replaces eval here: the text comes from an
        # external CSV, and eval would execute any expression embedded in it.
        coord = literal_eval(gps)
        lat, long = coord[0], coord[1]
        if abs(lat) > 90 or abs(long) > 180:
            # Out-of-range values are rescaled as micro-degrees.
            lat = lat / 1000000
            long = long / 1000000
    except (ValueError, SyntaxError, TypeError, IndexError, KeyError,
            RecursionError, MemoryError):
        # Same failure contract as string_to_coord: unusable input -> no coordinates.
        return None, None
    return lat, long

# Parse each gps value once, then split the resulting pair into two columns.
places_df2['Coordinates'] = places_df2['gps'].map(string_to_coord2)
places_df2['place_lat'] = places_df2['Coordinates'].apply(
    lambda pair: pair[0] if pair else None
)
places_df2['place_long'] = places_df2['Coordinates'].apply(
    lambda pair: pair[1] if pair else None
)

# `name` becomes `placeName`.
places_df2 = places_df2.rename(columns={'name': 'placeName'})
places_df2.head()

CPU times: user 22.9 s, sys: 198 ms, total: 23.1 s
Wall time: 23 s


Unnamed: 0,placeName,price,address,phone,gPlusPlaceId,gps,Coordinates,place_lat,place_long
0,Diamond Valley Lake Marina,,"['2615 Angler Ave', 'Hemet, CA 92545']",(951) 926-7201,104699454385822125632,"[33.703804, -117.003209]","(33.703804, -117.003209)",33.703804,-117.003209
1,Blue Ribbon Cleaners,,"['Parole', 'Annapolis, MD']",(410) 266-6123,103054478949000078829,"[38.979759, -76.547538]","(38.979759, -76.547538)",38.979759,-76.547538
2,Portofino,,"['ул. Тутаева, 1', 'Nazran, Ingushetia, Russia...",8 (963) 173-38-38,109810290098030327104,"[43.22776, 44.762726]","(43.22776, 44.762726)",43.22776,44.762726
3,T C's Referee Sports Bar,$$,"['5322 W 26th St', 'Sioux Falls, SD 57106']",(605) 361-2208,100327153115986850675,"[43.529494, -96.792244]","(43.529494, -96.792244)",43.529494,-96.792244
4,Carrefour - Palembang Square,,"['Jl. Angkatan 45', 'Kompleks Palembang Square...",(0711) 359918,103368487323937936043,"[-2.976256, 104.742662]","(-2.976256, 104.742662)",-2.976256,104.742662


#### Make sure the lat and long coordinates are OK

In [18]:
# Sanity check: latitudes should stay within [-90, 90] and longitudes within [-180, 180].
places_df2.describe()

Unnamed: 0,place_lat,place_long
count,3087402.0,3087402.0
mean,34.12425,-20.93909
std,18.90256,80.42328
min,-90.0,-180.0
25%,30.1994,-86.70596
50%,38.75203,-15.94177
75%,45.14825,19.88366
max,82.51684,180.0


## Drop redundant columns and join

In [19]:
%%time
# The raw location columns are no longer needed once lat/long exist.
users_df3 = users_df2.drop(columns=['currentPlace', 'Coordinates'])
places_df3 = places_df2.drop(columns=['gps', 'Coordinates'])

# Reviews are the spine: attach user info, then place info, keeping every review.
# Both reviews and users carry `reviewerName`, hence the suffixes on the first merge.
joined_df = (
    reviews_df2
    .merge(users_df3, on='gPlusUserId', how='left', suffixes=('_left', '_right'))
    .merge(places_df3, on='gPlusPlaceId', how='left')
)

joined_df.tail()

CPU times: user 15.4 s, sys: 596 ms, total: 16 s
Wall time: 15.8 s


Unnamed: 0,rating,reviewerName_left,categories,gPlusPlaceId,gPlusUserId,reviewerName_right,user_lat,user_long,placeName,price,address,phone,place_lat,place_long
4110767,2.0,charles mckinney,"['Hamburger Restaurant', 'Fast Food Restaurant']",116458473784504954830,118446742455312620560,,,,Fatburger,,"['6780 Cherry Ave', 'Long Beach, CA 90805']",(562) 408-1311,33.878048,-118.168365
4110768,2.0,charles mckinney,"['Mexican Restaurant', 'Latin American Restaur...",117332598175065149705,118446742455312620560,,,,Super Mex,,"['5660 Atlantic Ave', 'Long Beach, CA 90805']",(562) 428-7144,33.85925,-118.184753
4110769,4.0,charles mckinney,"['Hot Dog Restaurant', 'Takeout Restaurant', '...",117868066122653879601,118446742455312620560,,,,Wienerschnitzel,$$$,"['1300 E Rosecrans Ave', 'Compton, CA 90221']",(310) 639-8017,33.903287,-118.209676
4110770,1.0,charles mckinney,"['Buffet Restaurant', 'American Restaurant']",117952004983617019485,118446742455312620560,,,,HomeTown Buffet,$$,"['3102 E Imperial Hwy', 'Lynwood, CA 90262']",(310) 537-6411,33.930282,-118.216058
4110771,1.0,charles mckinney,"['Mexican Restaurant', 'Latin American Restaur...",118224418815687780032,118446742455312620560,,,,El Gallo Giro,$$$,"['7148 Pacific Blvd', 'Huntington Park, CA 902...",(323) 585-4433,33.974378,-118.224995


# Analyze and preprocess

In [20]:
# Non-null count per column: shows how many reviews lack user or place details after the left joins.
joined_df.count()

rating                4110772
reviewerName_left     4110772
categories            4110772
gPlusPlaceId          4110772
gPlusUserId           4110772
reviewerName_right    3321369
user_lat              1022203
user_long             1022203
placeName             4109855
price                 1870612
address               4109855
phone                 3908524
place_lat             4102647
place_long            4102647
dtype: int64

In [21]:
joined_df = (
    joined_df
    # drop restaurants without gps coordinates
    .dropna(subset=['place_lat', 'place_long'])
    # the users-side reviewer name is redundant; keep the reviews-side one
    .drop(columns='reviewerName_right')
    .rename(columns={'reviewerName_left': 'reviewerName'})
)

# Write joined_df to csv

In [22]:
# Persist the merged frame; index=False keeps the row index out of the file.
joined_df.to_csv('data/joined_df.csv', index=False)

## Let's try focusing on just the United States for now

We need `us_cities.csv` as a custom dataset for reverse_geocoder

Save as `us_cities_rg.csv` for later

In [23]:
%%time
# https://raw.githubusercontent.com/kelvins/US-Cities-Database/main/csv/us_cities.csv
# Target layout for reverse_geocoder: lat,lon,name,admin1,admin2,cc
# (cc is the country code).
us_cities = (
    dd.read_csv('data/us_cities.csv')
    .compute()
    # drop unneeded column
    .drop(columns=['ID'])
    .rename(columns={'LATITUDE': 'lat',
                     'LONGITUDE': 'lon',
                     'CITY': 'name',
                     'STATE_NAME': 'admin1',
                     'COUNTY': 'admin2'})
    # keep only the expected columns, in the expected order
    .loc[:, ['lat', 'lon', 'name', 'admin1', 'admin2']]
    .assign(cc='US')
)

# save as csv
us_cities.to_csv('data/us_cities_rg.csv', index=False)

del us_cities

CPU times: user 150 ms, sys: 4.47 ms, total: 155 ms
Wall time: 208 ms


In [24]:
# Spot check: look up the coordinates stored for San Jose, CA in the raw city list.
us_cities = dd.read_csv('data/us_cities.csv').compute()
is_san_jose_ca = (us_cities['CITY'] == 'San Jose') & (us_cities['STATE_CODE'] == 'CA')
us_cities.loc[is_san_jose_ca, ['LATITUDE', 'LONGITUDE']]

Unnamed: 0,LATITUDE,LONGITUDE
2651,37.3894,-121.8868


In [25]:
# Build a reverse geocoder from the custom US city list written earlier.
# The file is read inside a `with` block so the handle is closed right away;
# the geocoder only receives an in-memory copy of its contents.
with open('data/us_cities_rg.csv', encoding='utf-8') as cities_file:
    geo = rg.RGeocoder(mode=2,
                       verbose=True,
                       stream=io.StringIO(cities_file.read()))

# Load geoplot's bundled 'contiguous_usa' dataset (one row per state, with its geometry) into a GeoDataFrame.
contiguous_usa = gpd.read_file(gplt.datasets.get_path('contiguous_usa'))

# check if a gps coord is within the contiguous usa
def within_usa(df):
    """Return one boolean per row of ``df``.

    A row is True when its (place_long, place_lat) point is contained in at
    least one geometry of the module-level ``contiguous_usa`` frame.
    """
    return [
        any(contiguous_usa.contains(Point(place_long, place_lat)))
        for place_lat, place_long in zip(df['place_lat'], df['place_long'])
    ]

# del geo, contiguous_usa

# Preview the state polygons used by within_usa.
contiguous_usa.head()

Unnamed: 0,state,adm1_code,population,geometry
0,Minnesota,USA-3514,5303925,"POLYGON ((-89.59941 48.01027, -89.48888 48.013..."
1,Montana,USA-3515,989415,"POLYGON ((-111.19419 44.56116, -111.29155 44.7..."
2,North Dakota,USA-3516,672591,"POLYGON ((-96.60136 46.35136, -96.53891 46.199..."
3,Idaho,USA-3518,1567582,"POLYGON ((-111.04973 44.48816, -111.05025 42.0..."
4,Washington,USA-3519,6724540,"POLYGON ((-116.99807 46.33017, -116.90653 46.1..."


#### Check if the point lies within the contiguous_usa polygon

https://github.com/ResidentMario/geoplot-data

In [9]:
%%time

# filter for reviews within the usa only
# usa_df = joined_df.loc[within_usa]

# runtime 20 min
# usa_df.reset_index(inplace=True, drop=True)

# usa_df.to_csv('data/usa_df.csv', index=False)

CPU times: user 8 µs, sys: 0 ns, total: 8 µs
Wall time: 14.8 µs


In [31]:
%%time
# Column -> dtype mapping for the cached USA-only reviews file.
# NOTE(review): the frame displayed below has a 'phone' column and no
# 'index'/'geometry' columns -- confirm this mapping against data/usa_df.csv.
usa_meta = {
    'index': int,
    'rating': float,
    'reviewerName': str,
    'categories': object,
    'gPlusPlaceId': str,
    'gPlusUserId': str,
    'user_lat': float,
    'user_long': float,
    'placeName': str,
    'price': str,
    'address': object,
    'place_lat': float,
    'place_long': float,
    'geometry': object,
}

# Load eagerly, then replace the index with a fresh 0..n-1 range.
usa_df = (
    dd.read_csv('data/usa_df.csv', dtype=usa_meta, blocksize='64MiB')
    .compute()
    .reset_index(drop=True)
)
usa_df.tail()

CPU times: user 1.68 s, sys: 437 ms, total: 2.11 s
Wall time: 5.55 s


Unnamed: 0,rating,reviewerName,categories,gPlusPlaceId,gPlusUserId,user_lat,user_long,placeName,price,address,place_lat,place_long,phone
2087457,2.0,charles mckinney,"['Cafe', 'Dessert Shop', 'Ice Cream Shop']",103677375949725777579,118446742455312620560,,,Paleteria Y Neveria Nayarit,,"['3908 Tweedy Blvd', 'South Gate, CA 90280']",33.943796,-118.202867,(323) 566-4212
2087458,1.0,charles mckinney,['Chinese Restaurant'],106403740896858581667,118446742455312620560,,,China Town Express,,"['1900 N Long Beach Blvd', 'Compton, CA 90221']",33.90872,-118.207862,(310) 639-2888
2087459,2.0,charles mckinney,"['Chicken Restaurant', 'Fast Food Restaurant']",106610793969162635614,118446742455312620560,,,Popeye's Chicken & Biscuits,$$$,"['451 E El Segundo Blvd', 'Los Angeles, CA 900...",33.916671,-118.265581,(323) 779-6847
2087460,5.0,charles mckinney,"['Mexican Restaurant', 'Fast Food Restaurant']",107288398358014433213,118446742455312620560,,,Del Taco - Compton #1080,$$$,"['1641 S Alameda St', 'Compton, CA 90220']",33.879051,-118.218163,(310) 608-2325
2087461,5.0,charles mckinney,"['American Restaurant', 'Fast Food Restaurant'...",112245364569571982740,118446742455312620560,,,Durango Burgers,,"['1300 S Long Beach Blvd', 'Compton, CA 90221']",33.884631,-118.205445,(310) 638-1959


In [33]:
# Non-null count per column of the USA-only frame.
usa_df.count()

rating          2087462
reviewerName    2087462
categories      2087462
gPlusPlaceId    2087462
gPlusUserId     2087462
user_lat         534543
user_long        534543
placeName       2087462
price           1618354
address         2087462
place_lat       2087462
place_long      2087462
phone           2050011
dtype: int64

#### Make sure the (lat, long) coordinates are OK

In [34]:
# Sanity check on ratings and coordinate ranges for the USA-only frame.
usa_df.describe()

Unnamed: 0,rating,user_lat,user_long,place_lat,place_long
count,2087462.0,534543.0,534543.0,2087462.0,2087462.0
mean,3.970123,37.484354,-90.890976,37.53724,-93.88105
std,1.174481,7.265882,31.383655,5.062171,17.13752
min,0.0,-90.0,-175.198242,25.29142,-124.4993
25%,3.0,33.89468,-112.074037,33.80292,-111.8123
50%,4.0,38.22061,-89.018722,38.44145,-87.97914
75%,5.0,41.280411,-79.930922,41.08796,-80.1154
max,5.0,90.0,178.0,49.3452,-67.25381


### Derive the state, city, and county for each location

In [35]:
# Reverse-geocode every place coordinate in a single batched query.
place_coords = list(zip(usa_df['place_lat'].to_list(), usa_df['place_long'].to_list()))
usa_geo = list(geo.query(place_coords))

# Assign plain lists so values are matched to rows by position. Wrapping them
# in pd.Series would align on index labels instead, silently producing NaN or
# misplaced values whenever usa_df does not carry a default 0..n-1 index.
# A length mismatch now raises instead of being padded with NaN.
usa_df['state'] = [match['admin1'] for match in usa_geo]
usa_df['county'] = [match['admin2'] for match in usa_geo]
usa_df['city'] = [match['name'] for match in usa_geo]

# Free the large temporaries.
del place_coords, usa_geo

### Which states have the most reviews?

In [36]:
# Number of reviews per state; head() keeps the first five entries of the value_counts result.
usa_df['state'].value_counts().head()

California    324612
Texas         204794
New York      166443
Florida       140898
Illinois      112808
Name: state, dtype: int64

### Prepare the USA dataset for content-based recommendation + CLI

- Drop unused columns
- Use Rake to determine keywords
- Split the files into fast-loading chunks

In [37]:
# The content-based frame keeps only place attributes, so the reviewer/user columns are dropped.
usa_df_content = usa_df.drop(columns=['reviewerName', 'gPlusUserId', 'user_lat', 'user_long'])
usa_df_content.tail()

Unnamed: 0,rating,categories,gPlusPlaceId,placeName,price,address,place_lat,place_long,phone,state,county,city
2087457,2.0,"['Cafe', 'Dessert Shop', 'Ice Cream Shop']",103677375949725777579,Paleteria Y Neveria Nayarit,,"['3908 Tweedy Blvd', 'South Gate, CA 90280']",33.943796,-118.202867,(323) 566-4212,California,Los Angeles,South Gate
2087458,1.0,['Chinese Restaurant'],106403740896858581667,China Town Express,,"['1900 N Long Beach Blvd', 'Compton, CA 90221']",33.90872,-118.207862,(310) 639-2888,California,Los Angeles,Lynwood
2087459,2.0,"['Chicken Restaurant', 'Fast Food Restaurant']",106610793969162635614,Popeye's Chicken & Biscuits,$$$,"['451 E El Segundo Blvd', 'Los Angeles, CA 900...",33.916671,-118.265581,(323) 779-6847,California,Los Angeles,Compton
2087460,5.0,"['Mexican Restaurant', 'Fast Food Restaurant']",107288398358014433213,Del Taco - Compton #1080,$$$,"['1641 S Alameda St', 'Compton, CA 90220']",33.879051,-118.218163,(310) 608-2325,California,Los Angeles,Compton
2087461,5.0,"['American Restaurant', 'Fast Food Restaurant'...",112245364569571982740,Durango Burgers,,"['1300 S Long Beach Blvd', 'Compton, CA 90221']",33.884631,-118.205445,(310) 638-1959,California,Los Angeles,Compton


In [38]:
# Non-null count per column before any category filtering.
usa_df_content.count()

rating          2087462
categories      2087462
gPlusPlaceId    2087462
placeName       2087462
price           1618354
address         2087462
place_lat       2087462
place_long      2087462
phone           2050011
state           2087462
county          2087462
city            2087462
dtype: int64

### Drop some categories

In [39]:
def extract_restaurant_and_pub(obj):
    """Return 1 when any of the row's categories is in ``populars``, else 0.

    ``obj`` is indexed with ``"categories"`` and that value is iterated as a
    collection of strings; each one is lower-cased before the membership test
    against the module-level ``populars`` set. An empty or falsy categories
    value yields 0.
    """
    categories = obj["categories"]
    if not categories:
        return 0
    return 1 if any(cat.lower() in populars for cat in categories) else 0

def eval_and_strip(obj):
    """Parse the string form of a list of strings and strip whitespace from each item.

    Parameters
    ----------
    obj : str
        Text such as ``"['Asian ', 'Chinese ']"``.

    Returns
    -------
    list
        The parsed items with leading/trailing whitespace removed.

    Raises
    ------
    ValueError, SyntaxError
        If ``obj`` is not a plain Python literal.
    """
    # literal_eval deliberately replaces eval here: the text comes from an
    # external CSV, and eval would execute any expression embedded in it.
    return [ele.strip() for ele in literal_eval(obj)]

# Lower-cased category names (with the word "Restaurant" already removed)
# that qualify a place for the content-based recommender.
populars = {
    'european', 'asian', 'american', 'italian', 'bar', 'pizza',
    'fast food', 'cafe', 'chinese', 'mexican', 'latin american',
    'seafood', 'hamburger', 'coffee shop',
    'japanese', 'pub', 'bar & grill', 'steak house', 'sandwich shop',
    'bakery', 'barbecue', 'sushi',
    'french', 'indian', 'southeast asian', 'south asian', 'dessert shop',
    'mediterranean', 'takeout', 'deli', 'thai', 'ice cream shop',
}

@lru_cache(maxsize=None)
def _shared_rake():
    """Build the default Rake extractor once, on first use."""
    return Rake()


@lru_cache(maxsize=None)
def _keywords_for(r, category_str):
    """Run the extractor over one category string; memoised per (extractor, string)."""
    # extracting the words by passing the text
    r.extract_keywords_from_text(category_str)
    # word -> degree mapping from the extraction above; only the words are kept
    return ' '.join(r.get_word_degrees().keys())


# Function to create a bag of words from a list of category names
def score_keywords(ls, r=None):
    """Turn a list of category names into a space-separated keyword string.

    Parameters
    ----------
    ls : iterable of str
        Category names; they are joined with single spaces before extraction.
    r : Rake-like object, optional
        Must provide ``extract_keywords_from_text(text)`` and
        ``get_word_degrees()``. When omitted, one shared default ``Rake()`` is
        created on first use (per the original note, default Rake uses the
        English NLTK stopwords and discards punctuation).

    Returns
    -------
    str
        The keys of the extractor's word-degree mapping, joined by spaces.

    Notes
    -----
    Results are cached at module level per (extractor, joined string), so
    repeated category combinations are extracted only once. The previous
    version wrapped a per-call closure in ``lru_cache``, so it never hit.
    """
    if r is None:
        r = _shared_rake()
    category_str = ' '.join(ls)
    try:
        hash(r)
    except TypeError:
        # Unhashable extractor: cannot be a cache key, so compute directly.
        return _keywords_for.__wrapped__(r, category_str)
    return _keywords_for(r, category_str)

In [40]:
# Baseline non-null counts immediately before the category filter (unchanged since the previous count).
usa_df_content.count()

rating          2087462
categories      2087462
gPlusPlaceId    2087462
placeName       2087462
price           1618354
address         2087462
place_lat       2087462
place_long      2087462
phone           2050011
state           2087462
county          2087462
city            2087462
dtype: int64

#### Drop restaurants with only the generic "Restaurant" category

We drop this category since it doesn't help with our content-based rec sys.

In [41]:
%%time

# Remove the word "Restaurant" from the raw category text, then parse it into a list of stripped names.
genre = (
    usa_df_content['categories']
    .map(lambda x: x.replace('Restaurant', ''))
    .map(eval_and_strip)
)
usa_df_content['categories'] = genre

# Extract reviews only whose categories are related to Restaurants, Pubs, or Bars
usa_df_content["target"] = usa_df_content.loc[:, ["categories"]].apply(extract_restaurant_and_pub, axis=1)
usa_df_content = usa_df_content[usa_df_content["target"]==1].drop(["target"], axis=1)

# `populars` is intentionally kept: extract_restaurant_and_pub reads it as a
# global, so deleting it would make the function unusable afterwards.
del genre

usa_df_content.count()

CPU times: user 34 s, sys: 940 ms, total: 34.9 s
Wall time: 34.7 s


rating          1957613
categories      1957613
gPlusPlaceId    1957613
placeName       1957613
price           1555191
address         1957613
place_lat       1957613
place_long      1957613
phone           1925379
state           1957613
county          1957613
city            1957613
dtype: int64

In [42]:
%%time
# One extractor instance is shared by every row; each category list becomes a keyword string.
rake = Rake()
usa_df_content['categories'] = usa_df_content['categories'].map(
    lambda categories: score_keywords(categories, rake)
)
del rake

CPU times: user 1min 26s, sys: 1.11 s, total: 1min 27s
Wall time: 1min 27s


In [43]:
# Non-null counts after keyword extraction: only column values changed, so row counts match the filtered frame.
usa_df_content.count()

rating          1957613
categories      1957613
gPlusPlaceId    1957613
placeName       1957613
price           1555191
address         1957613
place_lat       1957613
place_long      1957613
phone           1925379
state           1957613
county          1957613
city            1957613
dtype: int64

### Write to files

In [44]:
# Write the content-based dataset for the CLI.
# NOTE(review): unlike the joined_df export, index=False is not passed here, so the
# row index is written as an extra leading column -- confirm the CLI expects it.
usa_df_content.to_csv('cli/usa_df_content.csv')