In [None]:
import datetime
import json
import math
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats
import seaborn as sb
from scipy.spatial import distance
from textblob import TextBlob

In [None]:
# Load the Yelp business table (newline-delimited JSON, one record per line).
# The review and user JSON files could not be uploaded to this environment,
# so only the business file is read here.
path = '/content/YelpDataset/'
business_df = pd.read_json(f'{path}yelp_academic_dataset_business.json', lines=True)

In [None]:
# pre-processing -- May -- DONE
# Generates two pandas dataframes:
# 1) business_tokeep contains all the businesses with >2yr review history, total len = 141613
# 2) business_pan_eligible contains all businesses that still have >2yr review history when reviews made
#    after the pandemic started are not counted, with the pandemic start date set as 2020-03-01, total len = 138023

In [None]:
# review_df is not read in this notebook; it was loaded locally beforehand.
review_df['new_date'] = pd.to_datetime(review_df['date'], errors='coerce')

# first / last review date per business (both frames are indexed by business_id)
review_dates = review_df[['business_id', 'new_date']].groupby(['business_id'])
review_newest = review_dates.max()
review_oldest = review_dates.min()
review_duration = review_oldest.join(review_newest, on='business_id', lsuffix='_old', rsuffix='_new')
review_duration['duration'] = review_duration['new_date_new'] - review_duration['new_date_old']

# keep businesses whose review history spans more than two years
two_years = datetime.timedelta(days=365*2)
business_tokeep = review_duration[review_duration['duration'] > two_years]

# of those, keep businesses that already had more than two years of history
# on the pandemic start date
pan_start = datetime.datetime.fromisoformat('2020-03-01')
business_pan_eligible = business_tokeep.assign(
    pan_duration=pan_start - business_tokeep['new_date_old'])
business_pan_eligible = business_pan_eligible[business_pan_eligible['pan_duration'] > two_years]

# usable history = the shorter of the pre-pandemic span and the full span, expressed in years
usable_duration = business_pan_eligible[['pan_duration', 'duration']].min(axis=1)
business_pan_eligible = business_pan_eligible.assign(duration_toUse=usable_duration)
business_pan_eligible['duration_toUse'] = (
    business_pan_eligible['duration_toUse'].dt.days.astype('int16') / 365)

In [None]:
# Review-history duration of the eligible businesses:
# raw counts on the left, cumulative share of businesses on the right.
fig, (ax1, ax2) = plt.subplots(1, 2)
fig.suptitle('Histogram of businesses review history duration', fontsize = 24)

durations = business_pan_eligible['duration_toUse']
ax1.hist(durations, bins=15)
ax2.hist(durations, bins=15, density=True, cumulative=True)

for axis, y_label in ((ax1, 'Count of businesses'),
                      (ax2, 'Cumulative ratio of businesses')):
    axis.set_xlabel('Years', fontsize = 18)
    axis.set_ylabel(y_label, fontsize = 18)

plt.show()

My interpretation of this histogram: the distribution is fairly uniform for businesses with review histories shorter than 10 years. Only a select few (<5%) businesses have a review history longer than 10 years.

In [None]:
# Keep only the reviews that (a) belong to a pandemic-eligible business and
# (b) were written before the pandemic start date 2020-03-01.
# business_pan_eligible comes out of a groupby, so business_id is its index and
# business_pan_eligible['business_id'] raises KeyError; reset_index() exposes the
# ids as a regular column.
eligible_ids = business_pan_eligible.reset_index()['business_id']
selected_review = review_df[(review_df["business_id"].isin(eligible_ids)) &
                           (review_df['date'] < '2020-03-01')]
#len(selected_review) = 7696848

# count number of reviews received by each business from the selected reviews
review_count = selected_review.groupby('business_id').count()

# histogram of the number of reviews received by businesses
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=[20, 8])
fig.suptitle('Histogram of review count for selected businesses', fontsize = 24)
ax1.hist(review_count['review_id'], bins=30)
ax1.set_xlabel('Number of reviews received by businesses', fontsize = 18)
ax1.set_ylabel('Count of businesses', fontsize = 18)

# cumulative share of businesses, drawn on log-log axes
ax2.hist(review_count['review_id'], bins=30, density=True, histtype='step', cumulative=True)
ax2.set_xlabel('Number of reviews received by businesses', fontsize = 18)
ax2.set_xscale('log')
ax2.set_yscale('log')
ax2.set_ylabel('Cumulative ratio of businesses', fontsize = 18)

plt.show()


print('the 90th quantile of review count is: ',
      review_count['review_id'].quantile(0.9))

My interpretation of this histogram: over 97% of businesses have fewer than 500 reviews accumulated over the entire duration of Yelp history. Over 90% of the businesses that fit our selection criteria have fewer than 129 reviews.

In [None]:
# just for fun, let's fit it with powerlaw and see what's the fitted xmin value
# The module name is all lower-case: `import powerLaw` fails with
# ModuleNotFoundError and leaves the `powerlaw` name used below undefined.
import powerlaw
count = review_count.review_id
fitted = powerlaw.Fit(count)
fitted.xmin

The xmin returned here is 424, which makes sense based on the histogram plotted above. If we filter out businesses with more than 424 reviews received, then we will have 2544 businesses to work with. This is something to keep in mind. Currently I pre-process the review dataset by selecting businesses at or above the 90th percentile of review count.

In [None]:
# further pre-processing: keep only businesses whose review count is at or above
# the 90th percentile (129 reviews), i.e. the top 10% by review count
# this will still give us 13814 businesses to analyze
review_count_cutoff = review_count['review_id'].quantile(0.9)
further_filtered_business = review_count[
    review_count['review_id'] >= review_count_cutoff].reset_index()

# filter the review dataframe accordingly
further_filtered_reviews = selected_review[selected_review["business_id"].isin(
    further_filtered_business['business_id'])]

# save the further_filtered_review into pickle
further_filtered_reviews.to_pickle('preprocessed_review.pkl')

# groupby and count reviews received by businesses
review_count_new = further_filtered_reviews.groupby('business_id').count()

# now plot the distribution of number of reviews received by businesses after pre-processing
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=[20, 8])
fig.suptitle('Histogram of review count for businesses after pre-processing', fontsize = 24)
ax1.hist(review_count_new['review_id'], bins=20)
ax1.set_xlabel('Number of reviews received by businesses', fontsize = 18)
ax1.set_ylabel('Count of businesses', fontsize = 18)
ax1.set_xscale('log')
ax1.set_yscale('log')

ax2.hist(review_count_new['review_id'], bins=20, density=True, cumulative=True, histtype='step')
ax2.set_xlabel('Number of reviews received by businesses', fontsize = 18)
# label previously read 'Cumulative atio of businesses'
ax2.set_ylabel('Cumulative ratio of businesses', fontsize = 18)
ax2.set_xscale('log')
ax2.set_yscale('log')
plt.show()

My interpretation of this plot: still a little bit skewed, which is understandable, since the fitted xmin returned by the powerlaw function is 424, above our cutoff point.

Let's say that we have saved a subset of reviews_df, based on whichever filtering criteria we eventually decide on.

This variable was called further_filtered_reviews and was saved into a pickle file named 'preprocessed_review.pkl'

In [None]:
# Let's pick up from this
# NOTE: unpickling can execute arbitrary code; only load pickles written by this notebook.
preprocessed_review = pd.read_pickle('preprocessed_review.pkl')
# group by business_id and then sort the reviews based on date
# set business_id as index for easier computation later on
preprocessed_review = preprocessed_review.sort_values(by=['business_id','date']).set_index('business_id')

# change the date to a datetime column
preprocessed_review['date'] = pd.to_datetime(preprocessed_review['date'])

# For every review, how long before the business's most recent review it was made.
# transform('max') broadcasts each business's latest date back onto its rows, so the
# subtraction yields a plain Series (the previous groupby-apply returned a DataFrame
# whose index layout depends on the pandas version).
latest_review_date = preprocessed_review.groupby(level='business_id')['date'].transform('max')

# delta is stored directly as an int number of days (0 for the latest review, negative before it);
# the previous conversion line read from `subset_review_delta`, a name that is never defined.
preprocessed_review_delta = preprocessed_review.assign(
    delta=(preprocessed_review['date'] - latest_review_date).dt.days)

In [None]:
# function to segment by date -- May -- Done
def summarize_column_by_period(dataframe, col, timespan=1, unit='month', count=True):
    """
    Summarize one review column per business and per time period.

    This function calls timedelta_period_convert to bucket the `delta` column
    into periods.

    dataframe: pandas dataframe containing reviews information,
               should have business_id as its index, and delta in int already

    col: str - column name to get summarized info from, e.g. 'stars', 'useful';
             columns are generated for mean, sd, minimum and maximum

    timespan: see doc for timedelta_period_convert function
    unit: see doc for timedelta_period_convert function
    count: also report the number of reviews received during each period
           (column review_count); default is True


    RETURN:
    summarized_df: Pandas dataframe with business_id and period as index, and summarized information
    """
    df_period = dataframe.assign(period=timedelta_period_convert(
        dataframe['delta'], timespan=timespan, unit=unit))

    # String aliases instead of np.mean / np.std / min / max: pandas already maps those
    # callables to these groupby methods, but warns (2.1+) that it will stop doing so,
    # which would silently turn the sample std (ddof=1) into np.std's population std.
    aggregations = {
        'mean': pd.NamedAgg(column=col, aggfunc='mean'),
        'sd': pd.NamedAgg(column=col, aggfunc='std'),
        'minimum': pd.NamedAgg(column=col, aggfunc='min'),
        'maximum': pd.NamedAgg(column=col, aggfunc='max'),
    }
    if count:
        # number of rows in the group, whether or not review_id is null
        aggregations['review_count'] = pd.NamedAgg(column='review_id', aggfunc='size')

    return df_period.groupby(['business_id', 'period']).agg(**aggregations)


def timedelta_period_convert(time_series, timespan = 1, unit = 'month'):
    """
    Bucket day offsets into periods of `timespan` units.

    time_series: the delta column of the dataframe - note: needs to be converted to int already
    timespan: an integer, if we want to segment by 3 month, then timespan would be 3
    unit: 'month', 'week', 'days', 'year' (counted as 30, 7, 1 and 365 days)

    RETURN:
    new_period: a Pandas Series holding floor(time_series / period length in days)
    """
    days_per_unit = {'month':30, 'week': 7, 'days': 1, 'year': 365}
    period_length = timespan * days_per_unit[unit]
    return np.floor(time_series / period_length)


For proof of concept, I tried my function on a subset of our filtered review dataframe. To be exact, I tried this on a test dataframe containing reviews from two businesses.

See code below for more info

In [None]:
# Proof of concept on the reviews of two businesses only.
test_set = {'zzin1d1oHi81GuI0ufo1VA', '--164t1nclzzmca7eDiJMw'}

# `subset_review` is not defined anywhere in this notebook; the filtered reviews live in
# `preprocessed_review`, which is already sorted by business and date and indexed by business_id.
test_reviews = preprocessed_review[preprocessed_review.index.isin(list(test_set))].copy()

test_reviews['date'] = pd.to_datetime(test_reviews['date'])

# days between each review and the business's most recent review (0 or negative)
latest_test_date = test_reviews.groupby(level='business_id')['date'].transform('max')
test_review_delta = test_reviews.assign(delta=(test_reviews['date'] - latest_test_date).dt.days)

summarize_column_by_period(test_review_delta, 'stars', timespan=6, unit='month')

In [None]:
## trend analysis (as a function). -- Sophia will do this 
def calculate_regression(dataframe):
    """
    Fit a straight line to each business's per-period review counts.

    dataframe: resulting dataframe from summarize_column_by_period;
        its first index level is the business and it must have a
        review_count column

    RETURN:
    regression_values: Pandas dataframe with one row per business and the columns
        business_id, slope and rvalue (the correlation coefficient r, not r^2).
        slope and rvalue are NaN for a business whose review counts are all
        identical (including a business with a single period), because no
        line can be fitted in that case.
    """
    records = []
    for business_id, new_df in dataframe.groupby(level=0):
        num_reviews = new_df['review_count'].to_numpy(dtype=float)
        # position of each period within the business's own history: 0, 1, 2, ...
        period_order = np.arange(len(num_reviews))

        if num_reviews.max() == num_reviews.min():
            # linregress cannot fit when every x value is the same
            slope = r = np.nan
        else:
            # NOTE(review): review counts are the x variable and period order the y
            # variable, as before. r is unaffected by that choice, but slope is
            # "periods per review"; swap the arguments if "reviews per period" is wanted.
            slope, _intercept, r, _p, _se = scipy.stats.linregress(num_reviews, period_order)

        records.append({'business_id': business_id,
                        'slope': slope,
                        'rvalue': r})

    # explicit columns so that an empty input still yields the expected schema
    return pd.DataFrame(records, columns=['business_id', 'slope', 'rvalue'])
  

  # linear regression

In [None]:
# 3D histogram of (r value, slope) pairs, one pair per business.
# NOTE(review): `regression_vals` is not defined in the visible notebook; presumably it is
# the output of calculate_regression (named `trend` in a later cell) — confirm.
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
x = regression_vals['rvalue']
y = regression_vals['slope']
hist, xedges, yedges = np.histogram2d(x, y, bins=5, range=[[-1, 1], [-2.25, 2.25]])

# Anchor every bar at the lower corner of its bin and make it exactly one bin wide.
# The previous fixed offset (0.25) and width (0.5) did not match these bins, which are
# 0.4 wide on the r axis and 0.9 wide on the slope axis, so bars were misplaced.
xpos, ypos = np.meshgrid(xedges[:-1], yedges[:-1], indexing="ij")
xpos = xpos.ravel()
ypos = ypos.ravel()
zpos = 0
dx = xedges[1] - xedges[0]
dy = yedges[1] - yedges[0]
dz = hist.ravel()

# the plotted column is `rvalue` (range -1..1), i.e. r rather than R²
plt.xlabel("r value")
plt.ylabel('Slope')
ax.set_zlabel('Number of businesses')
plt.title('Slope and r value for each business, review count timeframe = 6 months')

ax.bar3d(xpos, ypos, zpos, dx, dy, dz, shade = True)

In [None]:
# Code below for chi-square analysis of review count coefficient and whether business is still open
summarized = summarize_column_by_period(preprocessed_review_delta, 'stars', timespan=6, unit='month')
trend = calculate_regression(summarized)

# businesses whose review count is clearly correlated with time in one direction (rvalue <= -0.5)
decline_business_id = trend[trend['rvalue']<= -0.5]
decline_business_id.slope.mean()
decline_business = summarized[summarized.index.get_level_values(0).isin(decline_business_id['business_id'])]
decline = business_df[business_df['business_id'].isin(decline_business_id['business_id'])]
decline.is_open.value_counts()


# businesses correlated in the opposite direction (rvalue >= 0.5)
increase_business_id = trend[trend['rvalue']>= 0.5]
increase_business_id.slope.mean()
increase_business = summarized[summarized.index.get_level_values(0).isin(increase_business_id['business_id'])]
increase = business_df[business_df['business_id'].isin(increase_business_id['business_id'])]
# was `booming.is_open...`, a name that is never defined; the frame built above is `increase`
increase.is_open.value_counts()


# NOTE(review): these counts are typed in by hand, presumably copied from the two
# value_counts() outputs above — confirm which row/column is which before reusing.
crosstab = np.zeros((2,2))
crosstab[0, 0] = 2395
crosstab[0, 1] = 2594
crosstab[1, 0] = 867
crosstab[1, 1] = 301
# `ss` is never imported in this notebook; scipy.stats is imported in the first cell
chi2, p, dof, ex = scipy.stats.chi2_contingency(crosstab, correction=False)

In [None]:
#Romir
df = pd.read_json(path+'yelp_academic_dataset_business.json', lines=True)
#dropna or won't iterate through nan values
df = df.dropna()

#counting all subcategories
# one list of alphabetic tokens per business, kept only when it has more than one token
categories = [
    cat_list
    for cat_list in (re.findall('[a-zA-Z]+', l) for l in df["categories"].dropna())
    if len(cat_list) > 1
]
# distinct tokens in first-seen order
all_types = list(dict.fromkeys(token for lst in categories for token in lst))
len(all_types)

#adding new column to the dataframe to include main category
df["main"] = "main"
#assigning correct value to main category
# Labels are tested as plain substrings of the categories string, in this order;
# a later match overwrites an earlier one, so the last matching label wins.
for main_label in (
        "Active Life", "Arts & Entertainment", "Automotive", "Beauty & Spas",
        "Education", "Event Planning & Services", "Financial Services", "Food",
        "Health & Medical", "Home Services", "Hotels & Travel", "Local Flavor",
        "Local Services", "Mass Media", "Nightlife", "Pets",
        "Professional Services", "Public Services & Government", "Real Estate",
        "Religious Organizations", "Restaurants", "Shopping"):
    df.loc[df["categories"].str.contains(main_label, regex=False), "main"] = main_label

In [None]:
# I turned the code above into a function
# to use this function on a dataframe you would call:
# business_with_main_category = business_df.assign(main_category = assign_category(business_df['categories']))


def assign_category(lists_of_strings, default="main"):
    """
    Map each business's categories string to one main category.

    lists_of_strings: iterable of strings (e.g. the business_df['categories'] column);
        entries that are not strings (None / NaN) are treated as having no category
    default: value used when an entry matches no main category (or is not a string)

    RETURN:
    output: list with one main category per input entry, in input order, so it can
        be passed straight to DataFrame.assign. Labels are tested as substrings in a
        fixed order and the last matching label wins.

    Previously the function returned a single string (the category of the last
    entry only) and raised when nothing matched or an entry was None.
    """
    main_labels = (
        "Active Life", "Arts & Entertainment", "Automotive", "Beauty & Spas",
        "Education", "Event Planning & Services", "Financial Services", "Food",
        "Health & Medical", "Home Services", "Hotels & Travel", "Local Flavor",
        "Local Services", "Mass Media", "Nightlife", "Pets",
        "Professional Services", "Public Services & Government", "Real Estate",
        "Religious Organizations", "Restaurants", "Shopping",
    )
    output = []
    for entry in lists_of_strings:
        label = default
        if isinstance(entry, str):
            for candidate in main_labels:
                if candidate in entry:
                    label = candidate  # keep going: a later label overrides
        output.append(label)
    return output

In [None]:
#boxplot of all business main categories and their review count
# The figure size has to be configured before the plot is drawn; set afterwards it
# only affects later figures.
sb.set(rc={'figure.figsize':(40,20)})
ax = sb.boxplot(x=df["main"], y=df['review_count'])
# x carries the category and y the review count (the two labels were swapped)
ax.set_xlabel("Business Main Category")
ax.set_ylabel("Number of Reviews")
ax.set_title("Distribution of the number of review counts on business categories")

In [None]:
#data to be used for knn algorithm
main_cat = [
    "Active Life", "Arts & Entertainment", "Automotive", "Food", "Beauty & Spas",
    "Education", "Event Planning & Services", "Financial Services", "Health & Medical",
    "Home Services", "Hotels & Travel", "Local Flavor", "Local Services", "Mass Media",
    "Nightlife", "Pets", "Professional Services", "Public Services & Government",
    "Real Estate", "Religious Organizations", "Restaurants", "Shopping",
]
# average review_count over the businesses assigned to each main category
review_count_mean = [df[df["main"] == category]["review_count"].mean()
                     for category in main_cat]
#make new dataframe with 2 columns: Main Category, Review Count Mean (mean over all businesses of the same main type)
data = pd.DataFrame(data={"Main Category": main_cat, "Review Count Mean": review_count_mean})

In [None]:
#defining euclidean distance to be used in knn
def euclidean_distance(row):
    """
    Euclidean distance between `row` and the module-level `selected_business`,
    measured over the columns listed in the module-level `distance_columns`.
    """
    squared_gaps = ((row[k] - selected_business[k]) ** 2 for k in distance_columns)
    return math.sqrt(sum(squared_gaps))

In [None]:
#add Closest Neighbor column to data
data["Closest Neighbor"] = "none"

# The feature matrix does not change inside the loop, so normalize it once.
distance_columns = ["Review Count Mean"]
# Select only the numeric columns used for the distance
data_numeric = data[distance_columns]
# Normalize all of the numeric columns; categories without a mean (NaN) are set to 0
data_normalized = ((data_numeric - data_numeric.mean()) / data_numeric.std()).fillna(0)

#find the nearest other category for each business main category
for index in data.index:
    selected_business = data.loc[index]
    # 1-D normalized vector of the selected category (scipy's euclidean expects 1-D
    # inputs; a one-row DataFrame is 2-D)
    business_normalized = data_normalized.loc[index]

    # Find the distance between selected business and everyone else.
    euclidean_distances = data_normalized.apply(
        lambda other: distance.euclidean(other, business_normalized), axis=1)

    # Drop the category itself explicitly instead of assuming it sorts first:
    # with tied distances the second-smallest row could be the category itself.
    most_similar_index = euclidean_distances.drop(index).idxmin()

    #assign closest neighbor
    data.at[index, "Closest Neighbor"] = data.at[most_similar_index, "Main Category"]

In [None]:
# Display the whole `data` table (one row per main category) to inspect the
# "Closest Neighbor" assigned to each category.
data

In [None]:
### Project Presentation ###

In [None]:
#Cleaning review data - only keeping reviews made in English
# (approximated by keeping the reviews whose text is made of ASCII characters only;
#  the filtered frame is displayed, not stored)

preprocessed_review_delta.loc[
    lambda frame: frame['text'].map(lambda review_text: review_text.isascii())]

In [None]:
# Line plot of `count` against `date`, one line per value of `group`, legend hidden.
# NOTE(review): `daily_summary` is not defined anywhere in the visible notebook —
# confirm where it is built before relying on this cell.
# seaborn is already imported as `sb` in the first cell; this cell adds a second alias.
import seaborn as sns
sns.lineplot(x='date', y='count', hue = 'group', data = daily_summary, palette="flare", legend=False)

In [None]:
#df reviews
# NOTE(review): from here on `df` presumably holds the reviews table (it needs the
# useful / funny / cool / text columns), not the business table built earlier — confirm.
#add column that sums up votes of type (useful + funny + cool) to: total_votes
df["total_votes"] = df["useful"] + df["funny"] + df["cool"]
#mean of total votes is about 2.17
# (was df_built[...], a name that is never defined in this notebook)
df["total_votes"].mean()
#top 5% of votes
top_pct = df[df["total_votes"] >= df["total_votes"].quantile(0.95)].reset_index()
#if 10% -> 11; if 5% -> 19.13
top_pct["total_votes"].mean()

#add sentiment column
# Series.map returns a Series; DataFrame.applymap is deprecated in recent pandas and
# returned a one-column DataFrame here.
df["sentiment"] = df["text"].map(lambda x: TextBlob(x).sentiment.polarity)

In [None]:
## code to generate features for machine learning model
# 'cleaned_review.pkl' is what I saved after running Sophia's one line to drop all the uninterpretable reviews
# should contain  (TODO: this note was left unfinished — list the expected columns)
reviews_df = pd.read_pickle('cleaned_review.pkl') # this is the main dataframe

# total_votes = useful + funny + cool for each review
reviews_df['total_votes'] = reviews_df[['useful', 'funny', 'cool']].sum(axis=1)
# work on a copy indexed by review_id, with date parsed as datetime
reviews = reviews_df.reset_index().set_index('review_id')
reviews.date = pd.to_datetime(reviews.date)

# get average ratings for the previous 6m of the review
# The stars are shifted down one row so that a review's own rating is not part of its window.
# NOTE(review): the shift runs over the whole frame, not per business, so the first review
# of each business receives the stars of the row just above it — confirm this is acceptable.
shift_stars = reviews[['stars']].shift(1)
reviews_subset = reviews[['business_id', 'date']].join(shift_stars)
# per business: mean of the shifted stars over a 180-day time window on `date`
rolling_stars = reviews_subset.groupby('business_id').rolling('180D', on='date').mean()
# within each business, missing values are replaced by the mean of that business's
# (shifted, rolled) stars — intended for the "first" review, which has no history
rolling_stars['stars'] = rolling_stars[['stars']].groupby('business_id').transform(lambda x: x.fillna(x.mean()))

# Both frames get a fresh integer index so that the column assignments below line up row by row.
# NOTE(review): this assumes reviews_df is already ordered the same way as the groupby
# output (presumably by business_id, then date) — verify.
rolling_stars = rolling_stars.reset_index()
reviews_df = reviews_df.reset_index()

reviews_df['prev6M_rating'] = rolling_stars['stars']

# get total count of reviews for previous 6m
# rolling 180-day count per business, then minus 1
temp = rolling_stars.groupby('business_id').rolling('180D', on='date').count()
reviews_df['prev6M_count'] = temp.drop(columns='business_id').reset_index().stars - 1

## code to generate the target for machine learning model
# the target is projected rating for the next 6 months
# summarize based on a forward-looking window
subset1 = reviews_df[['business_id', 'date', 'review_id', 'stars']]
# NOTE(review): subset1 is a selection from reviews_df, so this assignment may emit
# pandas' SettingWithCopyWarning.
subset1['date'] = pd.to_datetime(subset1['date'])
subset1 = subset1.set_index(['business_id', 'date', 'review_id'])
# Ended up using a rather crude way, looking 20 rows ahead at a time
# doing it based on actual date takes a long time - but can be worked on in the future
# i.e. the "6 month" target is really the mean stars over a forward window of 20 rows
# within each business, not over a date range
indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=20)
subset1_forward = subset1.groupby('business_id').rolling(window=indexer, min_periods=1).mean()

# assigned by integer position after reset_index — same row-order assumption as above
reviews_df['projected6M_rating'] = subset1_forward.reset_index().stars


In [None]:
# Load the pickle containing the sentiment analyses results as reviews_df:
# separate out the NLTK sentiment vector result into four columns
# Each polarity_score entry is indexed by these four keys; missing entries stay missing
# (na_action='ignore'). Series.map replaces four copies of the deprecated
# DataFrame.applymap call.
for score_name in ('compound', 'neg', 'neu', 'pos'):
    reviews_df[score_name] = reviews_df['polarity_score'].map(
        lambda scores, key=score_name: scores[key], na_action='ignore')

In [None]:
### Machine learning model
# feature columns, and the target column kept as a one-column dataframe
training_data = reviews_df.loc[:, ['business_id', 'total_votes', 'prev6M_rating',
                                   'prev6M_count', 'compound', 'neg', 'neu', 'pos']]
testing_data = reviews_df.loc[:, ['projected6M_rating']]

In [None]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

# need to label businesses: fit the encoder on business_id and store the
# resulting integer label of every review in business_encoded
le = preprocessing.LabelEncoder()
reviews_df['business_encoded'] = le.fit_transform(reviews_df['business_id'])

In [None]:
# on another thought, we prob don't need to include business_encoded / business_id as a feature
# (it is still included below)
feature_columns = ['business_encoded', 'total_votes', 'prev6M_rating', 'prev6M_count',
                   'compound', 'neg', 'neu', 'pos']
training_data = reviews_df[feature_columns]

# despite its name, testing_data holds the regression target
testing_data = reviews_df['projected6M_rating'].values

X_train, X_test, y_train, y_test = train_test_split(training_data, testing_data, random_state=0)

reg = GradientBoostingRegressor()
reg.fit(X_train, y_train)
reg.score(X_test, y_test)

reg.feature_importances_
# One bar and one label per feature, both taken from the same list: the previous call
# drew 8 bars but supplied only 7 tick labels ('business_encoded' was missing).
plt.bar(x=np.arange(len(feature_columns)), height=reg.feature_importances_,
        tick_label=feature_columns)