In [1]:
import pandas as pd
import numpy as np
#import matplotlib.pyplot as plt
#import seaborn as sns
from sklearn.model_selection import train_test_split
#from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import TruncatedSVD
from sklearn.naive_bayes import MultinomialNB
from sklearn.cluster import KMeans
from sklearn.preprocessing import LabelEncoder

from sklearn.metrics.pairwise import pairwise_distances

In [3]:
# output the dataset:
df = pd.read_csv('superstore.csv',on_bad_lines='skip')

In [5]:
#rename the columns
df.columns = ['Category', 'City', 'Country', 'CustomerID', 'CustomerName',
       'Discount', 'Market', '记录数','OrderDate', 'OrderID', 'OrderPriority',
       'ProductID', 'ProductName', 'Profit', 'Quantity', 'Region', 'RowID',
       'Sales', 'Segment', 'ShipDate', 'ShipMode', 'ShippingCost', 'State',
       'SubCategory', 'Year', 'Market2', 'weeknum']

In [6]:
# modify the date columns

#change these columns into dates:
df['OrderDate'] = pd.to_datetime(df['OrderDate'], errors='coerce')
df['ShipDate'] = pd.to_datetime(df['ShipDate'], errors='coerce')

# check the column name, if i exists in columns >> change the column to string then loop on the next 
#change the next columns into strings:
columns = df[['Category','City','Country','CustomerID','CustomerName','Market','OrderID','OrderPriority','Segment',
             'ShipMode','State','SubCategory','Market2','ProductID','ProductName','Region']]
# to iterate over the 'object' column to change into string type
for i in columns:
    df[i] = df[i].astype('string')

In [7]:
# create this column for more understanding! might delete it later or the year colu.

df['DaysofOrderPreparation'] = (df['ShipDate'] - df['OrderDate']).dt.days

In [8]:
# create rating column

start = 1
end = 10
width = end - start
rating = round((df['Quantity'] - df['Quantity'].min())/(df['Quantity'].max() - df['Quantity'].min()) * width + start).astype(int)
df['Rating'] = rating

In [9]:
#drop unnecessary columns 
df.drop(['City','Country','OrderDate','ShipDate','weeknum','Market2','ProductID','Region',
         'State','Quantity','OrderID','Category','SubCategory','CustomerName','记录数'],axis=1,inplace = True)

#columns I will delete: customername, shipdate, country,city,orderdate, delete rowId for now,ProductID,
#columns I will encode: market,Segment,OrderPriority,ShipMode via ONE or label encoding
#Quantity: deleted as I replaced it by rating column
#work on customerID

In [10]:
#modify the customerID to more manageable structure
df['CustomerID'] = df['CustomerID'].str.replace('-','') #done hope this works!

In [11]:
#feature engineering:
# one: market,
# label encode. for ranking, order stru.: ShipMode, Segment [1,2,3],OrderPriority [critical:1,high:2....]
### use replace w ShipMode to navigate the values correctly..

In [12]:
# One hot encoding:
df = pd.get_dummies(df, columns=['Market'])

In [14]:
#label encoding

df['OrderPriority'] = df['OrderPriority'].replace(['Critical','High','Medium','Low'],['1','2','3','4']).astype(int)
df['Segment'] = df['Segment'].replace(['Consumer','Corporate','Home Office'],['0','1','2']).astype(int)
df['ShipMode'] = df['ShipMode'].replace(['Standard Class','First Class','Second Class','Same Day'],['0','1','2','3']).astype(int)

In [15]:
# Separate user characteristics, product details, and ratings
user_features = df.drop(['CustomerID','ProductName','Rating'], axis=1)
product_details = df["ProductName"]
ratings = df["Rating"]

In [17]:
group = df.groupby('CustomerID')

df2 = group.apply(lambda x: x['Rating'].unique().tolist())
df2.values.tolist()

[[2, 3, 1, 5, 9, 4],
 [2, 1, 5, 4, 3],
 [4, 2],
 [2, 1, 5],
 [1, 6, 4, 3, 2],
 [4, 2, 6, 3],
 [2, 4, 1, 6],
 [2, 4, 1],
 [4, 2, 3],
 [1, 4, 6, 5, 2, 3],
 [4, 2, 5, 6],
 [1, 2, 6, 3],
 [5, 2, 1, 4, 3],
 [2, 3, 1, 7, 5, 4],
 [4, 2, 5, 3],
 [2, 3, 4],
 [1],
 [2, 4, 1],
 [2, 6, 1, 3],
 [3],
 [1, 2],
 [2, 1],
 [2],
 [4, 2, 1],
 [1, 3, 2, 4],
 [1],
 [3, 1, 2, 4, 6, 9, 5],
 [2, 5, 4, 1, 6, 3],
 [7, 2, 4, 1],
 [2, 1],
 [2, 1, 6, 7, 3],
 [3, 4, 7, 2],
 [2, 4, 5, 7, 10, 3, 6],
 [2, 3, 1, 7, 5],
 [1, 3, 2, 4, 5],
 [2, 6, 1, 4, 3],
 [2, 3, 5, 4],
 [2, 1, 9, 4, 7, 3],
 [2, 4, 1],
 [3, 4, 2, 5, 6],
 [4, 2, 5, 3, 1],
 [2, 3, 4, 1],
 [4, 6, 2, 3, 1],
 [2, 4, 3, 1, 5, 6, 7],
 [1, 6, 4, 2],
 [4, 2, 1, 3],
 [6, 2, 7, 1, 4, 8, 3],
 [2, 3, 1],
 [2, 3, 4, 1],
 [4, 2, 3, 1],
 [1, 2],
 [2],
 [1, 6, 2, 3],
 [4, 3, 1, 2, 5],
 [2, 7, 5, 4, 3],
 [2, 5],
 [2, 4, 10, 3],
 [1, 2, 10, 3],
 [1, 3],
 [1, 2, 6],
 [1],
 [6],
 [1, 2],
 [3, 1, 2],
 [1, 2],
 [2],
 [6, 9, 1, 2, 3],
 [1, 2, 3],
 [6, 3, 2],
 [1, 2, 4],
 [3, 2,

In [18]:
interaction_matrix = df.pivot_table(index='CustomerID',columns='ProductName',values='Rating').fillna(0)
interaction_matrix

ProductName,"""While you Were Out"" Message Book, One Form pe...","#10 Gummed Flap White Envelopes, 100/Box",#10 Self-Seal White Envelopes,"#10 White Business Envelopes,4 1/8 x 9 1/2","#10- 4 1/8"" x 9 1/2"" Recycled Envelopes","#10- 4 1/8"" x 9 1/2"" Security-Tint Envelopes","#10-4 1/8"" x 9 1/2"" Premium Diagonal Seam Enve...",#6 3/4 Gummed Flap White Envelopes,"1.7 Cubic Foot Compact ""Cube"" Office Refrigera...",1/4 Fold Party Design Invitations & White Enve...,...,Zebra ZM400 Thermal Label Printer,Zebra Zazzle Fluorescent Highlighters,Zipper Ring Binder Pockets,i.Sound Portable Power - 8000 mAh,iHome FM Clock Radio with Lightning Dock,iKross Bluetooth Portable Keyboard + Cell Phon...,iOttie HLCRIO102 Car Mount,iOttie XL Car Mount,invisibleSHIELD by ZAGG Smudge-Free Screen Pro...,netTALK DUO VoIP Telephone Service
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AA103151,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AA103152,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AA103153,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AA103154,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AA103751,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZD119252,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZD219251,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZD219252,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZD219253,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
# Train-test split (optional, but recommended for evaluation)
user_features_train, user_features_test, ratings_train, ratings_test = train_test_split(
    interaction_matrix, df2.values.tolist(), test_size=0.3, random_state=42)

In [22]:
# Standardize user characteristics (optional, but may improve performance)
# scaler = StandardScaler()
# user_features_train_scaled = scaler.fit_transform(user_features_train)
# user_features_test_scaled = scaler.transform(user_features_test)

In [23]:
# Apply SVD to user-item rating matrix
# user_item_matrix = ratings_train.T # Transpose for user-based filtering
# user_item_matrix = user_item_matrix.values.reshape(1,-1)
svd = TruncatedSVD(n_components=17, n_iter=7, random_state=42)  # Choose appropriate dimensionality
user_features = svd.fit_transform(interaction_matrix)
user_features

array([[ 6.92926413e-01, -2.23418420e-05, -3.38292189e-01, ...,
        -2.96429310e-01,  2.22913841e-01, -3.67582211e-02],
       [ 9.39266542e-01,  1.18332709e-04,  3.10169422e-02, ...,
         7.20021250e-02,  3.32582220e-01, -1.84669174e-01],
       [ 5.06629782e-01,  4.47669542e-06,  1.15378525e-02, ...,
        -7.68287055e-02,  1.49876475e-01,  1.23252607e-01],
       ...,
       [ 6.46978475e-01,  7.63710474e-05, -5.90968282e-02, ...,
         1.59541265e-01, -7.64107295e-02, -3.17802363e-03],
       [ 1.09816323e-01,  5.56152339e-07,  1.94721690e-02, ...,
         1.18168795e-02,  3.75258820e-02,  3.54439239e-02],
       [ 6.23651538e-07,  1.24534404e-01, -1.45830607e-03, ...,
        -2.08707343e-02,  9.32435885e-03,  3.03242755e-02]])

In [25]:
user_features_df = pd.DataFrame(user_features, index=interaction_matrix.index, columns=[f'feature_{i+1}' for i in range(17)])
user_features_df

Unnamed: 0_level_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,feature_10,feature_11,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
AA103151,6.929264e-01,-2.234184e-05,-0.338292,0.069067,-0.502625,-0.200125,0.267181,-0.336755,0.168845,-0.183712,-0.020523,0.091581,0.010762,-0.160560,-0.296429,0.222914,-0.036758
AA103152,9.392665e-01,1.183327e-04,0.031017,0.124008,0.047527,-0.055555,0.062859,-0.483629,-0.459342,0.387227,0.163724,-0.728389,-0.446991,-0.265246,0.072002,0.332582,-0.184669
AA103153,5.066298e-01,4.476695e-06,0.011538,-0.220289,0.132352,0.047481,0.073318,-0.065315,-0.090543,0.018356,0.097547,0.223695,0.107289,-0.277160,-0.076829,0.149876,0.123253
AA103154,-5.510799e-07,1.986580e+00,0.001176,0.000079,-0.005143,0.012846,0.005026,0.002030,0.003099,0.003335,-0.028123,-0.012829,-0.032444,0.020785,0.017007,-0.007007,-0.018383
AA103751,5.242345e-01,-6.549769e-06,-0.087242,0.199414,0.210206,0.152308,-0.170639,-0.007349,0.433250,-0.033079,0.536102,-0.401124,0.148385,-0.399034,0.300660,-0.105084,0.008588
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZD119252,3.612092e-01,-7.211181e-07,-0.025832,-0.025573,-0.050302,0.005298,-0.077551,0.069224,-0.247358,-0.218236,0.112798,-0.224104,0.004005,0.100326,0.017964,0.170200,-0.100681
ZD219251,6.265902e-01,-1.387128e-05,-0.129590,-0.158018,0.044404,0.261974,-0.082649,-0.138224,0.033030,0.132629,0.110495,0.201513,-0.039628,0.114589,-0.243578,0.058170,0.209346
ZD219252,6.469785e-01,7.637105e-05,-0.059097,-0.277651,0.135774,-0.150260,-0.169307,0.116955,0.253650,0.009291,-0.178395,0.155052,-0.074308,0.234677,0.159541,-0.076411,-0.003178
ZD219253,1.098163e-01,5.561523e-07,0.019472,0.016655,-0.011966,-0.004056,-0.028189,-0.015182,0.053723,-0.009804,-0.002327,-0.011430,0.068886,0.137253,0.011817,0.037526,0.035444


In [26]:
kmeans = KMeans(n_clusters=17, random_state=42)
kmeans

In [27]:
user_clusters = kmeans.fit_predict(user_features_df)
user_clusters

  super()._check_params_vs_input(X, default_n_init=10)


array([1, 1, 0, ..., 9, 0, 0], dtype=int32)

In [28]:
user_features_df['cluster'] = user_clusters
user_features_df

Unnamed: 0_level_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,feature_10,feature_11,feature_12,feature_13,feature_14,feature_15,feature_16,feature_17,cluster
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
AA103151,6.929264e-01,-2.234184e-05,-0.338292,0.069067,-0.502625,-0.200125,0.267181,-0.336755,0.168845,-0.183712,-0.020523,0.091581,0.010762,-0.160560,-0.296429,0.222914,-0.036758,1
AA103152,9.392665e-01,1.183327e-04,0.031017,0.124008,0.047527,-0.055555,0.062859,-0.483629,-0.459342,0.387227,0.163724,-0.728389,-0.446991,-0.265246,0.072002,0.332582,-0.184669,1
AA103153,5.066298e-01,4.476695e-06,0.011538,-0.220289,0.132352,0.047481,0.073318,-0.065315,-0.090543,0.018356,0.097547,0.223695,0.107289,-0.277160,-0.076829,0.149876,0.123253,0
AA103154,-5.510799e-07,1.986580e+00,0.001176,0.000079,-0.005143,0.012846,0.005026,0.002030,0.003099,0.003335,-0.028123,-0.012829,-0.032444,0.020785,0.017007,-0.007007,-0.018383,8
AA103751,5.242345e-01,-6.549769e-06,-0.087242,0.199414,0.210206,0.152308,-0.170639,-0.007349,0.433250,-0.033079,0.536102,-0.401124,0.148385,-0.399034,0.300660,-0.105084,0.008588,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZD119252,3.612092e-01,-7.211181e-07,-0.025832,-0.025573,-0.050302,0.005298,-0.077551,0.069224,-0.247358,-0.218236,0.112798,-0.224104,0.004005,0.100326,0.017964,0.170200,-0.100681,0
ZD219251,6.265902e-01,-1.387128e-05,-0.129590,-0.158018,0.044404,0.261974,-0.082649,-0.138224,0.033030,0.132629,0.110495,0.201513,-0.039628,0.114589,-0.243578,0.058170,0.209346,9
ZD219252,6.469785e-01,7.637105e-05,-0.059097,-0.277651,0.135774,-0.150260,-0.169307,0.116955,0.253650,0.009291,-0.178395,0.155052,-0.074308,0.234677,0.159541,-0.076411,-0.003178,9
ZD219253,1.098163e-01,5.561523e-07,0.019472,0.016655,-0.011966,-0.004056,-0.028189,-0.015182,0.053723,-0.009804,-0.002327,-0.011430,0.068886,0.137253,0.011817,0.037526,0.035444,0


In [29]:
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()

In [30]:
df['CustomerID_enc'] = user_encoder.fit_transform(df['CustomerID'])
df['ProductName_enc'] = item_encoder.fit_transform(df['ProductName'])

Unnamed: 0,CustomerID,Discount,OrderPriority,ProductName,Profit,RowID,Sales,Segment,ShipMode,ShippingCost,...,Rating,Market_APAC,Market_Africa,Market_Canada,Market_EMEA,Market_EU,Market_LATAM,Market_US,CustomerID_enc,ProductName_enc
0,LS172304,0.0,2,Xerox 225,9.3312,36624,19,0,2,4.37,...,2,False,False,False,False,False,False,True,2909,3736
1,MV174854,0.0,3,"Wirebound Service Call Books, 5 1/2"" x 4""",9.2928,37033,19,0,0,0.94,...,2,False,False,False,False,False,False,True,3341,3569
2,CS121304,0.0,3,"Adams Phone Message Book, Professional, 400 Me...",9.8418,31468,21,0,0,1.81,...,2,False,False,False,False,False,False,True,1057,175
3,CS121304,0.0,3,Xerox 1913,53.2608,31469,111,0,0,4.59,...,2,False,False,False,False,False,False,True,1057,3612
4,AP109154,0.0,2,Xerox 223,3.1104,32440,6,0,0,1.32,...,1,False,False,False,False,False,False,True,277,3734
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51285,AM103604,0.2,2,Satellite Sectional Post Binders,22.5732,33646,69,1,3,5.15,...,2,False,False,False,False,False,False,True,253,3086
51286,AM103604,0.2,2,Staples,3.1584,33645,9,1,3,0.44,...,4,False,False,False,False,False,False,True,253,3275
51287,HR147704,0.2,3,GBC Plastic Binding Combs,4.2804,32321,12,2,1,0.31,...,2,False,False,False,False,False,False,True,2039,1505
51288,RM196754,0.2,1,Vinyl Sectional Post Binders,33.9300,35917,90,2,1,15.95,...,2,False,False,False,False,False,False,True,3987,3492


In [32]:
cluster_models = {}

# Train a Naive Bayes model for each cluster
for cluster in range(17):
    cluster_users = user_features_df[user_features_df['cluster'] == cluster].index
    cluster_data = df[df['CustomerID'].isin(cluster_users)]
    
    X = cluster_data[['CustomerID_enc', 'ProductName_enc']]
    y = cluster_data['Rating']
    
    model = MultinomialNB()
    model.fit(X, y)
    cluster_models[cluster] = model
cluster_models

{0: MultinomialNB(),
 1: MultinomialNB(),
 2: MultinomialNB(),
 3: MultinomialNB(),
 4: MultinomialNB(),
 5: MultinomialNB(),
 6: MultinomialNB(),
 7: MultinomialNB(),
 8: MultinomialNB(),
 9: MultinomialNB(),
 10: MultinomialNB(),
 11: MultinomialNB(),
 12: MultinomialNB(),
 13: MultinomialNB(),
 14: MultinomialNB(),
 15: MultinomialNB(),
 16: MultinomialNB()}

In [33]:
unique_users = df['CustomerID'].unique()
unique_items = df['ProductName'].unique()

In [34]:
# Create a DataFrame to store recommendations
recommendations_list = []

# Predict probabilities for all user-item pairs
all_user_item_pairs = pd.DataFrame([(user, item) for user in unique_users for item in unique_items], columns=['CustomerID', 'ProductName'])
all_user_item_pairs['CustomerID_enc'] = user_encoder.transform(all_user_item_pairs['CustomerID'])
all_user_item_pairs['ProductName_enc'] = item_encoder.transform(all_user_item_pairs['ProductName'])
all_user_item_pairs

Unnamed: 0,CustomerID,ProductName,CustomerID_enc,ProductName_enc
0,LS172304,Xerox 225,2909,3736
1,LS172304,"Wirebound Service Call Books, 5 1/2"" x 4""",2909,3569
2,LS172304,"Adams Phone Message Book, Professional, 400 Me...",2909,175
3,LS172304,Xerox 1913,2909,3612
4,LS172304,Xerox 223,2909,3734
...,...,...,...,...
18458919,AO108104,Cisco SPA 501G IP Phone,266,957
18458920,AO108104,Hewlett-Packard Deskjet D4360 Printer,266,1782
18458921,AO108104,"Vtech AT&T CL2940 Corded Speakerphone, Black",266,3493
18458922,AO108104,Barricks Non-Folding Utility Table with Steel ...,266,532


In [35]:
# Set a threshold for interaction probability
threshold = 0.50

# Generate recommendations for each user based on their cluster
for user in unique_users:
    cluster = user_features_df.loc[user, 'cluster']
    model = cluster_models[cluster]
    
    
    user_data = all_user_item_pairs[all_user_item_pairs['CustomerID'] == user]
    user_data['interaction_proba'] = model.predict_proba(user_data[['CustomerID_enc', 'ProductName_enc']])[:, 1]
    
    # Apply threshold
    recommended_items = user_data[user_data['interaction_proba'] >= threshold]['ProductName'].tolist()#[:3] forget it!!
    recommendations_list.append({'CustomerID': user, 'recommended_items': recommended_items})



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user_data['interaction_proba'] = model.predict_proba(user_data[['CustomerID_enc', 'ProductName_enc']])[:, 1]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user_data['interaction_proba'] = model.predict_proba(user_data[['CustomerID_enc', 'ProductName_enc']])[:, 1]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-ver

In [41]:
recommendations_df = pd.DataFrame(recommendations_list)
recommendations_df

Unnamed: 0,CustomerID,recommended_items
0,LS172304,"[Personal Creations Ink Jet Cards and Labels, ..."
1,MV174854,[]
2,CS121304,[]
3,AP109154,[]
4,JF154904,[]
...,...,...
4868,RE194054,[]
4869,PH187904,[]
4870,JR157004,[]
4871,CJ118754,[]


In [42]:
recommendations_df.iloc[0].recommended_items == recommendations_df.iloc[1].recommended_items

False

In [38]:
len(recommendations_df.iloc[0].recommended_items)

199

In [39]:
recommendations_df.to_csv("recommendations_df.csv",index=False)

In [43]:
recommendations_df.loc[10][1]

[]