In [8]:
import pandas as pd
import numpy as np

# Read the sentiment-annotated review table from the artifacts folder.
df = pd.read_csv('../artifacts/preprocessed_with_sentiments.csv')

# Show the schema first so later cells can rely on the real column names.
column_names = df.columns.tolist()
print("Column names:")
print(column_names)
print("\nDataframe shape:", df.shape)
print("\nFirst few rows:")
print(df.head())

# List any column label that occurs more than once.
# NOTE(review): read_csv normally renames repeated headers ('x', 'x.1', ...),
# so this list is expected to be empty for a freshly loaded CSV - confirm.
repeated_label_mask = df.columns.duplicated()
print("\nDuplicate columns:", df.columns[repeated_label_mask].tolist())

Column names:
['product_id', 'product_url', 'product_name', 'product_img', 'product_price', 'product_overall_rating', 'user_name', 'review_url', 'review_rating', 'clean_text', 'aspect_sentiments', 'battery_score', 'camera_score', 'display_score', 'performance_score', 'storage_score', 'design_score', 'price_score', 'sound_score']

Dataframe shape: (203, 19)

First few rows:
   product_id                                        product_url  \
0  B0CMDMKQB7  https://www.amazon.com/SAMSUNG-Smartphone-Unlo...   
1  B0CMDMKQB7  https://www.amazon.com/SAMSUNG-Smartphone-Unlo...   
2  B0CMDMKQB7  https://www.amazon.com/SAMSUNG-Smartphone-Unlo...   
3  B0CMDMKQB7  https://www.amazon.com/SAMSUNG-Smartphone-Unlo...   
4  B0CMDMKQB7  https://www.amazon.com/SAMSUNG-Smartphone-Unlo...   

                                        product_name  \
0  Samsung Galaxy S24 Ultra Cell Phone, 512GB AI ...   
1  Samsung Galaxy S24 Ultra Cell Phone, 512GB AI ...   
2  Samsung Galaxy S24 Ultra Cell Phone, 512GB A

In [10]:
import pandas as pd
import numpy as np

# Reload the sentiment-annotated reviews so this cell does not depend on
# state left behind by an earlier cell.
df = pd.read_csv('../artifacts/preprocessed_with_sentiments.csv')

# User x product rating table: one row per user_name, one column per
# product_name. Several ratings for the same pair are averaged, and pairs
# without any rating are filled with 0.
user_product_matrix = pd.pivot_table(
    df,
    values='review_rating',
    index='user_name',
    columns='product_name',
    aggfunc='mean',
    fill_value=0,
)

print("User-Product Interaction Matrix:")
print(user_product_matrix.head())
print(f"\nShape: {user_product_matrix.shape}")

# Aspect-level sentiment score columns used as product features.
aspect_cols = [
    'battery_score',
    'camera_score',
    'display_score',
    'performance_score',
    'storage_score',
    'design_score',
    'price_score',
    'sound_score',
]

# Average each aspect score over all rows that share a product_name.
aspect_scores_by_product = df.groupby('product_name')[aspect_cols]
product_features = aspect_scores_by_product.mean()

print("\n\nProduct Feature Matrix (Aspect Sentiments):")
print(product_features)

# Write both matrices to the artifacts folder (index labels are kept, since
# they carry the user / product names).
for feature_matrix, csv_path in (
    (user_product_matrix, '../artifacts/user_product_matrix.csv'),
    (product_features, '../artifacts/product_features.csv'),
):
    feature_matrix.to_csv(csv_path)

print("\n✓ Feature matrices saved successfully!")

User-Product Interaction Matrix:
product_name                                        Google - Pixel 3a XL with 64GB Memory Cell Phone (Unlocked) - Purple-ish  \
user_name                                                                                                                      
 Esta muy bonita.  Les gustó mucho Amis niños. ...                                                0.0                          
1INSXNE                                                                                           0.0                          
A                                                                                                 0.0                          
Adrian                                                                                            0.0                          
Ahmad                                                                                             0.0                          

product_name                                        Google Pixel 7a - 