In [1]:
import pandas as pd
import sys
sys.path.append('../src')
from data_processing import loading_data, basic_cleaning
from feature_engineering import extracting_feature, extracting_sentiment_features, extracting_vader_sentiment

# Load the data and run basic_cleaning over the reviews table.
reviews, products = loading_data()
clean_reviews = basic_cleaning(reviews)

print(clean_reviews['clean_text'].head())

print("Data loaded successfully!")
print(f"Shape: {clean_reviews.shape}")
print(f"Columns: {clean_reviews.columns.tolist()}")

# Inspect the leading clean_text values one at a time: Python type, string
# length and whether the value is NaN.
print("\n=== CHECKING CLEAN_TEXT COLUMN ===")
print("First 10 clean_text values:")
# head(10) yields at most 10 values, so a frame with fewer than 10 rows is
# handled instead of raising IndexError as positional iloc[i] would.
for i, text in enumerate(clean_reviews['clean_text'].head(10)):
    print(f"Row {i}: Type={type(text)}, Length={len(str(text))}")
    print(f"Content: '{text}'")
    print(f"Is NaN? {pd.isna(text)}")
    print("---")

Started with 21674 reviews
After cleaning: 21674 reviews
0    super good, don't get me wrong. but i came for...
1    i decided to try it out although i’m not a hug...
2    my caramel core begins to disappear about half...
3    why are people complaining about the blonde br...
4    this ice cream is worst ice cream i’ve ever ta...
Name: clean_text, dtype: object
Data loaded successfully!
Shape: (21674, 14)
Columns: ['brand', 'key', 'author', 'date', 'stars', 'title', 'helpful_yes', 'helpful_no', 'text', 'taste', 'ingredients', 'texture', 'likes', 'clean_text']

=== CHECKING CLEAN_TEXT COLUMN ===
First 10 clean_text values:
Row 0: Type=<class 'str'>, Length=603
Content: 'super good, don't get me wrong. but i came for the caramel and brownies, not the sweet cream. the packaging made it seem like brownies were packed and bountiful *crying frowny emoji* i'd say the taste of this was amazing, but the ratio of brownie to sweet cream was disappointing. liked it regardless but probably won't bu

In [None]:
# Run every feature extractor on a single review so that a failure can be
# attributed to one specific function.
print("\n=== TESTING INDIVIDUAL FUNCTIONS ===")

# Walk clean_text in row order and keep the first value that is neither
# NaN nor blank after stripping whitespace.
sample_text = None
for row_idx, candidate in enumerate(clean_reviews['clean_text']):
    if pd.isna(candidate) or str(candidate).strip() == '':
        continue
    sample_text = candidate
    print(f"Found good sample at row {row_idx}: '{sample_text[:100]}...'")
    break

if not sample_text:
    # Nothing usable in the column: there is no input to feed the extractors.
    print("No valid text found! This is the problem!")
else:
    # 1) Basic features: called unguarded, so an exception here propagates.
    print("\n1. Testing basic features:")
    basic_result = extracting_feature(sample_text)
    print(basic_result)

    # 2) spaCy-based features: errors are reported, not raised, so the
    #    VADER check below still runs.
    print("\n2. Testing spaCy features:")
    try:
        spacy_result = extracting_sentiment_features(sample_text)
        print(spacy_result)
    except Exception as err:
        print(f"spaCy Error: {err}")

    # 3) VADER features: same report-and-continue handling.
    print("\n3. Testing VADER features:")
    try:
        vader_result = extracting_vader_sentiment(sample_text)
        print(vader_result)
    except Exception as err:
        print(f"VADER Error: {err}")