# Sentiment Analysis: Time of Day Effects on Review Sentiment

## Research Question

**How does the time of day affect whether customers leave positive or negative reviews?**

### Business Application

Understanding when customers are more likely to leave positive reviews can help businesses:
- **Optimize timing** for sending feedback requests
- **Maximize positive review rates** by contacting customers at optimal times
- **Improve customer satisfaction** strategies based on temporal patterns

### Approach

1. Analyze sentiment patterns across different hours of the day
2. Build models to predict sentiment (positive vs negative)
3. Compare models with and without time-of-day features
4. Identify optimal time windows for requesting feedback

---

## Section 1: Setup and Data Loading

In [None]:
# Install required packages
import sys
import subprocess
import os

def install_package(package, import_name=None):
    """Install *package* with pip unless it can already be imported.

    Args:
        package: Distribution name handed to ``pip install``.
        import_name: Module name used for the availability check. Defaults
            to *package*; pass it when the two differ, e.g.
            ``install_package("scikit-learn", import_name="sklearn")``.
            Without it, such a package would be reinstalled on every run
            because its distribution name is never importable.

    Raises:
        subprocess.CalledProcessError: If the pip command exits non-zero.
    """
    # Local import keeps this helper independent of the file-level imports.
    import importlib

    try:
        importlib.import_module(import_name or package)
    except ImportError:
        # Invoke pip through the running interpreter so the package is
        # installed into the same environment this code executes in.
        subprocess.check_call([sys.executable, "-m", "pip", "install", package, "-q"])

# Ensure each third-party dependency is importable, installing it if needed
for _pkg in ("vaderSentiment", "textblob", "scipy"):
    install_package(_pkg)

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix

# Set style
# 'seaborn-v0_8' only exists in Matplotlib >= 3.6; earlier releases expose the
# same style sheet under the name 'seaborn'. Fall back to that name instead of
# failing with OSError on an older installation.
_preferred_style = 'seaborn-v0_8'
plt.style.use(_preferred_style if _preferred_style in plt.style.available else 'seaborn')
sns.set_palette("husl")

print("✓ Libraries imported successfully")

In [None]:
# Load dataset
# Candidate locations, tried in order; the first one that exists is used.
possible_paths = [
    "/Users/abdullah/Desktop/HU Classes/GRAD699/Sentiment Analysis/Amazon_Data.csv",
    "../Amazon_Data.csv",
    "Amazon_Data.csv",
]

# Check if running in Google Colab
try:
    import google.colab  # noqa: F401 -- imported only to detect Colab
except ImportError:
    # Only a missing module means "not Colab". A bare `except:` here would
    # also swallow KeyboardInterrupt and genuine errors raised during import.
    IN_COLAB = False
else:
    IN_COLAB = True
    possible_paths.extend([
        "/content/drive/MyDrive/Amazon_Data.csv",
        "/content/Amazon_Data.csv",
    ])

csv_path = next((p for p in possible_paths if os.path.exists(p)), None)

if csv_path is None:
    # List every location that was tried so the failure is actionable.
    raise FileNotFoundError(
        "Could not find Amazon_Data.csv. Searched: " + ", ".join(possible_paths)
    )

df = pd.read_csv(csv_path)
print(f"✓ Found file at: {csv_path}")

print(f"Dataset loaded: {len(df):,} rows, {len(df.columns)} columns")
print(f"Columns: {list(df.columns)}")
df.head()

## Section 2: Data Cleaning and Preparation

**CRITICAL: We will split data BEFORE any feature engineering to prevent data leakage**

In [None]:
# Restrict the frame to the three columns used downstream and discard any
# row that is missing one of them
required_cols = ['text', 'rating', 'timestamp']
df = df[required_cols].dropna().copy()

# Parse timestamps; values that cannot be parsed become NaT and are dropped.
# NOTE(review): assumes the raw column is in a form pd.to_datetime interprets
# correctly without a `unit=` argument (e.g. date strings) -- confirm against
# the CSV, since integer epoch values would be read as nanoseconds.
df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
df = df.dropna(subset=['timestamp'])

# Hour of day (0-23), taken from the timestamp exactly as stored.
# NOTE(review): presumably not the reviewer's local time -- verify the
# time zone of the source data before interpreting hourly patterns.
df['review_hour'] = df['timestamp'].dt.hour

# Keep only rows whose text has at least one character
has_text = df['text'].astype(str).str.len() > 0
df = df[has_text].copy()

print(f"Clean dataset: {len(df):,} rows")
print(f"Date range: {df['timestamp'].min()} to {df['timestamp'].max()}")
print(f"\nRating distribution:")
print(df['rating'].value_counts().sort_index())
df.head()

### **Step 2A: Split Data FIRST (Before Feature Engineering)**

This prevents data leakage - we split before creating any features.

In [None]:
# Create binary target for stratification (based on rating only):
# 1 for ratings of 2 or below, 0 otherwise.
df['rating_binary'] = (df['rating'] <= 2).astype(int)

# First split: separate test set (20%)
df_temp, df_test = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['rating_binary']
)

# Second split: separate train (64%) and validation (16%)
df_train, df_val = train_test_split(
    df_temp,
    test_size=0.2,  # 0.2 * 0.8 = 0.16 of total
    random_state=42,
    stratify=df_temp['rating_binary']
)

# Drop the temporary stratification column. Rebind each name to the new frame
# returned by drop() instead of mutating the train_test_split outputs in
# place: those outputs are selections taken from `df`, and modifying them in
# place can trigger pandas' chained-assignment warnings.
df_train = df_train.drop(columns=['rating_binary'])
df_val = df_val.drop(columns=['rating_binary'])
df_test = df_test.drop(columns=['rating_binary'])

print("=" * 60)
print("DATA SPLIT COMPLETE - No Data Leakage")
print("=" * 60)
print(f"Train set: {len(df_train):,} samples ({len(df_train)/len(df)*100:.1f}%)")
print(f"Validation set: {len(df_val):,} samples ({len(df_val)/len(df)*100:.1f}%)")
print(f"Test set: {len(df_test):,} samples ({len(df_test)/len(df)*100:.1f}%)")
print("\n✓ Split completed BEFORE feature engineering")
print("✓ Test set will ONLY be used for final evaluation")
print("=" * 60)

## Section 3: Feature Engineering (On Split Data)

Now we create features on each split separately to avoid leakage.

In [None]:
# VADER sentiment scoring and derived features, computed per split
analyzer = SentimentIntensityAnalyzer()

def get_compound(text):
    """Return the VADER compound score for *text*; missing values score 0.0."""
    if pd.isna(text):
        return 0.0
    return analyzer.polarity_scores(str(text))['compound']

print("Calculating VADER sentiment scores on each split...")

# Hard-coded cut-off (not fitted to any split): a review counts as negative
# when its compound score falls below this value.
threshold = -0.05

# Every feature below is a row-wise transformation, so applying the same
# steps to each split shares no information between them.
# NOTE(review): `is_negative` is a deterministic function of `compound`, so
# any model that receives `compound` as an input can reproduce the label
# exactly.
for part in (df_train, df_val, df_test):
    # Sentiment score and the binary target: Negative (1) vs Non-negative (0)
    part['compound'] = part['text'].apply(get_compound)
    part['is_negative'] = (part['compound'] < threshold).astype(int)

    # Review length in characters
    part['review_length'] = part['text'].astype(str).str.len()

    # Circular encoding of the hour so that 23:00 and 00:00 end up adjacent
    hour_angle = 2 * np.pi * part['review_hour'] / 24
    part['hour_sin'] = np.sin(hour_angle)
    part['hour_cos'] = np.cos(hour_angle)

print("✓ Features created on each split separately")
print(f"\nTarget distribution (train):")
print(df_train['is_negative'].value_counts())
print(f"Negative rate: {df_train['is_negative'].mean()*100:.1f}%")

## Section 4: Exploratory Data Analysis - Time of Day Effects

In [None]:
# EDA uses train + validation only; the test split is deliberately left out
df_eda = pd.concat([df_train, df_val], ignore_index=True)

# One row per hour: mean / std / count of the compound score, plus the share
# of reviews labelled negative
sentiment_by_hour = (
    df_eda.groupby('review_hour')
    .agg(
        sentiment_mean=('compound', 'mean'),
        sentiment_std=('compound', 'std'),
        n_reviews=('compound', 'count'),
        negative_rate=('is_negative', 'mean'),
    )
    .reset_index()
    .rename(columns={'review_hour': 'hour'})
)
sentiment_by_hour['positive_rate'] = 1 - sentiment_by_hour['negative_rate']

print("Sentiment Statistics by Hour of Day:")
print(sentiment_by_hour[['hour', 'n_reviews', 'sentiment_mean', 'negative_rate', 'positive_rate']])

In [None]:
# Visualization: Sentiment by Hour
# Two stacked panels: mean compound score per hour (top) and the split of
# positive vs negative reviews per hour (bottom).
fig, axes = plt.subplots(2, 1, figsize=(12, 10))
ax_score, ax_share = axes

hours = sentiment_by_hour['hour']
mean_score = sentiment_by_hour['sentiment_mean']
score_std = sentiment_by_hour['sentiment_std']
pos_share = sentiment_by_hour['positive_rate']
neg_share = sentiment_by_hour['negative_rate']

# Top panel: hourly mean with a band of one standard deviation either side
ax_score.plot(hours, mean_score,
              marker='o', linewidth=2, markersize=6, color='steelblue')
ax_score.fill_between(hours, mean_score - score_std, mean_score + score_std,
                      alpha=0.3, color='steelblue')
ax_score.set_xlabel('Hour of Day (0-23)', fontsize=12)
ax_score.set_ylabel('Average Sentiment Score', fontsize=12)
ax_score.set_title('Average Sentiment Score by Hour of Day', fontsize=14, fontweight='bold')
ax_score.set_xticks(range(0, 24))
ax_score.grid(True, alpha=0.3)
ax_score.axhline(y=0, color='r', linestyle='--', alpha=0.5, label='Neutral')
ax_score.legend()

# Bottom panel: stacked bars, negative share drawn on top of positive share
ax_share.bar(hours, pos_share, alpha=0.7, label='Positive Rate', color='green')
ax_share.bar(hours, neg_share, alpha=0.7, label='Negative Rate', color='red',
             bottom=pos_share)
ax_share.set_xlabel('Hour of Day (0-23)', fontsize=12)
ax_share.set_ylabel('Proportion of Reviews', fontsize=12)
ax_share.set_title('Positive vs Negative Review Rate by Hour of Day', fontsize=14, fontweight='bold')
ax_share.set_xticks(range(0, 24))
ax_share.legend()
ax_share.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

# The five hours with the highest positive share (no minimum sample size is
# applied at this stage)
best_hours = sentiment_by_hour.nlargest(5, 'positive_rate')[['hour', 'positive_rate', 'n_reviews']]
print("\nTop 5 Hours with Highest Positive Review Rates:")
print(best_hours)

In [None]:
# Statistical test: does sentiment differ significantly by time of day?
# Section banner for the cell's printed output.
print("=" * 60, "STATISTICAL ANALYSIS: Time of Day Effects", "=" * 60, sep="\n")

# Bucket an hour of the day into one of four six-hour periods
def get_time_period(hour):
    """Return the label of the six-hour period that contains *hour*.

    Hours in [6, 12) are morning, [12, 18) afternoon and [18, 24) evening.
    Every other value, including anything outside 0-23, is labelled night.
    """
    daytime_bands = (
        (6, 12, 'Morning (6-12)'),
        (12, 18, 'Afternoon (12-18)'),
        (18, 24, 'Evening (18-24)'),
    )
    for start, stop, label in daytime_bands:
        if start <= hour < stop:
            return label
    return 'Night (0-6)'

df_eda['time_period'] = df_eda['review_hour'].apply(get_time_period)

# Compare time periods: sentiment mean/std, negative share and review count
time_period_stats = df_eda.groupby('time_period').agg({
    'compound': ['mean', 'std'],
    'is_negative': 'mean',
    'review_hour': 'count'
})
time_period_stats.columns = ['avg_sentiment', 'std_sentiment', 'negative_rate', 'n_reviews']
time_period_stats['positive_rate'] = 1 - time_period_stats['negative_rate']

print("\nSentiment by Time Period:")
print(time_period_stats.sort_values('positive_rate', ascending=False))

# T-test: Morning vs Evening
morning = df_eda[df_eda['time_period'] == 'Morning (6-12)']['compound']
evening = df_eda[df_eda['time_period'] == 'Evening (18-24)']['compound']

# Welch's variant (equal_var=False): the two periods have different sample
# sizes and nothing guarantees equal variances, which the default Student
# t-test assumes.
# NOTE(review): with very large samples even a tiny mean difference is
# "significant"; read the p-value together with the size of the difference.
t_stat, p_value = stats.ttest_ind(morning, evening, equal_var=False)
print(f"\nT-test: Morning vs Evening Sentiment")
print(f"  Morning mean: {morning.mean():.4f}")
print(f"  Evening mean: {evening.mean():.4f}")
print(f"  T-statistic: {t_stat:.4f}")
print(f"  P-value: {p_value:.4e}")
print(f"  Significant difference: {'Yes' if p_value < 0.05 else 'No'}")
print("=" * 60)

## Section 5: Model Building - Does Time of Day Matter?

We'll compare two models:
1. **Baseline Model**: Without time-of-day features
2. **Time Model**: With time-of-day features

If the time-of-day features improve validation performance, that is evidence (not proof) that time of day is associated with review sentiment.

In [None]:
# Baseline model inputs: every feature except the time-of-day encoding.
# NOTE(review): the target `is_negative` is defined as `compound < threshold`,
# and `compound` is one of the inputs here, so the model can recover the label
# directly. Validation scores will therefore look near-perfect regardless of
# the other features -- reconsider this feature set before drawing conclusions.
base_features = ['compound', 'rating', 'review_length']
X_train_base = df_train[base_features]
X_val_base = df_val[base_features]
X_test_base = df_test[base_features]

# Targets for all three splits
y_train = df_train['is_negative']
y_val = df_val['is_negative']
y_test = df_test['is_negative']

# Standardise using statistics learned from the training split only
scaler_base = StandardScaler()
X_train_base_scaled = scaler_base.fit_transform(X_train_base)
X_val_base_scaled = scaler_base.transform(X_val_base)
X_test_base_scaled = scaler_base.transform(X_test_base)

# Fit the baseline classifier
print("Training Baseline Model (without time-of-day features)...")
model_base = LogisticRegression(max_iter=1000, random_state=42)
model_base.fit(X_train_base_scaled, y_train)

# Validation predictions: hard labels and probability of the negative class
y_val_pred_base = model_base.predict(X_val_base_scaled)
y_val_prob_base = model_base.predict_proba(X_val_base_scaled)[:, 1]

print("\n" + "=" * 60)
print("BASELINE MODEL - Validation Set Performance")
print("=" * 60)
print(classification_report(y_val, y_val_pred_base, target_names=['Positive', 'Negative']))
print(f"ROC-AUC: {roc_auc_score(y_val, y_val_prob_base):.4f}")

In [None]:
# Time model inputs: the baseline features plus the circular hour encoding.
# NOTE(review): `compound` determines the target `is_negative` by definition,
# so both models can score near-perfectly and the comparison below may not be
# able to show any effect of the time features.
time_features = ['compound', 'rating', 'review_length', 'hour_sin', 'hour_cos']
X_train_time = df_train[time_features]
X_val_time = df_val[time_features]
X_test_time = df_test[time_features]

# Standardise using statistics learned from the training split only
scaler_time = StandardScaler()
X_train_time_scaled = scaler_time.fit_transform(X_train_time)
X_val_time_scaled = scaler_time.transform(X_val_time)
X_test_time_scaled = scaler_time.transform(X_test_time)

# Fit the classifier that also sees the time-of-day features
print("Training Time Model (with time-of-day features)...")
model_time = LogisticRegression(max_iter=1000, random_state=42)
model_time.fit(X_train_time_scaled, y_train)

# Validation predictions: hard labels and probability of the negative class
y_val_pred_time = model_time.predict(X_val_time_scaled)
y_val_prob_time = model_time.predict_proba(X_val_time_scaled)[:, 1]

# Validation ROC-AUC of both models, computed once and reused below
base_auc = roc_auc_score(y_val, y_val_prob_base)
time_auc = roc_auc_score(y_val, y_val_prob_time)

print("\n" + "=" * 60)
print("TIME MODEL - Validation Set Performance")
print("=" * 60)
print(classification_report(y_val, y_val_pred_time, target_names=['Positive', 'Negative']))
print(f"ROC-AUC: {time_auc:.4f}")

# Side-by-side comparison; the difference is reported in percentage points.
# NOTE(review): any strictly positive difference is reported as an
# improvement -- no significance test or minimum margin is applied.
improvement = (time_auc - base_auc) * 100
print("\n" + "=" * 60)
print("MODEL COMPARISON")
print("=" * 60)
print(f"Baseline ROC-AUC: {base_auc:.4f}")
print(f"Time Model ROC-AUC: {time_auc:.4f}")
print(f"Improvement: {improvement:+.4f} percentage points")
if improvement > 0:
    print("\n✓ Time-of-day features improve model performance!")
    print("  This confirms that time of day affects review sentiment.")
else:
    print("\n⚠ Time-of-day features do not significantly improve performance.")
print("=" * 60)

## Section 6: Final Evaluation on Test Set

**Only evaluate on test set after model selection is complete.**

In [None]:
# Final evaluation on test set (only after model selection)
print("=" * 60)
print("FINAL EVALUATION ON TEST SET")
print("=" * 60)

# Choose the model by its validation ROC-AUC rather than assuming the time
# model won; a tie goes to the simpler baseline.
if time_auc > base_auc:
    final_name, final_model, X_test_final = "Time Model", model_time, X_test_time_scaled
else:
    final_name, final_model, X_test_final = "Baseline Model", model_base, X_test_base_scaled
print(f"\nSelected model: {final_name} (validation ROC-AUC: {max(time_auc, base_auc):.4f})")

# Hard labels and probability of the negative class on the held-out test set
y_test_pred = final_model.predict(X_test_final)
y_test_prob = final_model.predict_proba(X_test_final)[:, 1]

test_auc = roc_auc_score(y_test, y_test_prob)
test_accuracy = (y_test_pred == y_test).mean()

print("\nTest Set Performance:")
print(classification_report(y_test, y_test_pred, target_names=['Positive', 'Negative']))
print(f"\nROC-AUC: {test_auc:.4f}")
print(f"Accuracy: {test_accuracy:.4f} ({test_accuracy*100:.2f}%)")

# Confusion Matrix (rows: true label, columns: predicted label)
cm = confusion_matrix(y_test, y_test_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=['Positive', 'Negative'],
            yticklabels=['Positive', 'Negative'])
plt.ylabel('True Label', fontsize=12)
plt.xlabel('Predicted Label', fontsize=12)
plt.title('Confusion Matrix - Test Set', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

print("=" * 60)

## Section 7: Business Insights - Optimal Time Windows for Feedback Requests

In [None]:
# Rank hours of the day by positive-review share to suggest request windows
print("=" * 60)
print("BUSINESS INSIGHTS: Optimal Time Windows for Feedback Requests")
print("=" * 60)

# Insights come from the validation split; the test split is not touched here
df_insights = df_val.copy()

# Per-hour negative share, review count and mean compound score
hour_analysis = (
    df_insights.groupby('review_hour')
    .agg(
        negative_rate=('is_negative', 'mean'),
        n_reviews=('is_negative', 'count'),
        avg_sentiment=('compound', 'mean'),
    )
    .reset_index()
    .rename(columns={'review_hour': 'hour'})
)
hour_analysis['positive_rate'] = 1 - hour_analysis['negative_rate']

# Keep hours backed by at least 100 reviews, best positive share first
enough_reviews = hour_analysis['n_reviews'] >= 100
hour_analysis = hour_analysis[enough_reviews]
hour_analysis = hour_analysis.sort_values('positive_rate', ascending=False)

print("\nHours Ranked by Positive Review Rate:")
print(hour_analysis[['hour', 'positive_rate', 'n_reviews', 'avg_sentiment']].head(10))

# The five best-ranked hours are the recommended windows
top_hours = hour_analysis.head(5)
optimal_hours = top_hours['hour'].values
print(f"\n✓ TOP 5 OPTIMAL HOURS for sending feedback requests:")
for rank, row in enumerate(top_hours.itertuples(index=False), 1):
    print(f"  {rank}. Hour {int(row.hour)}:00 - {int(row.hour)+1}:00 ({row.positive_rate*100:.1f}% positive, {int(row.n_reviews):,} reviews)")

# Bar chart of positive share per hour; recommended hours drawn in green
fig, ax = plt.subplots(figsize=(12, 6))
colors = ['green' if h in optimal_hours else 'gray' for h in hour_analysis['hour']]
ax.bar(hour_analysis['hour'], hour_analysis['positive_rate'], color=colors, alpha=0.7)
ax.set_xlabel('Hour of Day', fontsize=12)
ax.set_ylabel('Positive Review Rate', fontsize=12)
ax.set_title('Positive Review Rate by Hour - Optimal Windows Highlighted', fontsize=14, fontweight='bold')
ax.set_xticks(range(0, 24))
ax.grid(True, alpha=0.3, axis='y')

# Reference line: unweighted mean of the hourly positive shares shown
mean_positive_rate = hour_analysis['positive_rate'].mean()
ax.axhline(y=mean_positive_rate, color='r', linestyle='--',
           label=f'Average: {mean_positive_rate*100:.1f}%')
ax.legend()
plt.tight_layout()
plt.show()

print("=" * 60)

## Section 8: Summary and Conclusions

In [None]:
# Assemble the closing report as a list of lines and print it in one call
rule = "=" * 60
auc_verdict = 'improve' if time_auc > base_auc else 'do not significantly improve'
top_three_hours = ', '.join(f'{int(h)}:00' for h in optimal_hours[:3])

summary_lines = [
    rule,
    "EXPERIMENT SUMMARY",
    rule,
    "\n1. DATA LEAKAGE PREVENTION:",
    "   ✓ Data split BEFORE feature engineering",
    "   ✓ Features created separately on train/val/test",
    "   ✓ Test set used ONLY for final evaluation",
    "\n2. KEY FINDINGS:",
    f"   • Time-of-day features {auc_verdict} model performance",
    f"   • Test set accuracy: {test_accuracy*100:.2f}%",
    f"   • Test set ROC-AUC: {test_auc:.4f}",
    "\n3. BUSINESS RECOMMENDATIONS:",
    f"   • Optimal hours for feedback requests: {top_three_hours}",
    "   • These hours show highest positive review rates",
    "   • Consider sending feedback requests during these windows to maximize positive reviews",
    "\n4. LIMITATIONS:",
    "   • Analysis based on historical data - correlation, not causation",
    "   • External factors (seasonality, events) not accounted for",
    "   • Results may vary by industry/product type",
    "\n" + rule,
]
print("\n".join(summary_lines))