# Logistic Regression to Predict If a User Will Leave a Review

## 1. Load the Dataset

In [None]:

import pandas as pd

# Location of the survey CSV (a relative path, resolved against the current working directory)
file_path = 'Amazon Customer Behavior Survey.csv'

# Read the survey responses into a DataFrame
data = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows; as the cell's last expression this is rendered as output
data.head(5)
    

## 2. Data Preprocessing

In [None]:

# Report how many missing (NaN/None) entries each column contains
print(data.isna().sum())

# List every column name so the feature and target columns can be chosen
print(data.columns)
    

### Selecting Features and Target

In [None]:

# Columns used as model inputs. These names are placeholders: compare them with
# the output of `data.columns` and edit the list to match the dataset's actual columns.
features = ['Age', 'Purchase Amount', 'Number of Purchases', 'Satisfaction Score']

# Target column, assumed to indicate whether a user left a review (1 for Yes, 0 for No).
# Replace 'left_review' with the actual column name that represents review behavior.
target = 'left_review'

# Fail early with an actionable message if any of the chosen names is absent,
# instead of letting dropna()/indexing raise a less informative KeyError.
selected_columns = features + [target]
missing_columns = [col for col in selected_columns if col not in data.columns]
if missing_columns:
    raise KeyError(
        f"Columns not found in the dataset: {missing_columns}. "
        f"Available columns: {list(data.columns)}"
    )

# Drop rows with missing values in any of the selected columns
data = data.dropna(subset=selected_columns)

# Feature matrix and target vector
X = data[features]
y = data[target]
    

## 3. Train-Test Split

In [None]:

from sklearn.model_selection import train_test_split

# Split the data into training and testing sets (80% train, 20% test).
# stratify=y keeps the proportion of each target class the same in both splits,
# so the test metrics are not skewed by an unlucky draw on an imbalanced target.
# NOTE: stratification requires at least two samples of every class in y.
# random_state=42 makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
    

## 4. Train Logistic Regression Model

In [None]:

from sklearn.linear_model import LogisticRegression

# Create a logistic regression classifier that allows up to 1000 solver iterations
model = LogisticRegression(max_iter=1_000)

# Fit the classifier on the training split
model.fit(X=X_train, y=y_train)
    

## 5. Model Evaluation

In [None]:

from sklearn.metrics import accuracy_score, classification_report

# Generate class predictions for the held-out test features
y_pred = model.predict(X_test)

# Build the per-class metrics table up front, then print the headline
# accuracy (two decimals) followed by the full report
test_report = classification_report(y_test, y_pred)
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
print("Classification Report:")
print(test_report)
    

## 6. Feature Importance

In [None]:

# Pair each feature name with its fitted coefficient (first row of model.coef_).
# NOTE: the features are fed to the model unscaled, so coefficient magnitudes are
# expressed per unit of each raw feature and are not directly comparable across features.
coefficient_table = pd.DataFrame({'Feature': X.columns, 'Coefficient': model.coef_[0]})

# Order from the largest coefficient to the smallest
feature_importance = coefficient_table.sort_values(by='Coefficient', ascending=False)

print(feature_importance)
    

## Summary

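This notebook trains a logistic regression model to predict whether a user will leave a review, using features such as age, purchase amount, number of purchases, and satisfaction score. The feature and target column names used in the code are placeholders and must be replaced with the dataset's actual column names. The model is evaluated on a held-out 20% test set using accuracy and a classification report, and its coefficients are inspected to see how each feature relates to the prediction.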