# E-commerce Return Rate Reduction Analysis
This notebook covers data cleaning, return-rate analysis by category and supplier, a logistic regression model of return likelihood, and export of high-risk products.

In [None]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Read the two input tables into DataFrames.
# Both file names are placeholders resolved against the current working
# directory -- replace with the actual paths.
orders = pd.read_csv(filepath_or_buffer='orders.csv')
returns = pd.read_csv(filepath_or_buffer='returns.csv')

In [None]:
# Merge and clean data
# Collapse returns to one row per order before joining: a left merge repeats
# an order row once for every matching return row, which would over-weight
# orders with several return records in every rate computed from `df`.
# NOTE(review): this keeps only the first return row per order_id; confirm
# that no later step needs the additional return rows.
returns_per_order = returns.drop_duplicates(subset='order_id')
df = pd.merge(orders, returns_per_order, on='order_id', how='left')

# After the left join, rows with no matching return have a null return_id.
df['is_returned'] = df['return_id'].notna().astype(int)

# Return rate by category and supplier
# The mean of the 0/1 flag is the fraction of rows returned in each group
# (a value between 0 and 1, not a percentage).
return_stats = (
    df.groupby(['category', 'supplier'])['is_returned']
    .mean()
    .reset_index()
    .rename(columns={'is_returned': 'return_rate'})
)
return_stats.to_csv('return_percentage_by_category_supplier.csv', index=False)

In [None]:
# Logistic Regression
features = ['price', 'quantity', 'category_encoded', 'region_encoded']  # Example features

# Build the integer-encoded columns that `features` refers to. This step was
# previously only a comment, so `df[features]` raised a KeyError unless the
# input files already contained the encoded columns.
# NOTE(review): the raw column name 'region' is inferred from the feature
# name 'region_encoded' -- confirm it exists in the merged data.
for raw_col in ('category', 'region'):
    encoded_col = raw_col + '_encoded'
    if encoded_col not in df.columns:
        # factorize assigns codes 0..k-1 in order of first appearance and
        # uses -1 for missing values.
        df[encoded_col] = pd.factorize(df[raw_col])[0]

# NOTE(review): integer codes impose an arbitrary order on nominal categories;
# one-hot encoding may suit a linear model better -- left as in the original
# design.

# X keeps every row of df (no rows dropped) so predictions made from it can be
# assigned back to df row-for-row.
X = df[features]
y = df['is_returned']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LogisticRegression()
model.fit(X_train, y_train)

# Report precision/recall/F1 per class on the held-out rows.
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

In [None]:
# Score every row with the fitted model, then export the rows it flags as
# likely returns.
# Column 1 of predict_proba is the probability of the second entry in
# model.classes_ -- presumably the "returned" label (1); confirm both classes
# were present at fit time.
return_proba = model.predict_proba(X)[:, 1]
df['return_probability'] = return_proba

# Keep only rows whose predicted return probability is strictly above 0.7.
is_high_risk = df['return_probability'].gt(0.7)
high_risk_products = df.loc[is_high_risk]
high_risk_products.to_csv('high_risk_products.csv', index=False)