In [3]:
# Upload CSV file manually
from google.colab import files
uploaded = files.upload()

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix

# Load the uploaded file (assumes only one file)
filename = list(uploaded.keys())[0]
df = pd.read_csv(filename)

# Clean and transform target column
df['Return_Binary'] = df['Return_Status'].apply(lambda x: 1 if x == 'Returned' else 0)

# One-hot encode categorical features
df_encoded = pd.get_dummies(df, columns=[
    'Product_Category', 'User_Gender', 'User_Location',
    'Payment_Method', 'Shipping_Method'
], drop_first=True)

# Save original for merging later
df_original = df.copy()

# Features and target
X = df_encoded.drop(columns=[
    'Order_ID', 'Product_ID', 'User_ID', 'Order_Date',
    'Return_Date', 'Return_Reason', 'Return_Status', 'Return_Binary'
])
y = df_encoded['Return_Binary']

# Fill missing values
X = X.fillna(0)

# Train-test split (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_scaled_full = scaler.transform(X)

# Train model
model = LogisticRegression(max_iter=2000)
model.fit(X_train_scaled, y_train)

# Evaluate on test set
y_test_pred = model.predict(scaler.transform(X_test))
print("\n📊 Classification Report on Test Data:")
print(classification_report(y_test, y_test_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))

# Predict on entire dataset
y_full_pred = model.predict(X_scaled_full)

# Save full prediction result
df_result = df_original.copy()
df_result['Predicted_Return'] = y_full_pred

# Save to CSV
output_filename = "predicted_returns_full.csv"
df_result.to_csv(output_filename, index=False)
print(f"\n✅ File saved as {output_filename}")

# Download in Colab
files.download(output_filename)


Saving ecommerce_returns_synthetic_data.csv to ecommerce_returns_synthetic_data.csv

📊 Classification Report on Test Data:
              precision    recall  f1-score   support

           0       0.95      1.00      0.97      1009
           1       1.00      0.94      0.97       991

    accuracy                           0.97      2000
   macro avg       0.97      0.97      0.97      2000
weighted avg       0.97      0.97      0.97      2000

Confusion Matrix:
 [[1009    0]
 [  55  936]]

✅ File saved as predicted_returns_full.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [5]:
# Example Python snippet
high_risk = df_result[df_result['Predicted_Return'] == 1]
risk_summary = high_risk.groupby('Product_Category').size().reset_index(name='Predicted_Returns')
risk_summary.sort_values(by='Predicted_Returns', ascending=False).head(10).to_csv("high_risk_products.csv", index=False)

In [7]:
from google.colab import files
files.download("high_risk_products.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>