### Test Data

In [None]:
import pandas as pd
import numpy as np

# Seed NumPy's legacy global generator so the dataset is identical on every run.
np.random.seed(42)

n_samples = 100

month_labels = [
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
]

# The random columns are drawn one statement at a time, in a fixed order:
# with a global seed, the order of the draws determines the values produced.
data = {"Product_ID": ["P" + str(100 + idx) for idx in range(n_samples)]}
data["Month"] = np.random.choice(month_labels, size=n_samples)
data["Price"] = np.random.randint(100, 1000, size=n_samples)
data["Discount_Percentage"] = np.random.randint(0, 30, size=n_samples)
data["Ad_Spend"] = np.random.randint(1000, 10000, size=n_samples)
data["Last_Month_Sales"] = np.random.randint(50, 300, size=n_samples)

# Target = linear combination of the numeric features plus Gaussian noise
# (mean 0, standard deviation 20), truncated to whole units by astype(int).
linear_signal = (
    0.5 * data["Price"]
    - 2.0 * data["Discount_Percentage"]
    + 0.05 * data["Ad_Spend"]
    + 0.8 * data["Last_Month_Sales"]
)
noise = np.random.normal(0, 20, size=n_samples)
data["Units_Sold"] = (linear_signal + noise).astype(int)

df = pd.DataFrame(data)

# Persist the table without the index column; the preprocessing cell reads it back.
csv_path = "ecommerce_sales_data.csv"
df.to_csv(csv_path, index=False)

csv_path


'ecommerce_sales_data.csv'

### Preprocess the data

In [None]:
import pandas as pd

# Load the generated dataset and one-hot encode the month in a single pipeline.
# drop_first=True omits the first month category, which becomes the implicit
# baseline (a row whose Month_* columns are all zero).
df = pd.read_csv("ecommerce_sales_data.csv").pipe(
    pd.get_dummies, columns=["Month"], drop_first=True
)

# Features: every column except the identifier and the target.
X = df.drop(["Product_ID", "Units_Sold"], axis=1)
# Target: number of units sold.
y = df.loc[:, "Units_Sold"]

# Preview the first rows of the features and of the target.
print(X.head(), "\nTarget variable:", y.head(), sep="\n")


   Price  Discount_Percentage  Ad_Spend  Last_Month_Sales  Month_Aug  \
0    956                   24      5736               153          0   
1    938                   26      2802               210          0   
2    620                   29      9155               186          0   
3    443                   24      9120                92          1   
4    228                   19      7616               295          0   

   Month_Dec  Month_Feb  Month_Jan  Month_Jul  Month_Jun  Month_Mar  \
0          0          0          0          1          0          0   
1          0          0          0          0          0          0   
2          0          0          0          0          0          0   
3          0          0          0          0          0          0   
4          0          0          0          0          0          0   

   Month_May  Month_Nov  Month_Oct  Month_Sep  
0          0          0          0          0  
1          0          0          0          

### Train the model

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluate
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print("R² Score:", r2)
print("Mean Absolute Error:", mae)

# Manual scenario, described BY FEATURE NAME rather than by position.
# get_dummies sorts the month labels and drop_first=True removes the first one,
# so the training columns are ordered Month_Aug, Month_Dec, Month_Feb, ...
# A hand-written positional list is easy to misalign with that order (a 1 meant
# for February would silently land in another month's slot). Only the non-zero
# features need to be listed here; every other column is filled with 0 below.
# The dropped baseline month has no column: leave all Month_* at 0 to select it.
manual_features = {
    "Price": 400,
    "Discount_Percentage": 20,
    "Ad_Spend": 2000,
    "Last_Month_Sales": 90,
    "Month_Feb": 1,
}

# Fail loudly on a misspelled or non-existent feature instead of silently
# dropping it during the reindex.
unknown_features = sorted(set(manual_features) - set(X.columns))
if unknown_features:
    raise KeyError(f"Features not present in the training data: {unknown_features}")

# One-row frame with exactly the training columns, in training order. Passing a
# DataFrame (not a bare list) also lets scikit-learn match feature names.
manual_input = pd.DataFrame([manual_features]).reindex(columns=X.columns, fill_value=0)

# int() truncates toward zero, mirroring how Units_Sold was generated.
predicted_units_simple = int(model.predict(manual_input)[0])
print("Predicted Units Sold (manual input):", predicted_units_simple)



R² Score: 0.9789487584201381
Mean Absolute Error: 20.712793481200215
Predicted Units Sold (manual input): 322


