### Importing Libraries

In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

### Loading Dataset

In [3]:
# Read the footwear sales export (path is relative to the notebook's working
# directory) and leave the frame as the cell's last expression so it is shown.
data = pd.read_csv(
    filepath_or_buffer="global_sports_footwear_sales_2018_2026.csv",
)
data

Unnamed: 0,order_id,order_date,brand,model_name,category,gender,size,color,base_price_usd,discount_percent,final_price_usd,units_sold,revenue_usd,payment_method,sales_channel,country,customer_income_level,customer_rating
0,ORD100000,2021-01-30,ASICS,Model-370,Running,Unisex,8,Black,162,15,137.70,1,137.70,Card,Retail Store,Germany,Low,4.6
1,ORD100001,2026-10-05,Reebok,Model-314,Lifestyle,Men,8,Grey,80,5,76.00,3,228.00,Card,Online,USA,Low,3.9
2,ORD100002,2023-11-12,ASICS,Model-763,Lifestyle,Men,8,Black,176,15,149.60,4,598.40,Cash,Retail Store,India,Medium,3.0
3,ORD100003,2026-08-29,Reebok,Model-905,Basketball,Women,7,White,61,15,51.85,2,103.70,Card,Retail Store,India,High,3.4
4,ORD100004,2019-11-09,Nike,Model-413,Training,Men,11,Black,80,0,80.00,4,320.00,Cash,Online,USA,Medium,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995,ORD129995,2018-05-28,Puma,Model-892,Gym,Women,6,Blue,140,10,126.00,1,126.00,Card,Online,UAE,High,4.3
29996,ORD129996,2018-04-28,Adidas,Model-982,Running,Women,9,Grey,113,15,96.05,1,96.05,Card,Online,UK,High,3.5
29997,ORD129997,2026-02-17,ASICS,Model-463,Basketball,Men,11,Blue,130,30,91.00,1,91.00,Wallet,Online,India,Medium,4.4
29998,ORD129998,2024-02-21,New Balance,Model-984,Running,Unisex,11,White,186,10,167.40,2,334.80,Wallet,Retail Store,USA,High,3.9


### Feature Selection

In [4]:
# Target: price after discount, derived from the list price and the discount
# percentage (converted to a 0-1 fraction first).
discount_fraction = data["discount_percent"] / 100
data["final_price"] = data["base_price_usd"] * (1 - discount_fraction)

# Predictors fed to every model below.
# NOTE(review): the target is computed exactly from base_price_usd and
# discount_percent, which are both in X, so it is a deterministic (non-linear)
# function of the features; errors reflect the linear approximation only.
feature_columns = ["size", "base_price_usd", "discount_percent"]
X = data[feature_columns]
y = data["final_price"]

### Train-Test Split

In [5]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [6]:
# Candidate regressors keyed by display name: an unpenalised baseline plus
# L2 (Ridge), L1 (Lasso) and mixed L1/L2 (ElasticNet) penalised variants.
# Insertion order is the order in which they are trained and reported.
models = dict(
    Linear=LinearRegression(),
    Ridge=Ridge(alpha=1),
    Lasso=Lasso(alpha=0.1),
    ElasticNet=ElasticNet(alpha=0.1, l1_ratio=0.5),
)

In [7]:
# Fit each candidate on the training split, score it on the held-out split,
# and print its coefficients (ordered like the columns of X) with the test MSE.
for name in models:
    model = models[name]

    # fit() returns the estimator itself, so prediction can be chained on it.
    preds = model.fit(X_train, y_train).predict(X_test)
    mse = mean_squared_error(y_test, preds)

    print("\n", name)
    print("Coefficients:", model.coef_)
    print("MSE:", mse)


 Linear
Coefficients: [-0.01343843  0.86713323 -1.40095902]
MSE: 20.907020595093808

 Ridge
Coefficients: [-0.01343822  0.86713321 -1.40095834]
MSE: 20.90701800915096

 Lasso
Coefficients: [-0.          0.86708776 -1.39994153]
MSE: 20.901469091773805

 ElasticNet
Coefficients: [-0.          0.86709092 -1.39973769]
MSE: 20.900773161677204


### Feature Removal Check

In [8]:
# Look the Lasso estimator up by name instead of relying on `model`, the
# variable left over from the training loop: after that loop it refers to the
# last entry of `models` (ElasticNet), so the "Lasso" label would be wrong and
# the result would depend on cell execution order.
lasso_coefficients = models["Lasso"].coef_

# A coefficient of exactly zero means the L1 penalty dropped that feature;
# the boolean mask lines up with the columns of X used for fitting.
print("Features removed by Lasso:", X.columns[lasso_coefficients == 0])

Features removed by Lasso: Index(['size'], dtype='object')
