In [4]:
import pandas as pd
import random

# Define sample data
product_names = ["Laptop", "Headphones", "Smartphone", "Camera", "Monitor", "Mouse", "Keyboard"]
categories = ["Electronics", "Accessories", "Gadgets", "Cameras"]

# Function to generate random dataset
def generate_dataset(num_samples):
    data = {
        "Product_Name": [random.choice(product_names) for _ in range(num_samples)],
        "Category": [random.choice(categories) for _ in range(num_samples)],
        "Price": [round(random.uniform(100, 2000), 2) for _ in range(num_samples)],
        "Discount": [round(random.uniform(5, 50), 2) for _ in range(num_samples)],
        "Sales_Quantity": [random.randint(1, 100) for _ in range(num_samples)],
    }

    # Calculate future price based on discount and sales quantity trends
    df = pd.DataFrame(data)
    df["Future_Price"] = df["Price"] * (1 - df["Discount"] / 100)  # Future price after discount

    return df

# Generate dataset with 1000 rows
ecommerce_data = generate_dataset(1000)

# Save the dataset to a CSV file
ecommerce_data.to_csv('ecommerce_data.csv', index=False)

print("Dataset created and saved as 'ecommerce_data.csv'")


Dataset created and saved as 'ecommerce_data.csv'


In [5]:
# e_commerce_price_prediction.py

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

# Step 1: Load Data (dummy dataset for illustration)
data = pd.read_csv('/content/ecommerce_data.csv')

# Step 2: Data Preprocessing (Feature selection and encoding)
X = data[['Category', 'Price', 'Discount', 'Sales_Quantity']]
y = data['Future_Price']

# Optional: Convert categorical data to numeric using one-hot encoding
X = pd.get_dummies(X, drop_first=True)

# Step 3: Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Define models
ridge = Ridge()
lasso = Lasso()
elastic = ElasticNet()

# Step 5: Use AutoML (GridSearch) to find the best model
param_grid = {
    'alpha': [0.01, 0.1, 1, 10, 100]
}

ridge_grid = GridSearchCV(ridge, param_grid, cv=5, scoring='r2')
lasso_grid = GridSearchCV(lasso, param_grid, cv=5, scoring='r2')
elastic_grid = GridSearchCV(elastic, param_grid, cv=5, scoring='r2')

# Step 6: Train each model and find the best
ridge_grid.fit(X_train, y_train)
lasso_grid.fit(X_train, y_train)
elastic_grid.fit(X_train, y_train)

# Step 7: Evaluate Models
models = {
    "Ridge": ridge_grid,
    "Lasso": lasso_grid,
    "ElasticNet": elastic_grid
}

for name, model in models.items():
    y_pred = model.predict(X_test)
    print(f"{name} R-squared score: {r2_score(y_test, y_pred)}")


Ridge R-squared score: 0.970090837408736
Lasso R-squared score: 0.9700632671286147
ElasticNet R-squared score: 0.9700523738294208


In [7]:
# test_e_commerce_price_prediction.py

import unittest
import pandas as pd
# The file name was likely incorrect. Changed to ipython-input-5-fa38f58547e0
from ipython_input_5_fa38f58547e0 import ridge_grid, lasso_grid, elastic_grid

class TestPricePrediction(unittest.TestCase):

    def setUp(self):
        self.data = pd.read_csv('ecommerce_data.csv')
        self.X = pd.get_dummies(self.data[['Category', 'Price', 'Discount', 'Sales_Quantity']], drop_first=True)
        self.y = self.data['Future_Price']

    def test_missing_values(self):
        # Test if there are any missing values
        self.assertFalse(self.X.isnull().values.any(), "Dataset contains missing values")

    def test_model_accuracy(self):
        # Test if models perform within an acceptable range
        ridge_score = ridge_grid.best_score_
        lasso_score = lasso_grid.best_score_
        elastic_score = elastic_grid.best_score_
        self.assertGreater(ridge_score, 0.7, "Ridge model accuracy is too low")
        self.assertGreater(lasso_score, 0.7, "Lasso model accuracy is too low")
        self.assertGreater(elastic_score, 0.7, "ElasticNet model accuracy is too low")

    def test_predictions(self):
        # Test if predictions are in the expected range
        ridge_pred = ridge_grid.predict(self.X)
        self.assertGreaterEqual(ridge_pred.min(), 0, "Predicted prices should not be negative")

if __name__ == '__main__':
    unittest.main()


ModuleNotFoundError: No module named 'ipython_input_5_fa38f58547e0'