# Task 8: Predict the Future – Intro to Machine Learning

We will build a linear regression model that predicts a product's price from its category, size, demand level, and base cost, using a synthetic dataset.

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score

# Synthetic Dataset
# Seed NumPy's global generator so every run produces the same rows.
np.random.seed(42)
n_samples = 200
categories = ['Electronics', 'Clothing', 'Furniture']
sizes = ['Small', 'Medium', 'Large']
demand_levels = ['Low', 'Medium', 'High']

# One row per product: three categorical features drawn uniformly from the
# lists above, plus an integer base cost in [100, 1000).
data = pd.DataFrame({
    'Category': np.random.choice(categories, n_samples),
    'Size': np.random.choice(sizes, n_samples),
    'Demand': np.random.choice(demand_levels, n_samples),
    'BaseCost': np.random.randint(100, 1000, n_samples),
})

# Price = base cost scaled by a demand-dependent multiplier, plus Gaussian
# noise (mean 0, standard deviation 50). Category and Size do not influence
# the price.
demand_multiplier = {'Low': 1.1, 'Medium': 1.3, 'High': 1.6}
# Computed column-wise instead of with a per-row ``apply``: the multiplier is
# looked up with ``map`` and the noise is drawn as a single array of
# ``n_samples`` values from the seeded global generator.
price_noise = np.random.normal(0, 50, n_samples)
data['Price'] = data['BaseCost'] * data['Demand'].map(demand_multiplier) + price_noise

# Feature Encoding
# Category and Size are converted to integer codes with LabelEncoder, which
# numbers the classes in sorted (alphabetical) order. Category is nominal, so
# its codes carry an arbitrary order; NOTE(review): one-hot encoding would be
# the usual choice for a linear model if Category ever influences the target.
le_category = LabelEncoder()
le_size = LabelEncoder()
data['Category'] = le_category.fit_transform(data['Category'])
data['Size'] = le_size.fit_transform(data['Size'])

# Demand is ordinal (Low < Medium < High). Alphabetical label codes would be
# High=0, Low=1, Medium=2, which scrambles that order and leaves a linear
# model unable to express "more demand -> higher price" with one coefficient.
# Encode it with an explicit rank instead.
demand_rank = {'Low': 0, 'Medium': 1, 'High': 2}
data['Demand'] = data['Demand'].map(demand_rank)

# Train-Test Split
# Features are the three encoded categorical columns plus the numeric base
# cost; the target is the generated price.
X = data.loc[:, ['Category', 'Size', 'Demand', 'BaseCost']]
y = data.loc[:, 'Price']
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Train Model
# Linear regression fitted on the training rows. fit() returns the estimator
# itself, so construction and fitting are chained.
model = LinearRegression().fit(X_train, y_train)

# Evaluate
# Score the fitted model on the held-out test rows.
y_pred = model.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

# Report both metrics, one per line.
for label, value in (('Mean Squared Error:', mse), ('R² Score:', r2)):
    print(label, value)