In [194]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the dataset
data = pd.read_csv('Q3_complex_linear_regression_dataset.csv')  # Replace 'sales_data.csv' with your dataset file name

# Encode categorical variables (X2 and X3)
label_encoder = LabelEncoder()
data['X2'] = label_encoder.fit_transform(data['X2'])

# Encode categorical variable X3 with numerical values
x3_mapping = {'A': 1.000, 'B': 2.000, 'C': 3.000}
data['X3'] = data['X3'].map(x3_mapping)

# Split the data into training and testing sets
X = data[['X1', 'X2', 'X3']]
y = data['y']
X_train, X_test, y_train, y_test = train_test_split(X.values, y.values, test_size=0.2, random_state=42)

# Convert numpy arrays to PyTorch tensors
X_train = torch.FloatTensor(X_train.astype('float32'))
y_train = torch.FloatTensor(y_train.astype('float32')).view(-1, 1)
X_test = torch.FloatTensor(X_test.astype('float32'))
y_test = torch.FloatTensor(y_test.astype('float32')).view(-1, 1)




# You can make predictions for new scenarios using the trained model as well.


In [195]:
# Define a linear regression model
class RidgeRegressionModel(nn.Module):
    def __init__(self, input_size, alpha=1.0):
        super(RidgeRegressionModel, self).__init__()
        self.linear = nn.Linear(input_size, 1)  # One output for regression
        self.alpha = alpha  # Regularization strength (adjust as needed)

    def forward(self, x):
        return self.linear(x)

In [196]:
# Create the model and define loss function and optimizer
model = RidgeRegressionModel(input_size=X_train.shape[1], alpha=0.1)  # You can adjust the alpha value
criterion = nn.MSELoss()  # Mean Squared Error loss for regression
optimizer = optim.SGD(model.parameters(), lr=0.01)  # Stochastic Gradient Descent

# Training the model
num_epochs = 1000  # Adjust as needed
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train) + model.alpha * torch.norm(model.linear.weight)  # L2 regularization term
    loss.backward()
    optimizer.step()

# Evaluate the model
model.eval()

RidgeRegressionModel(
  (linear): Linear(in_features=3, out_features=1, bias=True)
)

In [197]:
with torch.no_grad():
    y_pred = model(X_test)
count=0
print(y_pred[19])
print(y_test[19])
for i in range(len(y_pred)):
    if(abs(y_pred[i]-y_test[i])<1):
        count+=1
print("percentage accuracy =",(count/len(y_pred))*100)


mae = (mean_absolute_error(y_test, y_pred))/len(y_test)
mse = mean_squared_error(y_test, y_pred)/len(y_test)
r2 = r2_score(y_test, y_pred.numpy())/len(y_test)

print(f'Mean Absolute Error: {mae}')
print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

# Print the model parameters (coefficients)
for name, param in model.named_parameters():
    if param.requires_grad:
        print(f'{name}: {param.data.numpy()}')


# You can make predictions for new scenarios using the trained model as well.

tensor([25.6760])
tensor([24.3622])
percentage accuracy = 33.33333333333333
Mean Absolute Error: 0.028550843397776287
Mean Squared Error: 0.07437984148661296
R-squared: 0.01572216522097546
linear.weight: [[3.0223153  1.9947672  0.14603294]]
linear.bias: [-0.3351007]
