In [1]:
import numpy as np

In [2]:
class CustomLinearRegression:
    """Linear regression trained with full-batch gradient descent on squared error.

    A column of ones is prepended to the features, so ``theta[0]`` plays the
    role of the intercept and the remaining rows of ``theta`` are the
    per-feature weights.

    Args:
        x_data: Feature matrix with one row per sample (must expose ``.shape``).
        y_target: Targets, one row per sample. A 1-D array is reshaped into a
            column so it lines up with the ``(num_samples, 1)`` predictions
            instead of broadcasting into a ``(num_samples, num_samples)`` matrix.
        learning_rate: Step size applied to every gradient update.
        num_epochs: Number of gradient updates performed by ``fit``.
    """

    def __init__(self, x_data, y_target, learning_rate=0.01, num_epochs=10000):
        self.num_samples = x_data.shape[0]
        # Leading column of ones lets theta[0] act as the bias term.
        self.x_data = np.c_[np.ones((self.num_samples, 1)), x_data]

        y_target = np.asarray(y_target)
        if y_target.ndim == 1:
            # Keep targets as a column vector to match the prediction shape.
            y_target = y_target.reshape(-1, 1)
        self.y_target = y_target

        self.learning_rate = learning_rate
        self.num_epochs = num_epochs

        # Random (unseeded) initialisation: one row per column of self.x_data.
        self.theta = np.random.randn(self.x_data.shape[1], 1)
        # One scalar mean-squared-error entry is appended per training epoch.
        self.losses = []

    def compute_loss(self, y_pred, y_target):
        """Return the element-wise squared error between predictions and targets."""
        loss = (y_pred - y_target)**2
        return loss

    def predict(self, x_data):
        """Return ``x_data @ theta``.

        ``x_data`` must already contain the leading column of ones (as
        ``self.x_data`` does); no bias column is added here.
        """
        y_pred = x_data.dot(self.theta)
        return y_pred

    def fit(self):
        """Run ``num_epochs`` gradient-descent updates on the stored data.

        Prints the mean squared error every 50 epochs.

        Returns:
            dict with
              * ``'loss'``: average of every recorded per-epoch mean squared
                error (``nan`` when nothing has been recorded), and
              * ``'weight'``: the final ``theta``.
        """
        for epoch in range(self.num_epochs):
            y_pred = self.predict(self.x_data)

            # Collapse the per-sample errors into one MSE value so the history
            # (and the progress print) hold a scalar per epoch.
            loss = float(np.mean(self.compute_loss(y_pred, self.y_target)))
            self.losses.append(loss)

            # Gradient of the mean squared error with respect to theta.
            loss_grad = 2*(y_pred - self.y_target)/self.num_samples
            gradients = self.x_data.T.dot(loss_grad)

            self.theta = self.theta - self.learning_rate*gradients

            if epoch%50 == 0:
                print(f'Epoch: {epoch} - Loss: {loss}')

        # Summarise only after *all* epochs have run; guard the empty history
        # (e.g. num_epochs == 0) against a division by zero.
        if self.losses:
            mean_loss = sum(self.losses)/len(self.losses)
        else:
            mean_loss = float('nan')

        return {
            'loss': mean_loss,
            'weight': self.theta
        }

In [4]:
def r2score(y_pred, y):
    """Coefficient of determination (R^2) of predictions against true values.

    Computed as ``1 - RSS / TSS`` where ``RSS`` is the residual sum of squares
    and ``TSS`` is the total sum of squares of ``y`` around its mean.

    Args:
        y_pred: Predicted values (any array-like, e.g. list or ndarray).
        y: True values, same shape as ``y_pred``.

    Returns:
        The R^2 score. When ``y`` is constant (``TSS == 0``) the ratio is
        undefined, so ``1.0`` is returned for a perfect prediction and ``0.0``
        otherwise. Empty input yields ``nan``.
    """
    y_pred = np.asarray(y_pred, dtype=float)
    y = np.asarray(y, dtype=float)

    if y.size == 0:
        # Nothing to score; avoid the mean-of-empty warning.
        return float('nan')

    rss = np.sum((y - y_pred)**2)
    tss = np.sum((y - y.mean())**2)

    if tss == 0:
        # Constant targets: avoid 0/0 or x/0 and return a finite score.
        return 1.0 if rss == 0 else 0.0

    r2 = 1 - (rss/tss)
    return r2

In [6]:
# Two sanity checks for r2score: identical arrays, then a mismatched pair.
# Each score is printed in turn; y_pred / y keep the last pair afterwards.
for y_pred, y in (
    (np.array([1, 2, 3, 4, 5]), np.array([1, 2, 3, 4, 5])),
    (np.array([1, 2, 3, 4, 5]), np.array([3, 5, 5, 2, 4])),
):
    print(r2score(y_pred, y))

1.0
-2.235294117647059


### Polynomial Regression

In [8]:
def create_polynomial_features(x, degree=2):
    """Append element-wise powers 2..degree of ``x`` as extra columns.

    The result is laid out as ``[x, x**2, ..., x**degree]``: all original
    columns first, followed by one full group of columns per power. When
    ``degree`` is below 2 the input object is returned unchanged.

    Note: a later cell of this notebook defines a function with the same
    name, which replaces this one once that cell has been executed.
    """
    if degree < 2:
        # No higher powers requested: hand the input back untouched.
        return x

    higher_powers = [np.power(x, exponent) for exponent in range(2, degree + 1)]
    return np.c_[(x, *higher_powers)]

In [9]:
def create_polynomial_features(x, degree=2):
    """Expand every feature column into its powers 1..degree.

    Columns are grouped per feature, i.e. for features ``a`` and ``b`` and
    ``degree=2`` the output columns are ``[a, a**2, b, b**2]``.

    Args:
        x: Array-like of shape ``(n_samples, n_features)``. A 1-D input is
            treated as a single feature column.
        degree: Highest power to generate; must be at least 1. ``degree=1``
            returns a copy of the (2-D) input.

    Returns:
        ndarray of shape ``(n_samples, n_features * degree)``.

    Raises:
        ValueError: if ``degree < 1`` or ``x`` has more than two dimensions
            (or none).
    """
    if degree < 1:
        raise ValueError(f"degree must be >= 1, got {degree}")

    x = np.asarray(x)
    if x.ndim == 1:
        # A flat vector is one feature observed n times, not n features.
        x = x.reshape(-1, 1)
    elif x.ndim != 2:
        raise ValueError(f"x must be 1-D or 2-D, got {x.ndim} dimensions")

    # Shape (n_samples, n_features, degree): the powers of each feature sit
    # next to each other, so flattening the last two axes yields the
    # per-feature grouping described above.
    powers = np.stack([np.power(x, d) for d in range(1, degree + 1)], axis=-1)
    return powers.reshape(x.shape[0], x.shape[1] * degree)

### Sales Prediction

In [10]:
import pandas as pd

In [11]:
# Read the sales CSV and pass it straight through pd.get_dummies, which
# replaces the Influencer column with the Influencer_* indicator columns
# shown in the output below.
df = pd.get_dummies(pd.read_csv('SalesPrediction.csv'))
df

Unnamed: 0,TV,Radio,Social Media,Sales,Influencer_Macro,Influencer_Mega,Influencer_Micro,Influencer_Nano
0,16.0,6.566231,2.907983,54.732757,False,True,False,False
1,13.0,9.237765,2.409567,46.677897,False,True,False,False
2,41.0,15.886446,2.913410,150.177829,False,True,False,False
3,83.0,30.020028,6.922304,298.246340,False,True,False,False
4,15.0,8.437408,1.405998,56.594181,False,False,True,False
...,...,...,...,...,...,...,...,...
4567,26.0,4.472360,0.717090,94.685866,False,False,True,False
4568,71.0,20.610685,6.545573,249.101915,False,False,False,True
4569,44.0,19.800072,5.096192,163.631457,False,False,True,False
4570,71.0,17.534640,1.940873,253.610411,True,False,False,False


In [12]:
# Fill missing entries with the mean of their column.
# NOTE(review): the means are computed over the whole frame, i.e. before the
# train/test split performed in a later cell - confirm that is intended.
df = df.fillna(df.mean())

# Every column except the target 'Sales' is used as a model input.
feature_columns = df.columns[df.columns != 'Sales'].tolist()
X = df.loc[:, feature_columns]
y = df.loc[:, ['Sales']]

In [13]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Hold out 33% of the rows for evaluation; the fixed random_state keeps the
# split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=0
)

# Standardise with statistics learned from the training rows only, then
# apply the same transformation to the held-out rows.
scaler = StandardScaler().fit(X_train)
X_train_processed = scaler.transform(X_train)
X_test_processed = scaler.transform(X_test)
print(scaler.mean_[0])

# Degree-2 polynomial expansion of the scaled features.
poly_features = PolynomialFeatures(degree=2, interaction_only=False).fit(X_train_processed)
X_train_poly = poly_features.transform(X_train_processed)
X_test_poly = poly_features.transform(X_test_processed)

# Ordinary least squares on the expanded features.
poly_model = LinearRegression().fit(X_train_poly, y_train)

# Score the held-out rows with R^2.
preds = poly_model.predict(X_test_poly)
print(r2_score(y_test, preds))

54.173577723283785
0.9951771662021959
