# Linear Regression

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [1]:
class CustomLinearRegression:
  """Linear regression fitted with full-batch gradient descent.

  A column of ones is prepended to the features, so ``theta[0]`` acts as the
  intercept and ``theta`` has shape ``(num_features + 1, 1)``. The weights
  start from a standard-normal draw, so results depend on NumPy's global
  random state.
  """

  def __init__(self, X_data, y_target, learning_rate = 0.01, epochs = 1000):
    """Store the training data and hyper-parameters, and initialise the weights.

    Args:
      X_data: 2-D array of shape (num_samples, num_features), without a bias column.
      y_target: one target per sample; stored as a (num_samples, 1) column.
      learning_rate: step size applied to every gradient update.
      epochs: number of full-batch updates performed by ``fit``.
    """
    self.num_samples = X_data.shape[0]
    # Prepend the bias column of ones.
    self.X_data = np.c_[np.ones((self.num_samples,  1)), X_data]
    # Force a column vector: a 1-D target would otherwise broadcast against
    # the (num_samples, 1) predictions into a (num_samples, num_samples) matrix.
    self.y_target = np.asarray(y_target).reshape(-1, 1)
    self.learning_rate = learning_rate
    # `fit` iterates over self.epochs, so it has to be stored here.
    self.epochs = epochs

    self.theta = np.random.randn(self.X_data.shape[1], 1)
    self.losses = []

  def compute_loss(self, y_pred, y_target):
    """Return the sum of squared errors divided by ``2 * num_samples``."""
    residual = y_pred - y_target
    return np.multiply(residual, residual).sum() / (2 * self.num_samples)

  def predict(self, X_data):
    """Return ``X_data @ theta``.

    ``X_data`` must already contain the leading column of ones, because
    ``theta`` is sized for the bias-augmented training matrix.
    """
    return X_data.dot(self.theta)

  def fit(self):
    """Run ``epochs`` gradient-descent steps on the stored training data.

    The loss of every epoch is appended to ``self.losses`` and a progress
    line is printed every 50 epochs.

    Returns:
      dict with ``'loss'`` (the list of per-epoch losses) and ``'weight'``
      (the fitted ``theta``).
    """
    for epoch in range(self.epochs):
      y_pred = self.predict(self.X_data)
      loss = self.compute_loss(y_pred, self.y_target)
      self.losses.append(loss)
      # Gradient of the halved mean squared error with respect to theta.
      gradient = self.X_data.T.dot(y_pred - self.y_target) / self.num_samples
      self.theta -= self.learning_rate * gradient
      if (epoch % 50)==0:
        print(f'Epoch: {epoch} - Loss: {loss}')
    return {
        'loss': self.losses,
        'weight': self.theta
    }

In [2]:
def r2score(y_pred, y):
  """Coefficient of determination, ``1 - RSS / TSS``.

  Args:
    y_pred: predicted values.
    y: observed values; must expose ``.mean()`` (e.g. a NumPy array).

  Returns:
    1 minus the ratio of the residual sum of squares to the total sum of
    squares around the mean of ``y``.
  """
  residuals = y_pred - y
  deviations = y - y.mean()
  residual_ss = np.sum(np.square(residuals))
  total_ss = np.sum(np.square(deviations))
  return 1 - (residual_ss / total_ss)

In [6]:
# Two sanity checks for r2score: identical arrays, then a poor fit.
for y_pred, y in (
    (np.array([1, 2, 3, 4, 5]), np.array([1, 2, 3, 4, 5])),
    (np.array([1, 2, 3, 4, 5]), np.array([3, 5, 5, 2, 4])),
):
  print(r2score(y_pred, y))

1.0
-2.235294117647059


# Polynomial Regression

In [11]:
def create_polynomial_features(X, degree =2):
  """Append element-wise powers of ``X`` as extra columns, grouped by power.

  The output columns are ``X``, then ``X**2``, ..., up to ``X**degree``.
  For a two-feature input and ``degree=2`` that is ``[x1, x2, x1^2, x2^2]``.
  No cross terms and no bias column are produced.

  Args:
    X: 1-D or 2-D array of features.
    degree: highest power to include; below 2, ``X`` is returned unchanged.
  """
  if degree < 2:
    return X
  blocks = [X]
  blocks.extend(np.power(X, exponent) for exponent in range(2, degree + 1))
  # np.c_ takes its operands as a tuple index and stacks them column-wise.
  return np.c_[tuple(blocks)]

# Demo on a 3x2 matrix. The 1-D array that used to be assigned first was
# overwritten before any use, so it has been dropped.
X = np.array([[1, 2], [2, 3], [3, 4]])
print(create_polynomial_features(X, 2))

[[ 1  2  1  4]
 [ 2  3  4  9]
 [ 3  4  9 16]]


In [19]:
def create_polynomial_features(X, degree =2):
  """Expand every feature into its powers ``1..degree``, grouped by feature.

  For a two-feature input and ``degree=2`` the output columns are
  ``[x1, x1^2, x2, x2^2]``. No cross terms and no bias column are produced.

  Args:
    X: array of shape (n_samples, n_features); a 1-D array is treated as a
      single feature column.
    degree: highest power to include; values below 2 return the features
      themselves as a 2-D array.

  Returns:
    2-D array of shape (n_samples, n_features * max(degree, 1)).
  """
  X = np.asarray(X)
  if X.ndim == 1:
    # One feature given as a flat vector: make it a single column.
    X = X.reshape(-1, 1)
  columns = []
  for feature in X.T:
    # Always keep the feature itself, so degree=1 yields one column per
    # feature instead of a flattened row.
    columns.append(feature)
    columns.extend(np.power(feature, exponent) for exponent in range(2, degree + 1))
  # np.c_ stacks the 1-D pieces as columns.
  return np.c_[tuple(columns)]

# Demo on a 3x2 matrix; with degree 2 the printed columns are grouped per
# feature: [x1, x1^2, x2, x2^2].
X = np.array([[1, 2], [2, 3], [3, 4]])
print(create_polynomial_features(X, 2))

[[ 1  1  2  4]
 [ 2  4  3  9]
 [ 3  9  4 16]]


# Sales Prediction

In [20]:
# Download the sales dataset from Google Drive by file id (needs the `gdown`
# command-line tool to be installed).
!gdown '1A8kK0IEsT3w8htzU18ihFr5UV-euhquC'

Downloading...
From: https://drive.google.com/uc?id=1A8kK0IEsT3w8htzU18ihFr5UV-euhquC
To: /content/SalesPrediction.csv
  0% 0.00/206k [00:00<?, ?B/s]100% 206k/206k [00:00<00:00, 99.0MB/s]


In [30]:
# Read the CSV relative to the working directory rather than from the
# Colab-only absolute path '/content/...'.
# NOTE(review): assumes the download cell saved the file into the current
# working directory — confirm if the download location is ever changed.
df = pd.read_csv('SalesPrediction.csv')
df.head()

Unnamed: 0,TV,Radio,Social Media,Influencer,Sales
0,16.0,6.566231,2.907983,Mega,54.732757
1,13.0,9.237765,2.409567,Mega,46.677897
2,41.0,15.886446,2.91341,Mega,150.177829
3,83.0,30.020028,6.922304,Mega,298.24634
4,15.0,8.437408,1.405998,Micro,56.594181


In [31]:
# One-hot encode the categorical data: `Influencer` becomes one boolean
# column per category, and the numeric columns are left as they are.
df = pd.get_dummies(df)

In [28]:
# Preview the first rows of the encoded frame.
df.head()

Unnamed: 0,TV,Radio,Social Media,Sales,Influencer_Macro,Influencer_Mega,Influencer_Micro,Influencer_Nano
0,16.0,6.566231,2.907983,54.732757,False,True,False,False
1,13.0,9.237765,2.409567,46.677897,False,True,False,False
2,41.0,15.886446,2.91341,150.177829,False,True,False,False
3,83.0,30.020028,6.922304,298.24634,False,True,False,False
4,15.0,8.437408,1.405998,56.594181,False,False,True,False


In [50]:
# Fill missing values with each column's mean. Rebinding `df` instead of
# using inplace=True avoids mutating the frame behind earlier cells' output.
# NOTE(review): the means are computed over the whole dataset, before the
# train/test split — consider imputing from training-split statistics only.
df = df.fillna(df.mean())


In [51]:
# Check column dtypes and non-null counts after imputation.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4572 entries, 0 to 4571
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   TV                4572 non-null   float64
 1   Radio             4572 non-null   float64
 2   Social Media      4572 non-null   float64
 3   Sales             4572 non-null   float64
 4   Influencer_Macro  4572 non-null   bool   
 5   Influencer_Mega   4572 non-null   bool   
 6   Influencer_Micro  4572 non-null   bool   
 7   Influencer_Nano   4572 non-null   bool   
dtypes: bool(4), float64(4)
memory usage: 160.9 KB


In [36]:
# List the column names available for choosing features and target.
df.columns

Index(['TV', 'Radio', 'Social Media', 'Sales', 'Influencer_Macro',
       'Influencer_Mega', 'Influencer_Micro', 'Influencer_Nano'],
      dtype='object')

In [55]:
# Feature matrix: the three spend columns plus the one-hot influencer flags.
X = df.loc[:, [
    'TV', 'Radio', 'Social Media',
    'Influencer_Macro', 'Influencer_Mega', 'Influencer_Micro', 'Influencer_Nano',
]]

# Target kept as a one-column DataFrame (list selector) rather than a Series.
Y = df.loc[:, ['Sales']]

In [56]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=0,
)

In [57]:
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training split only, then apply the same
# transformation to both splits.
sc = StandardScaler().fit(X_train)
X_train_processed = sc.transform(X_train)
X_test_processed = sc.transform(X_test)

In [58]:
# Mean the scaler learned for the first feature column ('TV') on the training split.
sc.mean_[0]

53.990612131916386

In [59]:
from sklearn.preprocessing import PolynomialFeatures
# Degree-2 expansion fitted on the scaled training features and reused for
# the test features.
poly = PolynomialFeatures(degree = 2).fit(X_train_processed)
X_train_poly = poly.transform(X_train_processed)
X_test_poly = poly.transform(X_test_processed)

In [60]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Fit a linear model on the polynomial features, then report R^2 on the
# held-out split (ground truth first, predictions second).
poly_model = LinearRegression().fit(X_train_poly, y_train)
y_pred = poly_model.predict(X_test_poly)
r2_score(y_test, y_pred)

0.9948352205316634