<a href="https://colab.research.google.com/github/HuanAII/Excercise/blob/main/ProjectSalesPrediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


In [None]:
class CustomLinearRegression:
    """Linear regression trained with full-batch gradient descent on the MSE loss.

    The model is ``y_hat = [1, x] @ theta``: a column of ones is prepended to
    the training features so that ``theta[0]`` acts as the intercept.

    Args:
        X_data: Training features, array-like of shape (n_samples, n_features).
        y_target: Training targets, array-like with n_samples values
            (1-D or a single column).
        learning_rate: Step size of each gradient-descent update.
        num_epochs: Number of full passes over the training data in ``fit``.

    Raises:
        ValueError: If ``y_target`` does not hold exactly one value per sample.
    """

    def __init__(self, X_data, y_target, learning_rate=0.01, num_epochs=10000):
        # Convert up front so DataFrames / mixed bool+float columns become one
        # float matrix instead of an object array.
        X_data = np.asarray(X_data, dtype=float)
        self.num_samples = X_data.shape[0]
        # Bias column of ones goes first, so theta[0] is the intercept.
        self.X_data = np.c_[np.ones((self.num_samples, 1)), X_data]

        # Force a column vector: a 1-D target would broadcast against the
        # (n, 1) predictions into an (n, n) matrix and silently corrupt the loss.
        self.y_target = np.asarray(y_target, dtype=float).reshape(-1, 1)
        if self.y_target.shape[0] != self.num_samples:
            raise ValueError(
                "y_target must contain exactly one value per row of X_data"
            )

        self.learning_rate = learning_rate
        self.num_epochs = num_epochs

        # Initial weights
        self.theta = np.random.randn(self.X_data.shape[1], 1)
        self.losses = []

    def compute_loss(self, y_pred, y_target):
        """Return the mean squared error between ``y_pred`` and ``y_target``."""
        # Averaging over the arrays actually passed in keeps the value a true
        # MSE even when it is evaluated on something other than the training set.
        return np.mean((y_pred - y_target) ** 2)

    def predict(self, X_data):
        """Return the linear predictions ``X_data @ theta``.

        Args:
            X_data: Either a matrix that already contains the leading bias
                column (as used internally by ``fit``) or raw features with
                one column fewer than ``theta`` has rows; in the latter case
                the bias column is added here.

        Returns:
            Array of predictions, one row per input row.
        """
        X_data = np.asarray(X_data, dtype=float)
        if X_data.ndim == 2 and X_data.shape[1] + 1 == self.theta.shape[0]:
            X_data = np.c_[np.ones((X_data.shape[0], 1)), X_data]
        # Plain linear map; squaring the output would make the model non-linear
        # and invalidate the gradient used in fit().
        return X_data.dot(self.theta)

    def fit(self):
        """Run gradient descent for ``num_epochs`` epochs.

        The loss of every epoch is appended to ``self.losses`` and printed
        every 50 epochs.

        Returns:
            dict with ``'loss'`` (mean of all recorded losses, NaN when none
            were recorded) and ``'weight'`` (the fitted ``theta``).
        """
        for epoch in range(self.num_epochs):
            y_pred = self.predict(self.X_data)
            loss = self.compute_loss(y_pred, self.y_target)
            self.losses.append(loss)

            # dL/dy_hat for the MSE loss, then chain rule back to theta.
            loss_grd = 2 * (y_pred - self.y_target) / self.num_samples
            gradients = self.X_data.T.dot(loss_grd)

            # Step along the parameter gradient (same shape as theta), not the
            # per-sample residual term.
            self.theta = self.theta - self.learning_rate * gradients
            if (epoch % 50) == 0:
                print(f'Epoch: {epoch} - Loss: {loss}')

        mean_loss = (
            sum(self.losses) / len(self.losses) if self.losses else float('nan')
        )
        return {
            'loss': mean_loss,
            'weight': self.theta
        }


In [None]:
def r2score(y_pred, y):
    """Coefficient of determination (R^2) of predictions against targets.

    Args:
        y_pred: Predicted values.
        y: Observed values; must provide a ``mean()`` method.

    Returns:
        ``1 - RSS / TSS`` where RSS is the sum of squared residuals and TSS is
        the sum of squared deviations of ``y`` from its mean.
    """
    residuals = y_pred - y
    deviations = y - y.mean()
    residual_ss = np.sum(residuals ** 2)
    total_ss = np.sum(deviations ** 2)
    return 1 - (residual_ss / total_ss)


In [None]:
# Sanity check: predictions identical to the targets leave no residual,
# so the score is exactly 1.0.
y_pred = np.array([1, 2, 3, 4, 5])
y = np.array([1, 2, 3, 4, 5])
r2score(y_pred, y)

1.0

In [None]:
# Poor predictions: the residual sum of squares exceeds the total sum of
# squares of y, so the score comes out negative.
y_pred = np.array([1, 2, 3, 4, 5])
y = np.array([3, 5, 5, 2, 4])
r2score(y_pred, y)


-2.235294117647059

In [None]:
# Polynomial regression: helpers that expand features into polynomial terms
def create_polynomial_feature(X, degree=2):
    """Append the element-wise powers 2..degree of ``X`` as extra columns.

    Args:
        X: Array of input values.
        degree: Highest power to append; below 2 nothing is appended.

    Returns:
        ``X`` itself when ``degree < 2``; otherwise a new array whose columns
        are ``X``, ``X**2``, ..., ``X**degree`` placed side by side.
    """
    if degree < 2:
        # No higher powers requested: hand the input back untouched.
        return X
    higher_powers = [np.power(X, exponent) for exponent in range(2, degree + 1)]
    # Column-wise concatenation of the original values and all their powers.
    return np.c_[tuple([X] + higher_powers)]

# Demo: a single-column input expanded to degree 2 gains one squared column.
X = np.array([[1], [2], [3]])
create_polynomial_feature(X, degree=2)

array([[1, 1],
       [2, 4],
       [3, 9]])

In [5]:
def create_polynomial_features(X, degree=2):
    """Creates per-feature polynomial terms (no cross terms, no bias column).

    Each input column is expanded into its powers 1..degree, and the powers of
    one feature stay adjacent in the output. For two features and
    ``degree=2`` the columns are ``[x1, x1**2, x2, x2**2]``.

    Args:
        X: A 2-D array-like of shape (n_samples, n_features).
        degree: An integer for the degree of the generated polynomial
            function. Values below 2 add no terms.

    Returns:
        A new array of shape (n_samples, n_features * degree); for
        ``degree < 2`` a copy of the input features, shape
        (n_samples, n_features).

    Raises:
        ValueError: If ``X`` is not two-dimensional.
    """
    X = np.asarray(X)
    if X.ndim != 2:
        raise ValueError(
            "X must be 2-D with shape (n_samples, n_features); "
            "reshape a single feature with X.reshape(-1, 1)"
        )
    # With no higher powers there is nothing to stack. Stacking 1-D columns
    # one value at a time here is what previously flattened the result into a
    # single row.
    if degree < 2 or X.shape[1] == 0:
        return X.copy()

    columns = []
    for feature in X.T:
        columns.append(feature)
        columns.extend(np.power(feature, d) for d in range(2, degree + 1))
    return np.column_stack(columns)

# Demo: three samples with two features each, expanded to degree 2.
# In the result each feature is followed directly by its own square.
X = np.array([[1, 2],
 [2, 3],
 [3, 4]])
print(X.shape)
degree = 2
create_polynomial_features(X, degree)


(3, 2)


array([[ 1,  1,  2,  4],
       [ 2,  4,  3,  9],
       [ 3,  9,  4, 16]])

In [11]:
# 3. Sales prediction: load the dataset, one-hot encode, and impute.
df = pd.get_dummies(
    pd.read_csv('/content/SalesPrediction.csv'),
    columns=['Influencer'],
)
# Per-column missing-value counts. The result is not displayed because this
# is not the last expression of the cell.
df.isna().sum()
# Replace missing values with the mean of their column.
df = df.fillna(df.mean())
df.head()

Unnamed: 0,TV,Radio,Social Media,Sales,Influencer_Macro,Influencer_Mega,Influencer_Micro,Influencer_Nano
0,16.0,6.566231,2.907983,54.732757,False,True,False,False
1,13.0,9.237765,2.409567,46.677897,False,True,False,False
2,41.0,15.886446,2.91341,150.177829,False,True,False,False
3,83.0,30.020028,6.922304,298.24634,False,True,False,False
4,15.0,8.437408,1.405998,56.594181,False,False,True,False


In [18]:
 # Feature matrix: TV, Radio and Social Media plus the four one-hot
 # Influencer_* columns.
 X = df[['TV', 'Radio', 'Social Media', 'Influencer_Macro', 'Influencer_Mega',
 'Influencer_Micro', 'Influencer_Nano']]
 # Double brackets keep the target as a one-column DataFrame, not a Series.
 y = df[['Sales']]
 # NOTE(review): this prints the whole frame; X.head() would keep the
 # output short.
 print(X)

        TV      Radio  Social Media  Influencer_Macro  Influencer_Mega  \
0     16.0   6.566231      2.907983             False             True   
1     13.0   9.237765      2.409567             False             True   
2     41.0  15.886446      2.913410             False             True   
3     83.0  30.020028      6.922304             False             True   
4     15.0   8.437408      1.405998             False            False   
...    ...        ...           ...               ...              ...   
4567  26.0   4.472360      0.717090             False            False   
4568  71.0  20.610685      6.545573             False            False   
4569  44.0  19.800072      5.096192             False            False   
4570  71.0  17.534640      1.940873              True            False   
4571  42.0  15.966688      5.046548             False            False   

      Influencer_Micro  Influencer_Nano  
0                False            False  
1                False     

In [19]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for evaluation; random_state=0 makes the split
# repeatable across runs.
X_train, X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0)

In [22]:
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()
# Fit the scaling statistics on the training split only, then apply those
# same statistics to the test split.
X_train_processed=scaler.fit_transform(X_train)
X_test_processed=scaler.transform(X_test)
# Mean learned for the first feature column (presumably TV, given the column
# order used when X was built).
scaler.mean_[0]

54.107708927005696

In [24]:
from sklearn.preprocessing import PolynomialFeatures
# Degree-2 polynomial expansion of the standardized features.
poly=PolynomialFeatures(degree=2)
X_train_poly=poly.fit_transform(X_train_processed)
# Reuse the transformer fitted on the training split. Refitting on the test
# split (fit_transform) breaks the fit-on-train / transform-on-test contract.
X_test_poly=poly.transform(X_test_processed)

In [26]:
 from sklearn.linear_model import LinearRegression
 from sklearn.metrics import r2_score
 # Linear regression fitted on the polynomial-expanded training features.
 poly_model = LinearRegression()
 poly_model.fit(X_train_poly, y_train)
 preds = poly_model.predict(X_test_poly)
 # R^2 on the held-out split; r2_score takes the true values first, then the
 # predictions.
 r2_score(y_test, preds)

0.9948244632652534