In [None]:
# Importing required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from scipy import stats
import statsmodels.api as sm

In [None]:
class mmm():
    """Three-step regression workflow: ``explore`` -> ``transform`` -> ``model``.

    ``explore`` loads a CSV and plots each feature against the label,
    ``transform`` applies a Box-Cox transform to selected feature columns,
    and ``model`` fits an ordinary least squares regression and plots the
    actual versus fitted values. All methods take keyword arguments only.
    """

    def _plot_feature(self, data, feature, target):
        """Show a scatter of ``feature`` vs ``target`` and a histogram of ``feature``."""
        scatter = px.scatter(data, x=feature, y=target)
        scatter.show()

        fig, ax = plt.subplots()
        ax.hist(data[feature], bins='auto')
        ax.set_title(str(feature))
        ax.set_xlabel(str(feature))
        ax.set_ylabel('count')
        plt.show()
        # Release the figure so plotting many columns does not accumulate memory.
        plt.close(fig)

    def explore(self, **kwargs):
        """Load the data, print summaries, and plot every feature against the label.

        Keyword Args:
            file_name: CSV file to read; handed to ``pd.read_csv``.
            independent_col: List of feature column names.
            dependent_col: List holding the label column name. A bare string
                is accepted as well. Only the first label column is used as
                the y axis of the scatter plots.

        Returns:
            tuple: ``(labels, features)`` as two DataFrames that are
            independent copies of the loaded data.

        Raises:
            KeyError: If a required keyword or a requested column is missing.
        """
        independent_col = list(kwargs['independent_col'])
        dependent_col = kwargs['dependent_col']
        # A single column name given as a string would otherwise fail on
        # list + str concatenation below.
        if isinstance(dependent_col, str):
            dependent_col = [dependent_col]
        else:
            dependent_col = list(dependent_col)

        # Read the file and drop every column that was not asked for.
        data = pd.read_csv(kwargs['file_name'])
        data = data[independent_col + dependent_col]

        # Copies, so later column assignments do not act on a slice of `data`.
        labels = data[dependent_col].copy()
        print(labels.head())
        print(labels.describe())

        features = data[independent_col].copy()
        print(features.head())
        print(features.describe())

        # Scatter of each feature against the label, plus a histogram of the
        # feature, to check the data for skewness.
        for column in features.columns:
            self._plot_feature(data, column, labels.columns[0])

        return labels, features

    def transform(self, **kwargs):
        """Box-Cox transform the selected feature columns and re-plot them.

        The transform is applied to a copy; the frame passed in as
        ``features`` is not modified.

        Keyword Args:
            features: DataFrame of feature columns.
            labels: DataFrame whose first column is the label.
            transform_col: Iterable of column names in ``features`` to
                transform.

        Returns:
            tuple: ``(labels, features)`` where ``labels`` is the object that
            was passed in and ``features`` is the transformed copy.

        Raises:
            KeyError: If a required keyword or a requested column is missing.
            ValueError: Propagated from ``scipy.stats.boxcox``, which only
                accepts strictly positive, non-constant data.
        """
        labels = kwargs['labels']
        transformed = kwargs['features'].copy()

        for column in kwargs['transform_col']:
            # boxcox returns (transformed values, fitted lambda); the lambda
            # is not needed here.
            transformed[column], _ = stats.boxcox(transformed[column])

            paired = pd.concat([transformed[column], labels], axis=1)
            self._plot_feature(paired, column, labels.columns[0])

        return labels, transformed

    def model(self, **kwargs):
        """Fit an OLS regression, print diagnostics, and plot actual vs fitted.

        Keyword Args:
            labels: DataFrame whose first column is the response.
            features: DataFrame of regressors.
            add_constant: Optional, default ``False``. When true an intercept
                column is added with ``sm.add_constant`` before fitting;
                ``sm.OLS`` does not add one on its own.

        Returns:
            The fitted statsmodels results object.

        Raises:
            KeyError: If ``labels`` or ``features`` is not supplied.
        """
        labels = kwargs['labels']
        features = kwargs['features']
        if kwargs.get('add_constant', False):
            features = sm.add_constant(features)

        results = sm.OLS(labels, features).fit()
        print(results.summary())

        # Coefficients, goodness of fit, and standard errors
        print('Parameters: ', results.params)
        print('R2: ', results.rsquared)
        print('Standard errors: ', results.bse)

        # Actual and in-sample predicted values, side by side. The response
        # column is looked up by position so any label name works.
        y_pred = results.predict()
        target = labels.columns[0]
        comparison = pd.DataFrame({'Actual': labels[target], 'Predicted': y_pred})
        print(comparison.head(10))
        line = px.line(comparison, x=comparison.index, y=['Actual', 'Predicted'])
        line.show()

        return results


In [None]:
# Single helper instance reused by the cells below.
myobject = mmm()

In [None]:
# Load the advertising data, keep the three feature columns plus the sales
# label, and plot each feature against it.
labels, features = myobject.explore(
    file_name='Advertising.csv',
    independent_col=['TV', 'radio', 'newspaper'],
    dependent_col=['sales'],
)

In [None]:
# Box-Cox transform the newspaper column; transform returns (labels, features).
labels, features = myobject.transform(
    features=features,
    labels=labels,
    transform_col=['newspaper'],
)

In [None]:
# Fit the regression on the prepared data and show the fit statistics and plot.
myobject.model(
    labels=labels,
    features=features,
)