In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import r2_score, mean_squared_error
import joblib

In [2]:
# Read the advertising dataset from the working directory into a DataFrame.
df = pd.read_csv(filepath_or_buffer='advertising.csv')

# Preview the first five rows as a quick sanity check of the loaded columns.
df.head(n=5)

Unnamed: 0,tv,radio,social_media,influencer,sales
0,16000.0,6566.23,2907.98,Mega,54732.76
1,13000.0,9237.76,2409.57,Mega,46677.9
2,41000.0,15886.45,2913.41,Mega,150177.83
3,83000.0,30020.03,6922.3,Mega,298246.34
4,15000.0,8437.41,1406.0,Micro,56594.18


In [3]:
# Remove exact duplicate rows, then any row that has a missing value.
df = df.drop_duplicates().dropna()

# Turn the categorical `influencer` labels into integer codes.
# `le` keeps the fitted label -> code mapping (see `le.classes_`).
le = LabelEncoder()
df = df.assign(influencer=le.fit_transform(df['influencer']))

# Force the numeric columns to a numeric dtype; values that cannot be parsed
# become NaN because of errors='coerce'.
numeric_columns = ['tv', 'radio', 'social_media', 'sales']
df = df.assign(
    **{name: pd.to_numeric(df[name], errors='coerce') for name in numeric_columns}
)

# Drop the rows in which coercion produced NaN.
df = df.dropna()


In [4]:
# Predictors are the three spend columns plus the encoded influencer column;
# the regression target is `sales`.
feature_columns = ['tv', 'radio', 'social_media', 'influencer']
X = df[feature_columns]
y = df['sales']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [5]:
# Fit a linear regression on the training split. `fit` returns the estimator
# itself, so construction and fitting can be chained.
model = LinearRegression().fit(X_train, y_train)

# Bare expression so the cell still displays the fitted estimator.
model


In [6]:
# Predict on the held-out split and score the predictions against the truth.
y_pred = model.predict(X_test)
r2, mse = r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred)

# Report both metrics, one per line, rounded to four decimals.
print(
    f"R² Score: {r2:.4f}",
    f"Mean Squared Error: {mse:.4f}",
    sep="\n",
)


R² Score: 0.9990
Mean Squared Error: 8322554.7524


In [7]:
# Persist the fitted regression model so it can be reloaded without retraining.
joblib.dump(model, 'mlr_model.joblib')

# The model was trained on the integer codes produced by the fitted
# LabelEncoder `le`, not on the raw `influencer` labels. Save the encoder next
# to the model so the same label -> code mapping can be applied at inference.
joblib.dump(le, 'influencer_encoder.joblib')

print("✅ Model saved as mlr_model.joblib")


✅ Model saved as mlr_model.joblib
