In [None]:
!pip install -U scikit-learn pandas matplotlib seaborn


Collecting pandas
  Downloading pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
Collecting matplotlib
  Downloading matplotlib-3.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Downloading pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.1/13.1 MB[0m [31m103.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading matplotlib-3.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m116.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pandas, matplotlib
  Attempting uninstall: pandas
    Found existing installation: pandas 2.2.2
    Uninstalling pandas-2.2.2:
      Successfully uninstalle

In [None]:
import io

from google.colab import files
import pandas as pd

# files.upload() returns a dict mapping each uploaded file's name to its bytes.
# Colab renames a re-uploaded file (e.g. "advertising (1).csv"), so reading a
# hard-coded 'advertising.csv' could silently load a stale copy from an earlier
# upload. Parse the bytes that were just uploaded instead.
uploaded = files.upload()
if not uploaded:
    raise ValueError("No file was uploaded; please upload the advertising CSV.")

uploaded_name = next(iter(uploaded))
df = pd.read_csv(io.BytesIO(uploaded[uploaded_name]))

df.head()


Saving advertising.csv to advertising (1).csv


Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,12.0
3,151.5,41.3,58.5,16.5
4,180.8,10.8,58.4,17.9


In [None]:
# Report the number of missing values per column. A bare expression in the
# middle of a cell is never displayed, so print it explicitly.
print(df.isnull().sum())

# Fill missing numeric values with the column mean. numeric_only=True keeps
# df.mean() from raising on text columns (pandas >= 2.0), and assigning the
# result avoids mutating the frame in place.
df = df.fillna(df.mean(numeric_only=True))

# One-hot encode any categorical columns, dropping the first level of each.
df = pd.get_dummies(df, drop_first=True)


In [None]:
from sklearn.model_selection import train_test_split

# Separate the target column from the predictor columns.
y = df['Sales']
X = df.drop(columns='Sales')

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Candidate regressors, keyed by the label used in the results table.
models = dict([
    ('Linear Regression', LinearRegression()),
    ('Decision Tree Regressor', DecisionTreeRegressor(random_state=42)),
    ('Random Forest Regressor', RandomForestRegressor(random_state=42)),
])

# Fit each candidate on the training split and score it on the held-out split.
results = {}
for name, model in models.items():
    y_pred = model.fit(X_train, y_train).predict(X_test)
    results[name] = {
        'MSE': mean_squared_error(y_test, y_pred),
        'R2': r2_score(y_test, y_pred),
    }

# One row per model, one column per metric.
results_df = pd.DataFrame(results).transpose()
results_df


Unnamed: 0,MSE,R2
Linear Regression,2.907757,0.905901
Decision Tree Regressor,3.402,0.889907
Random Forest Regressor,1.437433,0.953483


In [None]:
import joblib

# Row label of the model with the highest R2 in the results table.
best_model_name = results_df.loc[:, 'R2'].idxmax()
best_model = models[best_model_name]

# Persist the selected fitted estimator so the prediction cells can reload it.
joblib.dump(best_model, 'best_sales_model.pkl')
print(f"The best model is: {best_model_name}")


The best model is: Random Forest Regressor


In [None]:
import joblib
import pandas as pd

# Reload the estimator that an earlier cell saved to 'best_sales_model.pkl'.
# NOTE: joblib.load unpickles the file, so only load model files you trust.
best_model = joblib.load('best_sales_model.pkl')

def preprocess_input(user_input, feature_names=None):
    """Turn one mapping of feature values into a one-row DataFrame for the model.

    Args:
        user_input: Mapping of feature name to value for a single observation.
        feature_names: Optional ordered column names the model expects. When
            omitted, the names recorded on the module-level ``best_model``
            (``feature_names_in_``) are used if the estimator has them.

    Returns:
        A one-row ``pandas.DataFrame``. When the expected feature names are
        known, the columns are exactly those names in that order; otherwise
        the frame is returned as built from ``user_input``.

    Raises:
        ValueError: If ``user_input`` is missing an expected feature or
            contains a feature the model was not trained on.
    """
    input_df = pd.DataFrame([user_input])

    if feature_names is None:
        # Estimators fitted on a DataFrame with string column names remember
        # them here; fall back to no alignment if the attribute is absent.
        feature_names = getattr(best_model, 'feature_names_in_', None)
    if feature_names is None:
        return input_df

    expected = list(feature_names)
    missing = [col for col in expected if col not in input_df.columns]
    unexpected = [col for col in input_df.columns if col not in expected]
    if missing or unexpected:
        raise ValueError(
            f"Input features do not match the model: "
            f"missing={missing}, unexpected={unexpected}"
        )

    # Reorder so the key order of user_input never matters to the estimator.
    return input_df[expected]

def predict_sales(user_input):
    """Return the model's prediction for a single observation.

    ``user_input`` is handed to ``preprocess_input`` to build a one-row frame,
    which is scored with the module-level ``best_model``. The first element of
    the resulting prediction array is returned.
    """
    features = preprocess_input(user_input)
    return best_model.predict(features)[0]

# Example budgets for one prediction, keyed by advertising channel.
# NOTE(review): these values are much larger than the rows shown by df.head();
# confirm they fall inside the training range before trusting the prediction.
user_input = {'TV': 1000, 'Radio': 500, 'Newspaper': 300}

predicted_sales = predict_sales(user_input)
print(f"Predicted sales: {predicted_sales}")


Predicted sales: 26.062


In [None]:
import joblib
import pandas as pd

import math

# This cell reuses `best_model`, `preprocess_input` and `predict_sales` from the
# preceding prediction cell rather than redefining them, so each helper has a
# single definition in the notebook.


def read_spend(channel):
    """Prompt until the user enters a finite, non-negative number.

    Args:
        channel: Name of the advertising channel shown in the prompt.

    Returns:
        The entered expenditure as a ``float``.
    """
    prompt = f"Enter the advertising expenditure for {channel}: "
    while True:
        raw = input(prompt)
        try:
            value = float(raw)
        except ValueError:
            print(f"'{raw}' is not a number, please try again.")
            continue
        # float() also accepts 'nan' and 'inf', which are not valid budgets.
        if math.isfinite(value) and value >= 0:
            return value
        print("Expenditure must be a finite, non-negative number, please try again.")


tv_input = read_spend("TV")
radio_input = read_spend("Radio")
newspaper_input = read_spend("Newspaper")

user_input = {
    'TV': tv_input,
    'Radio': radio_input,
    'Newspaper': newspaper_input
}

predicted_sales = predict_sales(user_input)
print(f"Predicted sales: {predicted_sales}")


Enter the advertising expenditure for TV: 15
Enter the advertising expenditure for Radio: 8
Enter the advertising expenditure for Newspaper: 4
Predicted sales: 4.784000000000003
