In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import GridSearchCV

# Load the advertising dataset. The columns used below are TV, Radio and
# Newspaper (features) and Sales (target).
df = pd.read_csv('advertising.csv')

print(df.head())
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() emits a stray "None" line.
df.info()

# Forward-fill missing values. DataFrame.ffill() replaces
# fillna(method='ffill'), whose `method` argument is deprecated in pandas 2.1+.
df = df.ffill()

X = df[['TV', 'Radio', 'Newspaper']]
y = df['Sales']

# Split BEFORE scaling so the scaler's mean/variance are learned from the
# training rows only; fitting it on the full dataset leaks test-set
# statistics into preprocessing. The row assignment is the same as splitting
# the scaled array, because the shuffle depends only on the sample count and
# random_state.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Full feature matrix under the train-fitted scaling. Retained because the
# original script defined X_scaled; nothing below depends on it.
X_scaled = scaler.transform(X)

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
# RMSE as the square root of the MSE. The `squared=False` argument is
# deprecated in scikit-learn 1.4 and removed in 1.6.
rmse = mean_squared_error(y_test, y_pred) ** 0.5

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Root Mean Squared Error (RMSE): {rmse}")

def predict_sales(ad_budget_tv, ad_budget_radio, ad_budget_newspaper):
    """Return the model's sales prediction for one set of advertising budgets.

    The three budgets are placed in a single-row DataFrame with the columns
    'TV', 'Radio' and 'Newspaper', passed through the module-level ``scaler``
    and then through the module-level ``model``.

    Args:
        ad_budget_tv: Value for the 'TV' column.
        ad_budget_radio: Value for the 'Radio' column.
        ad_budget_newspaper: Value for the 'Newspaper' column.

    Returns:
        The first (and only) element of ``model.predict`` for that row.
    """
    budget_by_channel = {
        'TV': ad_budget_tv,
        'Radio': ad_budget_radio,
        'Newspaper': ad_budget_newspaper,
    }
    # Wrap each value in a list so the frame has exactly one row.
    single_row = pd.DataFrame(
        {channel: [amount] for channel, amount in budget_by_channel.items()}
    )

    scaled_row = scaler.transform(single_row)
    return model.predict(scaled_row)[0]

# Example prediction for one TV / radio / newspaper budget combination.
predicted_sales = predict_sales(150, 30, 20)
print(f"Predicted Sales: {predicted_sales}")

# Per-feature importance scores exposed by the fitted model.
feature_importances = model.feature_importances_
print("Feature Importances:", feature_importances)

# Exhaustive search over every n_estimators / max_depth combination,
# scored with 3-fold cross-validation on the training split.
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20],
}
grid_search = GridSearchCV(model, param_grid, cv=3)
grid_search.fit(X_train, y_train)
print(f"Best Parameters from Grid Search: {grid_search.best_params_}")


      TV  Radio  Newspaper  Sales
0  230.1   37.8       69.2   22.1
1   44.5   39.3       45.1   10.4
2   17.2   45.9       69.3   12.0
3  151.5   41.3       58.5   16.5
4  180.8   10.8       58.4   17.9
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   TV         200 non-null    float64
 1   Radio      200 non-null    float64
 2   Newspaper  200 non-null    float64
 3   Sales      200 non-null    float64
dtypes: float64(4)
memory usage: 6.4 KB
None
Mean Absolute Error (MAE): 0.9162249999999993
Root Mean Squared Error (RMSE): 1.198395062156049
Predicted Sales: 15.578999999999997
Feature Importances: [0.84535495 0.13664184 0.01800321]
Best Parameters from Grid Search: {'max_depth': None, 'n_estimators': 300}
