In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [3]:
data = pd.read_csv('Advertising.csv')
data

Unnamed: 0.1,Unnamed: 0,TV,Radio,Newspaper,Sales
0,1,230.1,37.8,69.2,22.1
1,2,44.5,39.3,45.1,10.4
2,3,17.2,45.9,69.3,9.3
3,4,151.5,41.3,58.5,18.5
4,5,180.8,10.8,58.4,12.9
...,...,...,...,...,...
195,196,38.2,3.7,13.8,7.6
196,197,94.2,4.9,8.1,9.7
197,198,177.0,9.3,6.4,12.8
198,199,283.6,42.0,66.2,25.5


In [4]:
print(data.head())

   Unnamed: 0     TV  Radio  Newspaper  Sales
0           1  230.1   37.8       69.2   22.1
1           2   44.5   39.3       45.1   10.4
2           3   17.2   45.9       69.3    9.3
3           4  151.5   41.3       58.5   18.5
4           5  180.8   10.8       58.4   12.9


In [19]:
X = data.drop('Sales', axis=1)
y = data['Sales'] 
X
y

0      22.1
1      10.4
2       9.3
3      18.5
4      12.9
       ... 
195     7.6
196     9.7
197    12.8
198    25.5
199    13.4
Name: Sales, Length: 200, dtype: float64

In [18]:
X = pd.get_dummies(X, drop_first=True)
X

Unnamed: 0.1,Unnamed: 0,TV,Radio,Newspaper
0,1,230.1,37.8,69.2
1,2,44.5,39.3,45.1
2,3,17.2,45.9,69.3
3,4,151.5,41.3,58.5
4,5,180.8,10.8,58.4
...,...,...,...,...
195,196,38.2,3.7,13.8
196,197,94.2,4.9,8.1
197,198,177.0,9.3,6.4
198,199,283.6,42.0,66.2


In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
model = RandomForestRegressor(n_estimators=100, random_state=42)

In [9]:
model.fit(X_train, y_train)

In [15]:
y_pred = model.predict(X_test)
y_pred

array([17.649, 21.646, 20.622,  6.639, 22.952, 13.469, 22.327,  9.733,
       11.809, 15.464,  7.37 ,  9.192, 12.026,  4.98 , 10.485, 12.615,
        6.68 , 16.539, 11.243, 19.855, 20.326, 12.902, 10.291, 22.116,
       10.076,  9.14 , 22.054, 12.694, 10.206,  5.036, 11.621, 10.861,
       21.991,  8.18 , 15.207, 20.628, 12.516, 20.964, 12.188,  7.8  ])

In [16]:
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mse
r2

0.9785539978317583

In [12]:
print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

Mean Squared Error: 0.6769128999999989
R^2 Score: 0.9785539978317583


In [17]:
importances = model.feature_importances_
indices = np.argsort(importances)[::-1]
importances
indices

array([1, 2, 3, 0], dtype=int64)

In [14]:
print("Feature ranking:")
for f in range(X.shape[1]):
    print(f"{f + 1}. feature {X.columns[indices[f]]} ({importances[indices[f]]})")

Feature ranking:
1. feature TV (0.6221642041398673)
2. feature Radio (0.3599927494656584)
3. feature Newspaper (0.012242682905477335)
4. feature Unnamed: 0 (0.0056003634889971)
