In [1]:
import pandas as pd

# Read the advertising CSV (path is relative to the working directory) into `df`.
DATA_FILE = 'advertising.csv'
df = pd.read_csv(DATA_FILE)

# Preview the top rows as the cell's displayed output.
df.head()


Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,12.0
3,151.5,41.3,58.5,16.5
4,180.8,10.8,58.4,17.9


In [2]:
# Report the number of missing values in each column.
print(df.isnull().sum())

# Drop every row that contains a missing value. `df` is rebound to the
# result instead of using `inplace=True`, so the frame object that was
# loaded and displayed earlier is not mutated behind the reader's back.
df = df.dropna()

# One-hot encode non-numeric columns, dropping the first level of each to
# avoid a redundant indicator. Per the pandas docs, with no `columns=`
# argument only object/string/category columns are converted; numeric
# columns pass through unchanged.
df = pd.get_dummies(df, drop_first=True)

df.head()

TV           0
Radio        0
Newspaper    0
Sales        0
dtype: int64


Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,12.0
3,151.5,41.3,58.5,16.5
4,180.8,10.8,58.4,17.9


In [3]:
# Collapse the three per-channel spend columns into a single total-spend
# feature and remove the source columns.
# NOTE(review): once the TV/Radio/Newspaper columns are dropped, anything
# built from `df` afterwards only sees the combined spend — confirm that
# discarding the per-channel breakdown is intended.
channel_cols = ['TV', 'Radio', 'Newspaper']
total_col = 'Total_Advertising_Expenditure'

if set(channel_cols).issubset(df.columns):
    # Add the total and drop its inputs in one non-mutating expression;
    # the new column is appended after the existing ones.
    df = df.assign(
        **{total_col: df['TV'] + df['Radio'] + df['Newspaper']}
    ).drop(columns=channel_cols)
elif total_col not in df.columns:
    # Neither the inputs nor the derived feature exist, so the frame is not
    # in a state this cell can work with — fail loudly, as before.
    raise KeyError(f'Expected columns {channel_cols} in df')
# Otherwise the derived column is already present and the inputs are gone:
# the cell has run before, and re-running it is a no-op instead of a
# KeyError on the already-dropped columns.

df.head()

Unnamed: 0,Sales,Total_Advertising_Expenditure
0,22.1,337.1
1,10.4,128.9
2,12.0,132.4
3,16.5,251.3
4,17.9,250.0


In [4]:
from sklearn.model_selection import train_test_split

# Separate the target column from the remaining predictor columns.
y = df['Sales']
X = df.drop(columns='Sales')

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Create the linear regressor and fit it on the training split
# (`fit` returns the estimator itself, so the two steps can be chained).
model = LinearRegression().fit(X_train, y_train)

# Predict the target for the held-out rows.
y_pred = model.predict(X_test)

# Score the predictions against the true held-out values.
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')


Mean Squared Error: 5.632874672713669
R^2 Score: 0.8177128104593795


In [6]:
# Print the metrics currently held in `mse` and `r2`, one per line.
for label, value in (('Mean Squared Error', mse), ('R^2 Score', r2)):
    print(f'{label}: {value}')


Mean Squared Error: 5.632874672713669
R^2 Score: 0.8177128104593795


In [7]:
from sklearn.ensemble import RandomForestRegressor

# Build a 100-tree random forest with a fixed seed and fit it on the
# training split (`fit` returns the fitted estimator).
rf_model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predict the target for the held-out rows.
y_pred_rf = rf_model.predict(X_test)

# Score the forest's predictions against the true held-out values.
mse_rf, r2_rf = (
    mean_squared_error(y_test, y_pred_rf),
    r2_score(y_test, y_pred_rf),
)

print(f'Random Forest Mean Squared Error: {mse_rf}')
print(f'Random Forest R^2 Score: {r2_rf}')


Random Forest Mean Squared Error: 9.361464263444455
Random Forest R^2 Score: 0.697050776074494
