In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


In [None]:
# Load the CSV (path is relative to the notebook's working directory)
# and preview the first rows.
csv_path = "advertising.csv"
df = pd.read_csv(csv_path)
df.head()  # last expression of the cell -> rendered as its output

In [None]:
# Quick structural overview of the loaded frame.
# df.info() writes its report (dtypes, non-null counts) to stdout itself.
df.info()

# A notebook cell only renders the value of its *final* expression, so an
# intermediate result has to be shown explicitly or it is silently discarded.
# `display` is made available as a builtin by the IPython/Jupyter kernel.
display(df.describe())   # Summary statistics

df.isnull().sum()        # Missing values per column (rendered as the cell output)


In [None]:
# Pairwise scatter plots (and per-column distributions) for every column of df
pair_grid = sns.pairplot(df)
plt.show()

# Correlation matrix of the columns, drawn as an annotated heatmap
corr_matrix = df.corr()
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
plt.show()


In [None]:
# Feature matrix: the three predictor columns. Target vector: the Sales column.
feature_cols = ['TV', 'Radio', 'Newspaper']
X = df[feature_cols]
y = df['Sales']

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Fit a linear regression on the training split. fit() returns the estimator
# itself, so construction and fitting can be chained into one statement.
model = LinearRegression().fit(X_train, y_train)
model  # keep the fitted estimator's repr as the cell output


In [None]:
# Predict the target for the held-out test features using the fitted model.
y_pred = model.predict(X_test)


In [None]:
# Score the test-set predictions against the true target values.
r2 = r2_score(y_test, y_pred)              # coefficient of determination
mse = mean_squared_error(y_test, y_pred)   # average squared residual

print("R² Score:", r2)
print("Mean Squared Error:", mse)


In [None]:
# Scatter of true target values (x-axis) against model predictions (y-axis)
# for the test rows, drawn on an explicitly created Axes.
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred, color='green')
ax.set(
    xlabel="Actual Sales",
    ylabel="Predicted Sales",
    title="Actual vs Predicted Sales",
)
plt.show()
