# 📈 Sales Prediction using Simple Linear Regression in Python

In [None]:
# Import required libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Read the advertising dataset from disk into a DataFrame
csv_path = "/mnt/data/advertising (2).csv"
df = pd.read_csv(csv_path)

# Preview the top of the table as a quick sanity check on the load
print("First 5 rows of data:", df.head(), sep="\n")

In [None]:
# Basic info about the loaded frame.
# DataFrame.info() writes its report straight to stdout and returns None,
# so it is called directly; wrapping it in print() would emit a stray "None"
# line after the report.
print("\nDataset Info:")
df.info()

# Count missing values per column
print("\nMissing values:")
print(df.isnull().sum())

In [None]:
# Visualize relationships: Sales plotted against each advertising channel
sns.pairplot(df, x_vars=['TV', 'Radio', 'Newspaper'], y_vars='Sales', kind='scatter', height=4)
# Reserve headroom above the panels before adding the figure-level title;
# at the default layout the title is drawn on top of the upper edge of the axes.
plt.subplots_adjust(top=0.9)
plt.suptitle("Advertising Spend vs Sales")
plt.show()

In [None]:
# Simple linear regression: a single predictor (TV spend) and a single target (Sales)
feature_columns = ['TV']
X = df[feature_columns]  # independent variable, kept as a one-column DataFrame
y = df['Sales']          # dependent variable, a Series

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit a linear regression on the training split
# (fit() returns the estimator itself, so construction and fitting can be chained)
model = LinearRegression().fit(X_train, y_train)

# Generate predictions for the held-out rows
y_pred = model.predict(X_test)

In [None]:
# Score the predictions against the held-out targets
print("\nModel Evaluation:")

r2 = r2_score(y_test, y_pred)
print(f"R^2 Score: {r2:.3f}")

mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse:.3f}")

In [None]:
# Plot the held-out observations together with the model's predictions,
# using the object-oriented Axes interface
fig, ax = plt.subplots(figsize=(8, 5))
ax.scatter(X_test, y_test, color='blue', label='Actual')
ax.plot(X_test, y_pred, color='red', linewidth=2, label='Predicted')
ax.set_title('TV Advertising vs Sales')
ax.set_xlabel('TV Advertising Spend')
ax.set_ylabel('Sales')
ax.legend()
plt.show()