In [None]:
#//Task 3

#Sales prediction means predicting how much of a product people will buy based on factors
#such as the amount spent on advertising the product, the segment of people the
#advertising targets, or the platform on which the product is advertised.
#Typically, product- and service-based businesses need their data scientists to predict
#future sales each time they adjust how much they spend on advertising their
#products. So let's start the task of sales prediction with machine learning using Python.


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the advertising dataset; the path is relative to the notebook's working directory.
data = pd.read_csv('./Oibsip/Advertising.csv')

# Preview the first rows to confirm the file parsed as expected.
print("First few rows of the dataset:")
print(data.head())

# DataFrame.info() writes its summary straight to stdout and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
print("\nDataset info:")
data.info()

# Summary statistics as reported by DataFrame.describe().
print("\nDataset description:")
print(data.describe())

# One colour per platform, shared by both charts below.
platform_colors = ['skyblue', 'orange', 'green']

# NOTE: despite its name, `average_sales` holds the mean of the TV, Radio and
# Newspaper columns (advertising spend per the chart titles), not sales figures.
average_sales = data[['TV', 'Radio', 'Newspaper']].mean()

# Bar chart of the per-platform means.
bar_ax = average_sales.plot.bar(color=platform_colors)
bar_ax.set_title('Average Advertising Spend by Platform')
bar_ax.set_ylabel('Spend in Thousands')
plt.show()

# Express each platform's mean as a percentage of the three means combined.
total_spend = average_sales.sum()
spend_share = average_sales / total_spend * 100

# Pie chart of those percentages; the y-axis label is explicitly blanked.
pie_ax = spend_share.plot.pie(autopct='%1.1f%%', colors=platform_colors, startangle=90)
pie_ax.set_title('Advertising Spend Share by Platform')
pie_ax.set_ylabel('')
plt.show()

# Line plot of the Sales column in row order (one point per row of the dataset).
sales_ax = plt.gca()
sales_ax.plot(data['Sales'], color='purple')
sales_ax.set_title('Sales Trend Over Data Points')
sales_ax.set_xlabel('Data Points')
sales_ax.set_ylabel('Sales')
plt.show()

# Annotated heatmap of pairwise correlations between the numeric columns.
plt.figure(figsize=(10, 6))

# Restrict to numeric dtypes before calling corr(): pandas >= 2.0 no longer drops
# text columns silently and raises instead. For a frame that holds only int/float
# columns the resulting matrix is unchanged.
numeric_data = data.select_dtypes(include='number')
sns.heatmap(numeric_data.corr(), annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()

# Predictors are the three advertising columns; the target is Sales.
feature_columns = ['TV', 'Radio', 'Newspaper']
X = data[feature_columns]
y = data['Sales']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a linear regression on the training rows (fit() returns the estimator itself).
model = LinearRegression().fit(X_train, y_train)

# Predict the held-out rows and score the predictions against the actual values.
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f"\nMean Squared Error: {mse}")
print(f"R-squared: {r2}")

# Predicted vs. actual sales on the test set, with a dashed red y = x reference
# line drawn across the range of the actual values.
actual_min, actual_max = y_test.min(), y_test.max()
diagonal = [actual_min, actual_max]

plt.scatter(y_test, y_pred, color='blue')
plt.plot(diagonal, diagonal, 'r--', lw=2)
plt.title('Actual vs. Predicted Sales')
plt.xlabel('Actual Sales')
plt.ylabel('Predicted Sales')
plt.show()
