# Sales Prediction using Machine Learning in Python

This notebook demonstrates a step-by-step workflow for predicting sales based on advertising expenditure, audience segmentation, and platform selection.

## 1. Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score


## 2. Load Dataset

In [None]:
# Read the sample sales CSV; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='data/sample_sales_data.csv')
# Preview the first five rows as a quick sanity check on the parsed columns.
df.head(n=5)

## 3. Exploratory Data Analysis (EDA)

In [None]:
# Structural overview of the frame: column names, dtypes and non-null counts.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly -- wrapping it in print() would emit a stray "None" line.
df.info()

# Summary statistics produced by describe() with its default settings.
print(df.describe())

# Pairwise relationships between the columns, coloured by advertising platform.
# This must run before 'Advertising_Platform' is label-encoded so the legend
# shows the original platform names rather than integer codes.
sns.pairplot(df, hue='Advertising_Platform')
plt.show()

## 4. Data Preprocessing

In [None]:
# Turn the two categorical columns into integer codes.
# One encoder per column is kept around so the codes can later be mapped back
# to the original labels (e.g. via `inverse_transform`).
# NOTE(review): integer codes imply an ordering between categories that may
# not exist in the data -- confirm this is acceptable for the chosen model.
le_segment = LabelEncoder()
segment_codes = le_segment.fit_transform(df['Target_Audience_Segment'])

le_platform = LabelEncoder()
platform_codes = le_platform.fit_transform(df['Advertising_Platform'])

# Overwrite the original text columns with their encoded counterparts.
df['Target_Audience_Segment'] = segment_codes
df['Advertising_Platform'] = platform_codes

df.head()

## 5. Train-Test Split

In [None]:
# Target is the 'Sales' column; every remaining column is used as a feature.
y = df['Sales']
X = df.drop(columns=['Sales'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


## 6. Model Training

In [None]:
# Fit a random forest regressor on the training split. The seed is fixed so
# the fitted forest is reproducible; all other hyperparameters are defaults.
# `fit` returns the estimator itself, so construction and fitting are chained.
model = RandomForestRegressor(random_state=42).fit(X_train, y_train)
# Show the fitted estimator as the cell output.
model


## 7. Evaluation

In [None]:
# Score the fitted model on the held-out test split.
y_pred = model.predict(X_test)

# RMSE is the square root of the mean squared error, which puts the error
# back in the same units as the target.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)

# Coefficient of determination on the test split.
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f'RMSE: {rmse:.2f}')
print(f'R^2 Score: {r2:.2f}')


## 8. Feature Importance

In [None]:
# Pair each fitted importance score with its feature name and rank them from
# most to least important.
importances = model.feature_importances_
feature_names = X.columns
feat_imp = pd.Series(importances, index=feature_names).sort_values(ascending=False)

# Draw on an explicit Axes (rather than the implicit pyplot state) and label
# both axes so the figure can be read on its own when the notebook is skimmed.
fig, ax = plt.subplots()
feat_imp.plot(kind='bar', ax=ax)
ax.set_title('Feature Importance')
ax.set_xlabel('Feature')
ax.set_ylabel('Importance')
# Bar charts rotate the tick labels; tighten the layout so they are not clipped.
fig.tight_layout()
plt.show()