<a href="https://colab.research.google.com/github/Suthir731/Codsoft-Intern/blob/main/task_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset.
# DATA_PATH is a placeholder: set it to the CSV's local path or URL before
# running, because the empty string is not a readable file.
DATA_PATH = ""
data = pd.read_csv(DATA_PATH)

# Preview the first few rows of the dataset
print(data.head())
# DataFrame.info() writes its summary itself and returns None, so it is called
# directly; wrapping it in print() emitted an extra "None" line.
data.info()

# Data preprocessing
# Discard every row that contains at least one missing value.
data = data.dropna(axis=0, how='any')

# Separate the model inputs from the prediction target.
# The dataset is assumed to provide the columns named below.
feature_columns = ['Advertising_Expenditure', 'Audience_Segmentation', 'Advertising_Platform']
target_column = 'Sales'
X = data[feature_columns]
y = data[target_column]

# Preprocessing pipeline
# - the numeric column is standardised with StandardScaler
# - the categorical columns are one-hot encoded
# handle_unknown='ignore' makes the encoder output all zeros for a category it
# did not see while fitting. With the default ('error'), predicting on a row
# with an unseen category raises a ValueError, which can happen whenever a rare
# category lands only in the test split.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['Advertising_Expenditure']),
        ('cat', OneHotEncoder(handle_unknown='ignore'),
         ['Audience_Segmentation', 'Advertising_Platform']),
    ])

# Define the model: the preprocessing step followed by a linear regression
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
split_options = {'test_size': 0.2, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Fit the pipeline on the training rows
model.fit(X_train, y_train)

# Predict sales for the held-out rows
y_pred = model.predict(X_test)

# Score the predictions against the held-out targets and report both metrics
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f'Mean Squared Error: {mse:.2f}')
print(f'R^2 Score: {r2:.2f}')

# Scatter plot comparing the held-out sales values with the model's predictions
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred, alpha=0.7)
ax.set_xlabel('Actual Sales')
ax.set_ylabel('Predicted Sales')
ax.set_title('Actual vs Predicted Sales')
plt.show()

# Visualization of the fitted linear model's coefficients, one bar per
# preprocessed feature column.
fitted_preprocessor = model.named_steps['preprocessor']
coefficients = model.named_steps['regressor'].coef_

# Take the column names from the fitted ColumnTransformer itself so they always
# line up with the columns the regressor was trained on, instead of rebuilding
# them by hand from a positional index into its transformer list.
# The names come back as '<transformer>__<column>'; the transformer prefix is
# dropped to keep the axis labels short.
feature_names = [
    name.split('__', 1)[-1]
    for name in fitted_preprocessor.get_feature_names_out()
]
coeff_df = pd.DataFrame({'Feature': feature_names, 'Coefficient': coefficients})
# Largest positive coefficient first, most negative last
coeff_df = coeff_df.sort_values(by='Coefficient', ascending=False)

plt.figure(figsize=(12, 8))
sns.barplot(x='Coefficient', y='Feature', data=coeff_df)
plt.title('Feature Importance (Coefficients) in Sales Prediction')
plt.show()
