In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset and clean it in a single pipeline: read the CSV, discard
# repeated rows, then discard every row that still has a missing value.
data = (
    pd.read_csv("Pharma_dtaa.csv")
    .drop_duplicates()
    .dropna()
)

# Bar plot: total sales per region.
# Sum "Sales" within each "Region"; reset_index() turns "Region" back into an
# ordinary column so it can be referenced by name when plotting.
region_sales = (
    data.groupby("Region")["Sales"]
    .sum()
    .reset_index()
)
fig, ax = plt.subplots(figsize=(8, 6))
sns.barplot(data=region_sales, x="Region", y="Sales", ax=ax)
ax.set(title="Total Sales per Region", xlabel="Region", ylabel="Total Sales")
plt.show()

# Scatter plot: one point per row, marketing spend on x against sales on y.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=data, x="Marketing_Spend", y="Sales", ax=ax)
ax.set(title="Marketing Spend vs Sales", xlabel="Marketing Spend", ylabel="Sales")
plt.show()

# Box plot: distribution of "Effectiveness" within each "Age_Group".
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=data, x="Age_Group", y="Effectiveness", ax=ax)
ax.set(
    title="Drug Effectiveness by Age Group",
    xlabel="Age Group",
    ylabel="Effectiveness",
)
plt.show()

# Line plot: sales against trial period, with a separate coloured line for
# each value of "Product".
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(data=data, x="Trial_Period", y="Sales", hue="Product", ax=ax)
ax.set(
    title="Sales Trend per Product over Trial Periods",
    xlabel="Trial Period",
    ylabel="Sales",
)
plt.show()

# Heatmap: pairwise correlation between sales, marketing spend and
# effectiveness, with each cell annotated to two decimal places.
metric_columns = ["Sales", "Marketing_Spend", "Effectiveness"]
correlation_data = data[metric_columns].corr()
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(correlation_data, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
ax.set_title("Correlation Heatmap")
plt.show()
