In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy.stats import chi2_contingency, ttest_ind, ttest_rel

# Sample data (Replace this with your actual data)
data = {
    'Sales': [100, 150, 120, 180, 200, 90, 110, 130, 160, 140],
    'Digital_Marketing': [0, 1, 1, 1, 1, 0, 0, 1, 1, 0],
    'Region': ['North', 'South', 'East', 'West', 'North', 'South', 'East', 'West', 'North', 'South'],
    'Manager': ['A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B']
}

df = pd.DataFrame(data)

# a. Increase in sales after digital marketing
sales_before_digital = df[df['Digital_Marketing'] == 0]['Sales']
sales_after_digital = df[df['Digital_Marketing'] == 1]['Sales']

# Perform a t-test to check if there is a significant difference
t_stat, p_value = ttest_rel(sales_before_digital, sales_after_digital)
if p_value < 0.05:
    print("There is a significant increase in sales after digital marketing.")
else:
    print("There is no significant increase in sales after digital marketing.")

# b. Dependency between "Region" and "Manager"
contingency_table = pd.crosstab(df['Region'], df['Manager'])
chi2, p_value, _, _ = chi2_contingency(contingency_table)

if p_value < 0.05:
    print("There is a significant dependency between Region and Manager.")
else:
    print("There is no significant dependency between Region and Manager.")

# Visualization of data
plt.figure(figsize=(12, 6))
sns.boxplot(x='Digital_Marketing', y='Sales', data=df)
plt.title('Sales Before and After Digital Marketing')
plt.xlabel('Digital Marketing (0: Before, 1: After)')
plt.ylabel('Sales')
plt.show()

# Visualization of dependency
plt.figure(figsize=(10, 6))
sns.heatmap(contingency_table, annot=True, cmap='viridis', fmt='d')
plt.title('Dependency Between Region and Manager')
plt.show()


ValueError: ignored