# Week 7: Statistical Business Analysis
Analysis using your provided Sales and Customer Churn datasets.

In [None]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm

# Read the two provided CSV files (paths are relative to the working directory).
sales = pd.read_csv('business_data.csv')
churn = pd.read_csv('customer_churn.csv')

# Print a heading, then render the first rows of each dataset in turn.
for heading, frame in (
    ('Sales Data Preview', sales),
    ('Churn Data Preview', churn),
):
    print(heading)
    display(frame.head())

## Descriptive Statistics (Sales)

In [None]:
# Summarise the three sales columns with DataFrame.describe().
summary_columns = ['Quantity', 'Price', 'Total_Sales']
sales_desc = sales[summary_columns].describe()
# Bare expression kept as the cell's last line so the notebook renders it.
sales_desc

## Histogram of Total Sales

In [None]:
# Plot the Total_Sales histogram (with a KDE curve) on a fresh figure,
# write it to sales_histogram.png, then close that figure.
hist_fig = plt.figure()
hist_ax = sns.histplot(sales['Total_Sales'], kde=True)
hist_ax.set_title('Total Sales Distribution')
hist_fig.savefig('sales_histogram.png')
plt.close(hist_fig)

## Correlation Analysis (Sales)

In [None]:
# Pairwise correlation matrix of the three sales columns (DataFrame.corr defaults).
corr_columns = ['Quantity', 'Price', 'Total_Sales']
corr = sales[corr_columns].corr()
print(corr)

# Draw the matrix as an annotated heatmap on a fresh figure,
# write it to correlation_heatmap.png, then close that figure.
heatmap_fig = plt.figure()
heatmap_ax = sns.heatmap(corr, annot=True)
heatmap_ax.set_title('Correlation Heatmap')
heatmap_fig.savefig('correlation_heatmap.png')
plt.close(heatmap_fig)

## Hypothesis Test (Churn vs Monthly Charges)

In [None]:
# Split MonthlyCharges into the rows where Churn equals 1 and where it equals 0.
# NOTE(review): assumes Churn is stored as numeric 1/0 -- confirm against the CSV.
monthly_charges = churn['MonthlyCharges']
churn_flag = churn['Churn']
group1 = monthly_charges[churn_flag == 1]
group0 = monthly_charges[churn_flag == 0]

# equal_var=False requests Welch's t-test (no equal-variance assumption).
tstat, pval = stats.ttest_ind(group1, group0, equal_var=False)
print(f'T-test: t={tstat:.4f}, p={pval:.6f}')

## Regression (Price → Total Sales)

In [None]:
# Predictor (double brackets keep Price as a one-column DataFrame) and target.
X, y = sales[['Price']], sales['Total_Sales']

# Fit a linear regression of Total_Sales on Price, then score it on the same
# data it was fitted on (in-sample R-squared).
model = LinearRegression()
model.fit(X, y)
r2 = model.score(X, y)
print('R-squared:', r2)