In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os


# Load the raw sales dataset that every later step analyses.
csv_path = "/Users/dariyabaigereyeva/Desktop/product masters/venv/sales_data.csv"
df = pd.read_csv(csv_path)

print("✅ Данные успешно загружены!")


# Folder that receives the figures produced below; exist_ok=True makes the
# call a no-op when the folder is already there, so re-runs do not fail.
output_dir = "/Users/dariyabaigereyeva/Desktop/product masters/venv/eda_outputs"
os.makedirs(output_dir, exist_ok=True)


# Numeric columns inspected for outliers and correlations.
numeric_cols = [
    'Unit price',
    'Quantity',
    'Tax 5%',
    'Total',
    'cogs',
    'gross income',
    'Rating',
]


# One boxplot per numeric column, laid out on a 3x3 grid. Axes are added one
# at a time so the unused grid slots stay blank instead of showing empty axes.
fig = plt.figure(figsize=(16, 10))
for position, col in enumerate(numeric_cols, start=1):
    ax = fig.add_subplot(3, 3, position)
    sns.boxplot(data=df, y=col, ax=ax)
    ax.set_title(f'Boxplot: {col}')
fig.tight_layout()
fig.savefig(f"{output_dir}/boxplots_outliers.png")
plt.close(fig)  # release the figure so it does not accumulate in memory
print("✅ Выбросы сохранены как boxplots_outliers.png")


# Categorical columns whose class balance is plotted, one figure per column.
categorical_cols = [
    'Branch',
    'City',
    'Customer type',
    'Gender',
    'Product line',
    'Payment',
]
for col in categorical_cols:
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.countplot(data=df, x=col, ax=ax)
    ax.set_title(f'Distribution: {col}')
    # Tilt the category labels so longer names do not overlap.
    for tick_label in ax.get_xticklabels():
        tick_label.set_rotation(45)
    fig.tight_layout()
    fig.savefig(f"{output_dir}/cat_balance_{col}.png")
    plt.close(fig)


# Pairwise scatter/histogram grid for a subset of the numeric columns.
# pairplot creates its own figure, which becomes the current figure, so the
# pyplot-level savefig/close below act on that figure.
pairplot_columns = ['Unit price', 'Quantity', 'Total', 'Rating']
pairplot_data = df[pairplot_columns]
sns.pairplot(pairplot_data)
plt.savefig(f"{output_dir}/pairplot.png")
plt.close()
print("✅ Pairplot сохранён")


# Annotated heatmap of the pairwise correlations between the numeric columns.
correlation_matrix = df[numeric_cols].corr()
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap="coolwarm",
    fmt=".2f",
    ax=ax,
)
ax.set_title("Correlation Heatmap")
fig.tight_layout()
fig.savefig(f"{output_dir}/correlation_heatmap.png")
plt.close(fig)
print("✅ Корреляция сохранена")


# Export every row that is an IQR outlier in at least one numeric column.
#
# A single boolean mask is OR-ed across the columns instead of concatenating
# per-column frames and calling drop_duplicates(). This way:
#   * a row flagged in several columns is still exported exactly once;
#   * genuinely repeated source records are NOT collapsed (drop_duplicates
#     compares row values, so it would silently merge them);
#   * rows keep their original order from the source data;
#   * when nothing is flagged the export still carries the column headers.
outlier_mask = pd.Series(False, index=df.index)
for col in numeric_cols:
    Q1 = df[col].quantile(0.25)
    Q3 = df[col].quantile(0.75)
    IQR = Q3 - Q1
    # Tukey fences: anything more than 1.5 * IQR outside the quartiles.
    lower = Q1 - 1.5 * IQR
    upper = Q3 + 1.5 * IQR
    # NaN compares False on both sides, so missing values are never flagged.
    outlier_mask = outlier_mask | (df[col] < lower) | (df[col] > upper)

# .copy() detaches the result from df so later edits do not alias the source.
outliers_df = df[outlier_mask].copy()
outliers_path = "/Users/dariyabaigereyeva/Desktop/product masters/venv/sales_outliers.xlsx"
outliers_df.to_excel(outliers_path, index=False)
print(f"✅ Файл с выбросами создан: {outliers_path}")