# تحليل البيانات

في هذا الدفتر نحلّل بيانات المبيعات لفهم الاتجاهات والأنماط: الاتجاه الشهري للمبيعات، وتوزيع المبيعات حسب الفئة وحسب شريحة العملاء، ثم ملخّص إحصائي للمبيعات.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import os

# Set style for plots
plt.style.use('seaborn')
sns.set_palette('husl')

# Enable inline plotting
%matplotlib inline

## تحميل ومعالجة البيانات

In [None]:
# Read the sales CSV into `df`, the DataFrame the later cells operate on.
data_path = '../data/train.csv'
df = pd.read_csv(data_path)
print(f"Data loaded. Shape: {df.shape}")

# Last expression of the cell, so the notebook renders the first rows.
df.head()

In [None]:
# Parse both date columns into datetimes; the format is left to pandas' inference.
# NOTE(review): confirm the CSV's day/month order is parsed as intended.
for date_col in ('Order Date', 'Ship Date'):
    df[date_col] = pd.to_datetime(df[date_col])

# Derive calendar features from the order date.
order_dt = df['Order Date'].dt
df['Year'] = order_dt.year
df['Month'] = order_dt.month
df['Day'] = order_dt.day
df['DayOfWeek'] = order_dt.dayofweek  # pandas convention: Monday=0 ... Sunday=6

# Whole days elapsed between placing the order and shipping it.
shipping_delay = df['Ship Date'] - df['Order Date']
df['ShippingDays'] = shipping_delay.dt.days

print("Features added successfully.")

## تحليل اتجاهات المبيعات

In [None]:
# Monthly sales trend: total sales per (Year, Month), drawn as a line chart.
sales_by_month = df.groupby(['Year', 'Month'])['Sales'].sum()
monthly_sales = sales_by_month.reset_index()

# pd.to_datetime can assemble dates from year/month/day columns; the constant
# DAY=1 column anchors every period to the first day of its month.
date_parts = monthly_sales[['Year', 'Month']].assign(DAY=1)
monthly_sales['Date'] = pd.to_datetime(date_parts)

plt.figure(figsize=(12, 6))
plt.plot(monthly_sales['Date'], monthly_sales['Sales'])
plt.title('Monthly Sales Trend')
plt.xlabel('Date')
plt.ylabel('Total Sales')
plt.xticks(rotation=45)  # slant the date labels
plt.tight_layout()
plt.show()

## تحليل توزيع الفئات

In [None]:
# Category distribution: total sales per category, largest first, as a bar chart.
sales_per_category = df.groupby('Category')['Sales'].sum()
category_sales = sales_per_category.sort_values(ascending=False)

plt.figure(figsize=(10, 6))
category_sales.plot.bar()
plt.title('Sales by Category')
plt.xlabel('Category')
plt.ylabel('Total Sales')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

## تحليل شرائح العملاء

In [None]:
# Sales by segment: each customer segment's share of total sales as a pie chart.
segment_sales = df.groupby('Segment')['Sales'].sum()

plt.figure(figsize=(8, 6))
plt.pie(
    segment_sales,
    labels=segment_sales.index,
    autopct='%1.1f%%',  # one decimal place followed by a literal percent sign
)
plt.title('Sales Distribution by Customer Segment')
plt.axis('equal')  # equal axis scaling so the pie is drawn as a circle
plt.show()

## ملخص الإحصاءات

In [None]:
# Descriptive statistics of the Sales column, printed under a header.
sales_stats = df['Sales'].describe()
print("\nSales Statistics:", sales_stats, sep="\n")

# Highest-grossing categories: sum sales per category, sort descending and
# keep at most the first five rows.
category_totals = df.groupby('Category')['Sales'].sum()
top_categories = category_totals.sort_values(ascending=False).head(5)
print("\nTop 5 Categories by Sales:", top_categories, sep="\n")