# Nyumbani Green Sales Data Analysis

This notebook contains a comprehensive analysis of Nyumbani Green's sales data.

## 1. Setup and Data Loading

### 1.1 Import Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import numpy as np
from sklearn.cluster import KMeans
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import silhouette_score

### 1.2 Load Data

In [None]:
# Load the raw sales export; the path is relative to the notebook's working directory.
df = pd.read_csv("nyumbani_green_sales.csv")

# Preview the first rows to sanity-check that the file parsed as expected.
print(df.head())

# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would append a stray "None" line).
df.info()

## 2. Data Cleaning and Preparation

In [None]:
# Parse the date column so it can be grouped and plotted as a time axis.
df["date"] = pd.to_datetime(df["date"])

# Force the measure columns to a numeric dtype. Values that cannot be parsed
# become NaN (errors="coerce") instead of raising.
numeric_columns = ["price_total", "product_uom_qty", "qty_delivered"]
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors="coerce")

# Report missing values per column *after* the coercion above, so NaNs created
# by unparseable numeric entries are included in the counts.
print(df.isnull().sum())

## 3. Exploratory Data Analysis

### 3.1 Basic Statistics

In [None]:
# Descriptive statistics produced by DataFrame.describe(), printed as plain text.
summary_stats = df.describe()
print(summary_stats)

### 3.2 Sales Distribution

In [None]:
# Histogram of `price_total` (50 bins) with a kernel-density overlay.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df["price_total"], bins=50, kde=True, ax=ax)
ax.set_title("Distribution of Sales")
ax.set_xlabel("Sale Amount")
ax.set_ylabel("Frequency")
plt.show()

### 3.3 Top Products

In [None]:
# Total revenue per product, largest first, limited to the ten best sellers.
# Series.sort_values() has no `descending` keyword (passing it raises
# TypeError); descending order is requested with ascending=False.
top_products = (
    df.groupby("Product")["price_total"]
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

# Bar chart of the ten products; labels are rotated so long names stay legible.
plt.figure(figsize=(12, 6))
top_products.plot(kind="bar")
plt.title("Top 10 Products by Total Sales")
plt.xlabel("Product")
plt.ylabel("Total Sales")
plt.xticks(rotation=45, ha="right")
plt.tight_layout()
plt.show()

### 3.4 Sales Trend

In [None]:
# Sum `price_total` for each distinct `date` value and plot it as a line.
# NOTE(review): grouping is on the raw `date` values; if they carry a
# time-of-day component the totals are per timestamp, not per day — confirm.
daily_sales = df.groupby("date", as_index=False)["price_total"].sum()

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(daily_sales["date"], daily_sales["price_total"])
ax.set_title("Daily Sales Trend")
ax.set_xlabel("Date")
ax.set_ylabel("Total Sales")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

## 4. Customer Segmentation

In [None]:
# TODO: the RFM computation is still a placeholder. Nothing in this cell (or
# in the visible notebook above it) builds `rfm`, so it must already exist
# with "Recency", "Frequency" and "Monetary" columns before the plots can run.
# ...

# One histogram (with KDE overlay) per RFM metric, laid out side by side.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(20, 5))
rfm_panels = (
    (ax1, "Recency", "Recency Distribution"),
    (ax2, "Frequency", "Frequency Distribution"),
    (ax3, "Monetary", "Monetary Distribution"),
)
for panel_ax, metric, panel_title in rfm_panels:
    sns.histplot(rfm[metric], kde=True, ax=panel_ax)
    panel_ax.set_title(panel_title)
plt.tight_layout()
plt.show()

## 5. Geographical Analysis

In [None]:
# Top 10 locations by sales: total revenue per `customer_residence`, largest
# first. Series.sort_values() has no `descending` keyword (passing it raises
# TypeError); descending order is requested with ascending=False.
top_locations = (
    df.groupby('customer_residence')['price_total']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

# Bar chart of the ten locations; labels are rotated so long names stay legible.
plt.figure(figsize=(12, 6))
top_locations.plot(kind='bar')
plt.title('Top 10 Locations by Total Sales')
plt.xlabel('Location')
plt.ylabel('Total Sales')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

## 6. Product Performance Analysis

In [None]:
# TODO: implement the product-performance-over-time computation here; this
# cell is currently a placeholder and executes nothing.

# TODO: add the visualization of product performance.

## 7. Advanced Analysis

### 7.1 Correlation Analysis

In [None]:
# TODO: implement the correlation analysis here; this cell is currently a
# placeholder and executes nothing.

### 7.2 Advanced Customer Segmentation

In [None]:
# TODO: implement the advanced customer segmentation here; this cell is
# currently a placeholder and executes nothing.
# NOTE(review): KMeans and silhouette_score are imported at the top of the
# notebook but unused so far — presumably intended for this section; confirm.

### 7.3 Product Mix Optimization

In [None]:
# TODO: implement the product mix optimization here; this cell is currently a
# placeholder and executes nothing.

## 8. Sales Forecasting

In [None]:
# TODO: implement the sales forecasting here; this cell is currently a
# placeholder and executes nothing.
# NOTE(review): seasonal_decompose and ARIMA are imported at the top of the
# notebook but unused so far — presumably intended for this section; confirm.

## 9. Recommendations

In [None]:
# Print the section heading (preceded by a blank line) for the recommendations.
recommendations_heading = "\nRecommendations for Nyumbani Green:"
print(recommendations_heading)
# TODO: the recommendations themselves are not implemented yet; only the
# heading above is emitted.

## Conclusion

**TODO:** Summarize the key findings and their implications for Nyumbani Green's business strategy.