In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import openpyxl
import warnings
warnings.filterwarnings('ignore')

In [2]:
#Importing File
df = pd.read_excel('Stock Issues 1-12 August 2023.xlsx')
df.head(10)

Unnamed: 0,Date,Issued To,Item Name,Qty,Unit Price,Amount
0,2023-08-01,,UNKNOWN,,,
1,2023-08-01,,Engine Oil Delo Silver SAE40/ Diesel Engine Oil,9.0,12000.0,108000.0
2,2023-08-01,,"Oil Filter 04152-38020,YZZA4 L/C VDJ200R",1.0,48284.0,48284.0
3,2023-08-01,,"Diesel Filter 23390-51070 - L/C Hzj 76,79,VDJ200R",1.0,39500.0,39500.0
4,2023-08-01,,Air Cleaner 17801-51020 LC VDJ 200R,1.0,98034.0,98034.0
5,2023-08-01,,NATIONAL WATER AND SEWERAGE CORPORATION(NWSC),0.0,0.0,0.0
6,2023-08-01,,"Z-Link 48810-0K010 Lan,Kun25r",1.0,165000.0,165000.0
7,2023-08-01,,"Z-Link 48820-0K030 Lan,Kun25r",1.0,165000.0,165000.0
8,2023-08-01,,UPDF,0.0,0.0,0.0
9,2023-08-01,,Rear Brake Pads 04466-60160 LC VDJ 200R/KD2388,1.0,240000.0,240000.0


In [3]:
df.shape #Shape of our data - Rows and Columns

(1370, 6)

# Data Cleaning and Transformation

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1370 entries, 0 to 1369
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   Date        1370 non-null   datetime64[ns]
 1   Issued To   0 non-null      float64       
 2   Item Name   1370 non-null   object        
 3   Qty         1186 non-null   float64       
 4   Unit Price  1178 non-null   float64       
 5   Amount      1178 non-null   float64       
dtypes: datetime64[ns](1), float64(4), object(1)
memory usage: 64.3+ KB


In [5]:
#Changing date column data type to datetime
df["Date"] = pd.to_datetime(df["Date"], format="%A, %d %B %Y")
df.head()

Unnamed: 0,Date,Issued To,Item Name,Qty,Unit Price,Amount
0,2023-08-01,,UNKNOWN,,,
1,2023-08-01,,Engine Oil Delo Silver SAE40/ Diesel Engine Oil,9.0,12000.0,108000.0
2,2023-08-01,,"Oil Filter 04152-38020,YZZA4 L/C VDJ200R",1.0,48284.0,48284.0
3,2023-08-01,,"Diesel Filter 23390-51070 - L/C Hzj 76,79,VDJ200R",1.0,39500.0,39500.0
4,2023-08-01,,Air Cleaner 17801-51020 LC VDJ 200R,1.0,98034.0,98034.0


In [6]:
# Forward-fill missing dates
df["Date"].fillna(method="ffill", inplace=True)
df.head()

Unnamed: 0,Date,Issued To,Item Name,Qty,Unit Price,Amount
0,2023-08-01,,UNKNOWN,,,
1,2023-08-01,,Engine Oil Delo Silver SAE40/ Diesel Engine Oil,9.0,12000.0,108000.0
2,2023-08-01,,"Oil Filter 04152-38020,YZZA4 L/C VDJ200R",1.0,48284.0,48284.0
3,2023-08-01,,"Diesel Filter 23390-51070 - L/C Hzj 76,79,VDJ200R",1.0,39500.0,39500.0
4,2023-08-01,,Air Cleaner 17801-51020 LC VDJ 200R,1.0,98034.0,98034.0


In [9]:
# Replacing null values in "Issued To" column with the corresponding non-null value in "Item Name" column for only null values in QTy
df.loc[df["Issued To"].isnull() & df["Item Name"]==0, "Issued To"] = df.loc[df["Issued To"].isnull() & df["Item Name"]==0, "Item Name"]

df.head()

KeyError: 'Qty'

In [None]:
# Forward-fill missing values in the Issued To Column
df["Issued To"].fillna(method="ffill", inplace=True)

In [None]:
df.head()

In [None]:
# Droping rows with NaN values
df.dropna(inplace=True)

In [None]:
df.head(30)

# Exploratory Data Analysis

In [None]:
df.shape

In [None]:
df.info()

<b>Client exploration</b>

In [None]:
df['Issued To'].nunique()

We have 192 different clients from the month of July

<b>Top Loyal clients</b>

In [None]:
top20 = df['Issued To'].value_counts().head(20)
top20

In [None]:
top20.plot(kind = 'barh')

In [None]:
last20 = df['Issued To'].value_counts().tail(20)
last20

<b>Items Exploration</b>

In [None]:
df.head(1)

In [None]:
df['Items'].nunique()

The store has 502 unique items

<b>Fast Moving items</b>

In [None]:
fast_moving_items = df['Items'].value_counts().head(100)
fast_moving_items

<b>Amount Generated by each item</b>

In [None]:
# Calculating the amount generated by each item by multiplication
df["Total Amount"] = df["Quantity"] * df["Unit Price"]

# Top 50 items based on the total amount
top_50_items = df.groupby("Items")["Total Amount"].sum().nlargest(50)
print(top_50_items)

<b>Trend Analysis</b>

In [None]:
# Calculate the sum of total amount generated per date
date_total = df.groupby("Date")["Total Amount"].sum()

# Plot the line trend
plt.figure(figsize=(10, 6))
plt.plot(date_total.index, date_total.values, marker='o')
plt.title("Line Trend of Total Amount Generated per Date")
plt.xlabel("Date")
plt.ylabel("Total Amount Generated")
plt.xticks(rotation=45)
plt.grid(True)
plt.tight_layout()
plt.show()

<b>Exporting File to Excel</b>

In [None]:
df.to_excel('Stock Issues 1-12 August 2023.xlsx', index = False)