In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset
data = pd.read_csv('PurchasesFINAL12312016.csv')

# Preview the first few rows to understand the structure.
# A bare `data.head()` is only rendered when it is the final expression of a
# notebook cell (and never in a plain script), so print it explicitly.
print(data.head())

# Basic data overview: column names, data types, and missing values.
# DataFrame.info() writes its report to stdout itself, so no print() is needed.
data.info()

# Summary statistics for numeric columns; kept in a variable for later reuse
# and printed for the same reason as the preview above.
summary_stats = data.describe()
print(summary_stats)

# One figure with two side-by-side panels, one histogram per column
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14, 6))

# (column to plot, bar colour, panel title), listed left to right
panels = [
    ('PurchasePrice', 'blue', 'Distribution of Purchase Price'),
    ('Quantity', 'green', 'Distribution of Quantity'),
]

# Draw a 20-bin histogram with a KDE overlay into each panel
for ax, (column, colour, title) in zip(axes, panels):
    sns.histplot(data[column], bins=20, kde=True, color=colour, ax=ax)
    ax.set_title(title)

# Adjust the layout, then render the figure
fig.tight_layout()
plt.show()

# Total purchases (Dollars) per vendor, sorted from largest to smallest
vendor_purchases = (
    data.groupby('VendorName')['Dollars']
    .sum()
    .sort_values(ascending=False)
)

# Slice the top 10 once and reuse it for both axes of the chart
top_vendors = vendor_purchases.head(10)

# Horizontal bar chart for the top 10 vendors.
# NOTE: seaborn 0.13 deprecated passing `palette` without `hue` (slated for
# removal in 0.14). Assigning `hue` to the same variable as `y` keeps the
# one-colour-per-bar look; `dodge=False` keeps the bars full width on older
# seaborn releases that would otherwise dodge by hue level.
plt.figure(figsize=(10, 6))
ax = sns.barplot(
    x=top_vendors.values,
    y=top_vendors.index,
    hue=top_vendors.index,
    palette='viridis',
    dodge=False,
)

# Some seaborn versions add a legend for `hue`; it would only repeat the
# y-axis tick labels, so remove it when present.
legend = ax.get_legend()
if legend is not None:
    legend.remove()

plt.title('Top 10 Vendors by Total Purchases (Dollars)')
plt.xlabel('Total Dollars')
plt.show()

# Make sure 'PODate' is a datetime column before using the .dt accessor
data['PODate'] = pd.to_datetime(data['PODate'])

# Monthly period of each row's PO date, used as the grouping key
po_month = data['PODate'].dt.to_period('M')

# Total purchases (Dollars) in each month
monthly_purchases = data.groupby(po_month)['Dollars'].sum()

# Line plot of the monthly totals over time on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(10, 6))
monthly_purchases.plot(ax=ax)
ax.set_title('Total Purchases Over Time')
ax.set_ylabel('Dollars')
ax.set_xlabel('Month')
plt.show()