# Milk Records Exploration Notebook

This notebook is intended for exploratory data analysis (EDA) of the milk records dataset. It will include data loading, cleaning, and visualization steps to gain insights before integrating findings into the main dashboard.

In [None]:
# Import necessary libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import requests
from io import StringIO

# Define the URL for the dataset (CSV export link of a Google Sheets tab)
sheet_url = "https://docs.google.com/spreadsheets/d/1tAnw43L2nrF-7wGqqppF51w6tE8w42qhmPKSXBO3fmo/export?format=csv&gid=725446854"

# Load the data
# requests has no default timeout; without one a stalled connection would
# hang this cell indefinitely.
response = requests.get(sheet_url, timeout=30)
# Fail loudly on HTTP errors (4xx/5xx) instead of handing an error page to
# the CSV parser, which would produce a confusing, malformed dataframe.
response.raise_for_status()
data = StringIO(response.text)
df = pd.read_csv(data)
df.columns = df.columns.str.strip()  # Clean column names (drop stray whitespace)

# Display the first few rows of the dataframe
df.head()

In [None]:
# Data cleaning
col_name = "How much milk received? (ml/Liters)"
if col_name in df.columns:
    # Cast to str first: the .str accessor raises if pandas already parsed the
    # column as numeric, and missing cells (NaN) would otherwise break the
    # final integer conversion.
    cleaned_quantity = (
        df[col_name]
        .astype(str)
        .str.replace('ml', '', regex=False)
        .str.strip()
    )
    # errors='coerce' turns blanks and any other non-numeric entry into NaN
    # instead of raising, so they show up in the missing-value check below.
    df[col_name] = pd.to_numeric(cleaned_quantity, errors='coerce')
df['Date of Record'] = pd.to_datetime(df['Date of Record'])

# Check for missing values (includes quantity entries that could not be parsed)
df.isnull().sum()

In [None]:
# Exploratory Data Analysis

# Summary statistics
# A bare expression is only rendered when it is the last statement of a cell;
# here the plot follows, so print explicitly or the summary is silently lost.
print(df.describe())

# Visualizations
# Line plot of the milk quantity column against the record date.
plt.figure(figsize=(12, 6))
sns.lineplot(data=df, x='Date of Record', y=col_name, marker='o')
plt.title('Daily Milk Received Over Time')
plt.xticks(rotation=45)  # rotate date labels so they do not overlap
plt.show()

In [None]:
# Milk Received Status Distribution
# Bar chart counting how many records fall under each 'Milk Received?' value.
status_fig, status_ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=df, x='Milk Received?', ax=status_ax)
status_ax.set_title('Distribution of Milk Received Status')
plt.show()

In [None]:
# Milk Quantity Distribution
# Histogram (20 bins) of the milk quantity column with a KDE curve overlaid.
quantity_fig, quantity_ax = plt.subplots(figsize=(8, 6))
quantities = df[col_name]
sns.histplot(quantities, bins=20, kde=True, ax=quantity_ax)
quantity_ax.set_title('Distribution of Milk Quantity')
quantity_ax.set_xlabel('Milk Quantity (ml)')
quantity_ax.set_ylabel('Frequency')
plt.show()