In [2]:
# Task 1 – Data Cleaning
import pandas as pd
import numpy as np

# Cleaning rules, gathered in one place so they can be reviewed and tuned together.
DATA_FILE = "production_data.csv"
SUPPLIER_LABELS = {1: "national_supplier", 2: "international_supplier"}
DEFAULT_SUPPLIER = "national_supplier"
VALID_PIGMENTS = ["type_a", "type_b", "type_c"]
DEFAULT_PIGMENT = "other"
VALID_SPEEDS = ["Low", "Medium", "High"]
DEFAULT_SPEED = "Not Specified"
PIGMENT_QUANTITY_RANGE = (1, 100)  # inclusive bounds
MEAN_FILLED_COLUMNS = ("mixing_time", "product_quality_score")


def _normalise_labels(column, transform):
    """Trim whitespace from string entries of ``column`` and apply ``transform``.

    Non-string entries (NaN, numbers) are passed through unchanged, so a later
    ``isin`` check still treats them as invalid.
    """
    return column.map(
        lambda value: transform(value.strip()) if isinstance(value, str) else value
    )


def clean_production_data(raw):
    """Return a cleaned copy of the production data; ``raw`` itself is not modified.

    Rules applied, column by column:

    * ``batch_id``: rows with a missing value are dropped.
    * ``production_date``: parsed with ``pd.to_datetime``.
    * ``raw_material_supplier``: codes 1 / 2 become the labels in
      ``SUPPLIER_LABELS``; anything else (including missing) becomes
      ``DEFAULT_SUPPLIER``. Codes stored as numeric strings are accepted too.
    * ``pigment_type``: trimmed and lower-cased; values outside
      ``VALID_PIGMENTS`` become ``DEFAULT_PIGMENT``.
    * ``pigment_quantity``: values outside ``PIGMENT_QUANTITY_RANGE`` are
      treated as missing; missing values are filled with the median of the
      remaining values.
    * ``mixing_time`` and ``product_quality_score``: missing values are filled
      with the column mean rounded to 2 decimals.
    * ``mixing_speed``: trimmed and capitalised; values outside
      ``VALID_SPEEDS`` become ``DEFAULT_SPEED``.

    Parameters
    ----------
    raw : pandas.DataFrame
        Frame containing the columns listed above.

    Returns
    -------
    pandas.DataFrame
        New frame with the same columns and the original index of the rows
        that were kept.
    """
    # .copy() so the column assignments below never write through to ``raw``.
    cleaned = raw.dropna(subset=["batch_id"]).copy()

    cleaned["production_date"] = pd.to_datetime(cleaned["production_date"])

    # Coerce first so codes read as text ("1", "2") still hit the integer keys
    # instead of silently falling through to the default label.
    supplier_codes = pd.to_numeric(cleaned["raw_material_supplier"], errors="coerce")
    cleaned["raw_material_supplier"] = (
        supplier_codes.map(SUPPLIER_LABELS).fillna(DEFAULT_SUPPLIER)
    )

    # Normalise case/whitespace before validating, so spelling variants of a
    # valid label are kept rather than collapsed into the default.
    pigment = _normalise_labels(cleaned["pigment_type"], str.lower)
    cleaned["pigment_type"] = pigment.where(
        pigment.isin(VALID_PIGMENTS), DEFAULT_PIGMENT
    )

    # where() returns a new, upcast series; this avoids assigning NaN in place
    # into a possibly-integer column, which newer pandas versions deprecate.
    lowest, highest = PIGMENT_QUANTITY_RANGE
    quantity = cleaned["pigment_quantity"]
    quantity = quantity.where((quantity >= lowest) & (quantity <= highest))
    cleaned["pigment_quantity"] = quantity.fillna(quantity.median())

    for name in MEAN_FILLED_COLUMNS:
        fill_value = round(cleaned[name].mean(), 2)
        cleaned[name] = cleaned[name].fillna(fill_value)

    speed = _normalise_labels(cleaned["mixing_speed"], str.capitalize)
    cleaned["mixing_speed"] = speed.where(speed.isin(VALID_SPEEDS), DEFAULT_SPEED)

    return cleaned


# load data and clean it
clean_data = clean_production_data(pd.read_csv(DATA_FILE))

# `df` stays bound to the cleaned frame, as it was before this cell was refactored.
df = clean_data

FileNotFoundError: [Errno 2] No such file or directory: 'production_data.csv'

In [3]:
# Task 2 – Aggregation
import pandas as pd

df = pd.read_csv("production_data.csv")

# Mean quality score and mean pigment quantity for each supplier value found
# in the raw file.
supplier_groups = df.groupby("raw_material_supplier")
supplier_means = supplier_groups.agg(
    avg_product_quality_score=("product_quality_score", "mean"),
    avg_pigment_quantity=("pigment_quantity", "mean"),
)

# Round while the supplier is still the index, so only the two averages are
# touched, then expose the supplier as a regular column.
aggregated_data = supplier_means.round(2).reset_index()

FileNotFoundError: [Errno 2] No such file or directory: 'production_data.csv'

In [5]:
# Task 3 – Filtered Analysis
import pandas as pd

df = pd.read_csv("production_data.csv")

# Selection criteria for this task.
SUPPLIER_CODE = 2
MIN_PIGMENT_QUANTITY = 35  # strict lower bound

from_supplier = df["raw_material_supplier"] == SUPPLIER_CODE
above_minimum = df["pigment_quantity"] > MIN_PIGMENT_QUANTITY
filtered = df.loc[from_supplier & above_minimum]

# Averages over the selected rows, rounded to 2 decimals.
mean_quantity = round(filtered["pigment_quantity"].mean(), 2)
mean_quality = round(filtered["product_quality_score"].mean(), 2)

# One-row summary of the selection.
pigment_data = pd.DataFrame(
    {
        "raw_material_supplier": [SUPPLIER_CODE],
        "pigment_quantity": [mean_quantity],
        "avg_product_quality_score": [mean_quality],
    }
)

FileNotFoundError: [Errno 2] No such file or directory: 'production_data.csv'

In [None]:
# Task 4 – Statistical Summary
import pandas as pd

df = pd.read_csv("production_data.csv")

quality = df["product_quality_score"]
quantity = df["pigment_quantity"]

# Mean and standard deviation of each column, plus the correlation between
# the two, all rounded to 2 decimals.
summary = {
    "product_quality_score_mean": round(quality.mean(), 2),
    "product_quality_score_sd": round(quality.std(), 2),
    "pigment_quantity_mean": round(quantity.mean(), 2),
    "pigment_quantity_sd": round(quantity.std(), 2),
    "corr_coef": round(quantity.corr(quality), 2),
}

# One-row frame: each statistic becomes a single-element column.
product_quality = pd.DataFrame({label: [value] for label, value in summary.items()})