Import Libraries

In [6]:
import pandas as pd
import numpy as np
from scipy import stats
import statsmodels.api as sm
from statsmodels.formula.api import ols

Dataset Generation

In [7]:
# Seed NumPy's global generator so the synthetic dataset is identical on every run
np.random.seed(42)
num_records = 400

# Deterministic columns: one calendar day and one sequential ID per record
date_range = pd.date_range(start='2023-01-01', periods=num_records, freq='D')
product_ids = np.arange(1, num_records + 1)

# Random columns. These four draws consume the seeded stream in sequence, so
# their order must not be rearranged or the generated values will change.
production_times = np.random.randint(30, 180, size=num_records)  # whole minutes, 30 to 179
temperatures = np.random.uniform(15, 30, size=num_records)       # degrees Celsius
humidities = np.random.uniform(20, 80, size=num_records)         # percent
quality_scores = np.random.uniform(50, 100, size=num_records)    # scores from 50 up to 100

# Assemble the columns (in display order) into a DataFrame
data = dict(
    Production_Date=date_range,
    Product_ID=product_ids,
    Production_Time=production_times,
    Temperature=temperatures,
    Humidity=humidities,
    Quality_Score=quality_scores,
)
df = pd.DataFrame(data)

# Write the dataset to an Excel workbook (without the index) and report the location
excel_file_path = '/content/production_quality_dataset.xlsx'
df.to_excel(excel_file_path, index=False)
print("Dataset saved to " + excel_file_path)

Dataset saved to /content/production_quality_dataset.xlsx


Analyzing the data using statistical techniques

In [8]:
def median_split_ttest(frame, factor, target='Quality_Score'):
    """Run an independent two-sample t-test on a median split of one factor.

    The rows of ``frame`` are divided into those whose ``factor`` value is
    strictly above the factor's median and those at or below it; the
    ``target`` values of the two groups are then compared.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table containing both the ``factor`` and ``target`` columns.
    factor : str
        Name of the column used to split the rows at its median.
    target : str, default 'Quality_Score'
        Name of the column whose values are compared between the groups.

    Returns
    -------
    The result object returned by ``scipy.stats.ttest_ind`` (above-median
    group first, at-or-below-median group second).
    """
    # Compute the median once and reuse it for both sides of the split
    cutoff = frame[factor].median()
    above = frame.loc[frame[factor] > cutoff, target]
    at_or_below = frame.loc[frame[factor] <= cutoff, target]
    return stats.ttest_ind(above, at_or_below)


# Descriptive statistics
print("Descriptive Statistics:")
print(df.describe())

# ANOVA (typ=2) on a linear model of Quality Score with Temperature, Humidity
# and Production Time as predictors
model = ols('Quality_Score ~ Temperature + Humidity + Production_Time', data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print("\nANOVA Table:")
print(anova_table)

# T-tests for each factor: above-median rows vs. at-or-below-median rows
print("\nT-tests:")
ttest_temp = median_split_ttest(df, 'Temperature')
print(f"T-test for Temperature: {ttest_temp}")

ttest_humid = median_split_ttest(df, 'Humidity')
print(f"T-test for Humidity: {ttest_humid}")

ttest_time = median_split_ttest(df, 'Production_Time')
print(f"T-test for Production Time: {ttest_time}")

Descriptive Statistics:
           Production_Date  Product_ID  Production_Time  Temperature  \
count                  400  400.000000       400.000000   400.000000   
mean   2023-07-19 12:00:00  200.500000       107.260000    22.469611   
min    2023-01-01 00:00:00    1.000000        30.000000    15.162565   
25%    2023-04-10 18:00:00  100.750000        70.000000    18.539599   
50%    2023-07-19 12:00:00  200.500000       112.000000    22.402841   
75%    2023-10-27 06:00:00  300.250000       145.000000    26.408228   
max    2024-02-04 00:00:00  400.000000       179.000000    29.995765   
std                    NaN  115.614301        43.582807     4.458471   

         Humidity  Quality_Score  
count  400.000000     400.000000  
mean    50.123635      75.170983  
min     20.277921      50.246999  
25%     34.006741      62.156149  
50%     50.557316      75.832692  
75%     65.764728      87.145348  
max     79.876047      99.970686  
std     17.584276      14.674794  

ANOVA Table

Strategies for improving product quality based on the analysis

In [9]:
def propose_strategies(anova_table, ttests, alpha=0.05):
    """Turn significant ANOVA and t-test results into recommendation strings.

    Parameters
    ----------
    anova_table : pandas.DataFrame
        Table indexed by factor name with a 'PR(>F)' column of p-values.
    ttests : dict
        Maps a display name to a test result exposing a ``pvalue`` attribute.
    alpha : float, default 0.05
        Significance level; only results with a p-value strictly below it
        produce a recommendation.

    Returns
    -------
    list of str
        One "Optimize ..." message per significant ANOVA row (in index
        order), followed by one "Adjust ..." message per significant t-test
        (in dict order). Empty when nothing is significant.
    """
    strategies = []
    for factor in anova_table.index:
        p_value = anova_table.loc[factor, 'PR(>F)']
        # A NaN p-value never compares below alpha, so rows without a
        # p-value (presumably the 'Residual' row of an anova_lm table)
        # are skipped here.
        if p_value < alpha:
            strategies.append(f"Optimize {factor.lower()} as it significantly influences product quality (p-value={p_value:.3f}).")

    for name, test in ttests.items():
        if test.pvalue < alpha:
            strategies.append(f"Adjust {name.lower()} as it significantly influences product quality (p-value={test.pvalue:.3f}).")

    return strategies

# Pair each display name with its t-test result; the dict order fixes the
# order in which t-test-based strategies are reported.
ttests = dict(zip(
    ('Temperature', 'Humidity', 'Production Time'),
    (ttest_temp, ttest_humid, ttest_time),
))

# Derive the recommendations and print them as a bulleted list
strategies = propose_strategies(anova_table, ttests)
print("\nProposed Strategies for Improving Product Quality:")
for strategy in strategies:
    print("- " + strategy)




Proposed Strategies for Improving Product Quality:
- Optimize production_time as it significantly influences product quality (p-value=0.048).
