In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.optimize import minimize

In [2]:
# Load the batch log from the CSV in the working directory. The preview below
# shows the columns 'Batch Status', 'Cycle End Time', 'DataLabel' and
# 'Program Number', plus an 'Unnamed: 0' column read from the file.
df = pd.read_csv('OK FAIL DATE.csv')
# Display the first five rows as a sanity check on the load.
df.head()

Unnamed: 0,Batch Status,Cycle End Time,DataLabel,Program Number
0,FAILED,2019-01-09 09:42:00,1,8
1,OK,2019-01-09 11:30:00,2,8
2,OK,2019-01-09 12:32:00,3,3
3,OK,2019-01-09 14:00:00,4,11
4,OK,2019-01-09 16:10:00,5,10


In [3]:
# Rename 'Cycle End Time' to the shorter 'Time'.
# The result is rebound to `df` rather than using inplace=True, so the cell is a
# plain assignment and safe to re-run (renaming a missing column is a no-op).
df = df.rename(columns={'Cycle End Time': 'Time'})

In [4]:
# Print the timestamp of the first batch (DataLabel 1) and of the last batch.
# The last label is taken from the data instead of being hard-coded, so the cell
# keeps working if the CSV gains or loses rows.
# NOTE(review): assumes the largest DataLabel is the last batch — confirm.
last_label = df['DataLabel'].max()
time1 = df.loc[df['DataLabel'] == 1, 'Time'].values[0]
time2 = df.loc[df['DataLabel'] == last_label, 'Time'].values[0]
print(time1)
print(time2)
# the format of date is yyyy-mm-dd hh:mm:ss

2019-01-09 09:42:00
2021-04-05 16:43:00


In [5]:
# Parse the 'Time' strings once, then store both the parsed timestamps and the
# calendar year derived from them.
timestamps = pd.to_datetime(df['Time'])
df['Time'] = timestamps
df['Year'] = timestamps.dt.year


In [6]:
def neg_log_likelihood(p, a, year, data=None):
    """Negative log-likelihood of one year's batch outcomes under a decaying success model.

    The rows of the given year are walked in order. Each 'OK' row contributes
    ``-log(p)`` and each 'FAILED' row contributes ``-log(1 - p)``; after either
    outcome the current probability is multiplied by ``a``. A 'Maintenance'
    row contributes nothing and resets the probability to its starting value.
    Rows with any other status are ignored.

    Parameters
    ----------
    p : float
        Starting probability of an 'OK' outcome (also the value restored after
        a 'Maintenance' row).
    a : float
        Multiplicative factor applied to the probability after every
        'OK' / 'FAILED' row.
    year : int
        Only rows whose 'Year' equals this value are used.
    data : pandas.DataFrame, optional
        Frame with 'Year' and 'Batch Status' columns. Defaults to the
        notebook-level ``df`` so existing three-argument calls are unchanged.

    Returns
    -------
    float
        The accumulated negative log-likelihood (``0`` when no row matches).
    """
    if data is None:
        data = df  # fall back to the notebook's global frame

    statuses = data.loc[data['Year'] == year, 'Batch Status']

    initial_p = p
    neg_log_likelihood_val = 0
    for status in statuses:
        if status == 'Maintenance':
            # Maintenance restores the starting probability and scores nothing.
            p = initial_p
            continue
        if status == 'FAILED':
            neg_log_likelihood_val -= np.log(1 - p)
        elif status == 'OK':
            neg_log_likelihood_val -= np.log(p)
        else:
            # Unknown statuses neither score nor decay the probability.
            continue
        # Both scored outcomes decay the probability for the next batch.
        p *= a
    return neg_log_likelihood_val

In [7]:
# Candidate grids for the starting probability `p` and the decay factor `a`:
# both step through 0.01, 0.02, ... in increments of 0.01, stopping below 1.
p_values = np.arange(0.01, 1, 0.01)
a_values = np.arange(0.01, 1, 0.01)
unique_years = df['Year'].unique()

# Maps each year to the (p, a) pair with the lowest negative log-likelihood.
best_params_by_year = {}

for year in unique_years:
    # Best score seen so far for this year, and the pair that produced it.
    best_params_year = None
    min_neg_log_likelihood_year = float('inf')

    # Exhaustive scan of the grid; `a` varies slowest and, on ties, the first
    # pair encountered is kept because only strict improvements are accepted.
    for a in a_values:
        for p in p_values:
            current_neg_log_likelihood = neg_log_likelihood(p, a, year)
            if not (current_neg_log_likelihood < min_neg_log_likelihood_year):
                continue
            min_neg_log_likelihood_year = current_neg_log_likelihood
            best_params_year = (p, a)

    best_params_by_year[year] = best_params_year

# Report the winning pair for every year.
for year, params in best_params_by_year.items():
    print(f"Year: {year}, Best Parameters (p, a): {params}")


Year: 2019, Best Parameters (p, a): (0.8300000000000001, 0.99)
Year: 2020, Best Parameters (p, a): (0.97, 0.99)
Year: 2021, Best Parameters (p, a): (0.99, 0.99)
