In [1]:
import matplotlib.pyplot as plt

In [2]:
import numpy as np


In [3]:
import pandas as pd
import winsound  # for beep sound when Anomaly is detected
import pyttsx3   # to produce the sound "Anomaly detected"
from scipy.stats import norm

In [4]:
# Load the red-wine table from the CSV that sits next to the notebook
df = pd.read_csv(filepath_or_buffer="Redwine_data.csv")

In [5]:
df 

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.700,0.00,1.9,0.076,11,34,0.99780,3.51,0.56,9.4,5
1,7.8,0.880,0.00,2.6,0.098,25,67,0.99680,3.20,0.68,9.8,5
2,7.8,0.760,0.04,2.3,0.092,15,54,0.99700,3.26,0.65,9.8,5
3,11.2,0.280,0.56,1.9,0.075,17,60,0.99800,3.16,0.58,9.8,6
4,7.4,0.700,0.00,1.9,0.076,11,34,0.99780,3.51,0.56,9.4,5
...,...,...,...,...,...,...,...,...,...,...,...,...
1594,6.2,0.600,0.08,2.0,0.090,32,44,0.99490,3.45,0.58,10.5,5
1595,5.9,0.550,0.10,2.2,0.062,39,51,0.99512,3.52,0.76,11.2,6
1596,6.3,0.510,0.13,2.3,0.076,29,40,0.99574,3.42,0.75,11.0,6
1597,5.9,0.645,0.12,2.0,0.075,32,44,0.99547,3.57,0.71,10.2,5


In [6]:
df.columns

Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality'],
      dtype='object')

In [7]:
# Keep only the three columns the detector is built on
anomaly_features = df.loc[:, ['fixed acidity', 'sulphates', 'alcohol']]

In [8]:
anomaly_features

Unnamed: 0,fixed acidity,sulphates,alcohol
0,7.4,0.56,9.4
1,7.8,0.68,9.8
2,7.8,0.65,9.8
3,11.2,0.58,9.8
4,7.4,0.56,9.4
...,...,...,...
1594,6.2,0.58,10.5
1595,5.9,0.76,11.2
1596,6.3,0.75,11.0
1597,5.9,0.71,10.2


## Printing Information

In [9]:
# One summary line (min / max / mean) per detector feature.
# Each template carries its own hand-tuned spacing so the columns line up.
for template, column in (
    ("ACIDITY   LIMITS:    MIN = {}      MAX = {}       MEAN: {}", 'fixed acidity'),
    ("SULPHATES LIMITS:    MIN = {}     MAX = {}        MEAN: {}", 'sulphates'),
    ("ALCOHOL   LIMITS:    MIN = {}      MAX = {}       MEAN: {}", 'alcohol'),
):
    values = df[column]
    print(template.format(np.min(values), np.max(values), np.mean(values)))

ACIDITY   LIMITS:    MIN = 4.6      MAX = 15.9       MEAN: 8.31963727329581
SULPHATES LIMITS:    MIN = 0.33     MAX = 2.0        MEAN: 0.6581488430268917
ALCOHOL   LIMITS:    MIN = 8.4      MAX = 14.9       MEAN: 10.422983114446529


# Density-based Anomaly Detection


We are using a density-based approach to anomaly detection. This approach is quite reliable and,
in the end, gives us a parameter (the threshold Epsilon) that can be used to detect future anomalies.

In [10]:
# Hand-written normal pdf, kept purely as a reference.
# Nothing in this notebook calls it; the scipy-based helper is the one in use.
def comp_norm_probs_by_formula(column, mean, sd):
    """Evaluate the normal density of ``column`` directly from the closed-form formula.

    ``column`` may be a scalar or an array; ``mean`` and ``sd`` are the
    distribution's mean and standard deviation.
    """
    z_score = (column - mean) / sd
    scale_factor = 1 / (np.sqrt(2 * np.pi) * sd)
    return scale_factor * np.exp(-(0.5) * z_score ** 2)
                                                   

In [11]:
# This is the density helper the rest of the notebook actually calls
def compute_normal_probabilities(column, mean, sd):
    """Return the normal density of ``column`` for the given mean and standard deviation.

    Delegates to ``scipy.stats.norm.pdf`` with ``loc=mean`` and ``scale=sd``.
    """
    return norm.pdf(column, loc=mean, scale=sd)





.

For a density-based approach, it is recommended to normalize the anomaly features first.

In [12]:
def normalize(data):
    """Min-max scale ``data`` so its smallest value maps to 0 and its largest to 1."""
    lowest = np.min(data)
    spread = np.max(data) - lowest
    return (data - lowest) / spread
    

In [13]:
# Min-max scale each of the three detector features
fixed_acidity, Sulphates, Alcohol = (
    normalize(df[name]) for name in ('fixed acidity', 'sulphates', 'alcohol')
)


In [14]:
# Mean of each normalized feature
mean_acid, mean_sulphate, mean_alcohol = (
    np.mean(feature) for feature in (fixed_acidity, Sulphates, Alcohol)
)


In [15]:
# Standard deviation of each normalized feature.
# np.std is used on purpose: NumPy's default is ddof=0 (the population form).
sd_acid, sd_sulphate, sd_alcohol = (
    np.std(feature) for feature in (fixed_acidity, Sulphates, Alcohol)
)

.

Next we find the probabilities for the individual features. These probabilities have to be combined
into a single representative probability, which is then compared with the threshold value to decide whether or not the data point is anomalous.

In [16]:
# Density of every row under the normal distribution fitted to its own feature
prob_acidity, prob_sulphate, prob_alcohol = (
    compute_normal_probabilities(values, mean, sd)
    for values, mean, sd in (
        (fixed_acidity, mean_acid, sd_acid),
        (Sulphates, mean_sulphate, sd_sulphate),
        (Alcohol, mean_alcohol, sd_alcohol),
    )
)



#### Don't get confused with the probability values

Note that you will often see probability values greater than 1. Don't be confused: the
"probabilities" here are not probabilities in the strict statistical sense. They represent the density of a data point under the normal distribution, and their range is from 0 to positive infinity. The higher the density (2, 3, 6, 10, etc.), the more likely the value is to occur.

In [17]:
# Row-wise product of the three per-feature densities: one score per wine
combined_probabilities = np.multiply(
    np.multiply(prob_acidity, prob_sulphate), prob_alcohol
)


## Now for Epsilon



In [18]:
 # Share of items assumed to be anomalous, in percent. It is passed to
 # np.percentile further down to derive the Epsilon threshold.
 Expected_Anomaly_rate= 5 # which means 5% of our products are expected to be anomalous

We will use the 5th percentile to compute Epsilon, since Expected_Anomaly_rate specifies that 5% of products could be anomalous.


In other words, we compute the 5th percentile of the combined probabilities and use that value as Epsilon.


In [19]:
# Threshold: the combined-density value at the Expected_Anomaly_rate-th percentile
Epsilon = np.percentile(a=combined_probabilities, q=Expected_Anomaly_rate)
print("Epsilon: ", Epsilon)

Epsilon:  0.2682638338445016


In [26]:
# Boolean mask: True where a row's combined density falls below the threshold
Trues = np.less(combined_probabilities, Epsilon)

print("Number of Anomalies in the given data:    ", np.count_nonzero(Trues))
print("Fine products: ", np.count_nonzero(~Trues))

Number of Anomalies in the given data:     80
Fine products:  1519


## Now Creating the Detection System

In [21]:
import tkinter as tk
# Single Tk root shared by checking() and the GUI cell further down.
# A module-level assignment is already global, so no `global` statement is
# needed; one placed after the assignment is rejected by Python 3.6+.
window = tk.Tk()


In [27]:
"""This function is take values from data fields as input, convert them to float and 
perform the necessary steps to compute the representative probability for the new data
point entered. If the data point represents an anomaly, we will have to generate the necessary
GUI changes and the beep sound and "Anomaly Detected Alarm". Otherwise, we will inform the user
that the item entered is a fine item"""

def checking(acidity, sulphates, alcohol):
    """Score one new sample and report the verdict on screen and by sound.

    The three inputs are converted to float, min-max scaled with the minimum
    and maximum of the matching ``df`` column, and scored with the normal
    densities fitted earlier in the notebook. The product of the three
    densities is compared with ``Epsilon``:

    * below ``Epsilon``: a red "Anomaly Detected" label, a beep, and the
      spoken warning repeated three times;
    * otherwise: a green "Fine Item" label and a spoken confirmation.

    Input that cannot be converted to a finite number shows an
    "Invalid Input" label and is not scored.

    Parameters
    ----------
    acidity, sulphates, alcohol : str or float
        Raw values, typically the text of the three entry fields.

    Returns
    -------
    None
    """
    verdict_cell = {"row": 280, "column": 4}

    def show_verdict(text, colour):
        """Replace whatever verdict is on screen with a new coloured label."""
        # Destroy the previous verdict first; otherwise every submit stacks
        # another Label in the same grid cell and a wider old label stays
        # visible behind a narrower new one.
        for stale_label in window.grid_slaves(**verdict_cell):
            stale_label.destroy()
        label = tk.Label(window, text=text, bg=colour)
        label.grid(padx=10, pady=10, **verdict_cell)
        # Paint now: the beep and speech that follow block the Tk event loop.
        window.update_idletasks()

    # Empty or non-numeric entry text must not escape as an unhandled error.
    try:
        raw_values = [float(acidity), float(sulphates), float(alcohol)]
    except (TypeError, ValueError):
        raw_values = None
    # NaN would compare False against Epsilon and be reported as a fine item.
    if raw_values is None or not np.all(np.isfinite(raw_values)):
        show_verdict("Invalid Input", "orange")
        return

    # (column in df, fitted mean, fitted sd), in the same order as raw_values
    fitted = (
        ('fixed acidity', mean_acid, sd_acid),
        ('sulphates', mean_sulphate, sd_sulphate),
        ('alcohol', mean_alcohol, sd_alcohol),
    )

    new_probability = 1.0  # running product of the per-feature densities
    for value, (column, mean, sd) in zip(raw_values, fitted):
        # Scale the new value with the same min/max used for the training data
        col_min = np.min(df[column])
        col_max = np.max(df[column])
        scaled = (value - col_min) / (col_max - col_min)
        new_probability *= compute_normal_probabilities(scaled, mean, sd)

    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    # The second installed voice was chosen as the female voice; fall back to
    # the engine default when the system offers only one voice.
    if len(voices) > 1:
        engine.setProperty('voice', voices[1].id)

    if new_probability < Epsilon:
        show_verdict("Anomaly Detected", "red")

        winsound.Beep(1000, 2000)  # 1000 Hz for 2000 milliseconds

        for _ in range(3):  # repeat the spoken warning three times
            engine.say("Anomaly Detected")
            engine.runAndWait()
    else:
        show_verdict("Fine Item", "green")
        engine.say("Fine Item")
        engine.runAndWait()
   

### Creating GUI

In [23]:
import tkinter as tk
 
    
    
"""This portion is mainly concerned with GUI settings for the system."""



# ---- Window placement and appearance ----
screen_width = window.winfo_screenwidth()
screen_height = window.winfo_screenheight()
# Top-left corner that centres a 300x300 window on the screen
x_coordinate = (screen_width - 300) // 2
y_coordinate = (screen_height - 300) // 2
window.geometry("300x300+{}+{}".format(x_coordinate, y_coordinate))
window.title("Anomaly Detection System")
window.configure(bg="blue")

# ---- One label/entry pair per feature, each entry pre-filled with "0.0" ----
label_acidity = tk.Label(window, text="Acidity", bg="silver")
entry_acidity = tk.Entry(window)
label_acidity.grid(row=200, column=4, padx=10, pady=10)
entry_acidity.grid(row=200, column=5, padx=10, pady=10)
entry_acidity.insert(0, "0.0")

label_sulphates = tk.Label(window, text="Sulphates", bg="silver")
entry_sulphates = tk.Entry(window)
label_sulphates.grid(row=225, column=4, padx=10, pady=10)
entry_sulphates.grid(row=225, column=5, padx=10, pady=10)
entry_sulphates.insert(0, "0.0")

label_alcohol = tk.Label(window, text="Alcohol", bg="silver")
entry_alcohol = tk.Entry(window)
label_alcohol.grid(row=250, column=4, padx=10, pady=10)
entry_alcohol.grid(row=250, column=5, padx=10, pady=10)
entry_alcohol.insert(0, "0.0")

# ---- Submit button ----
# The lambda defers reading the entries until the click, so checking()
# receives whatever text is in the fields at that moment.
submit_button = tk.Button(
    window,
    text="Submit",
    command=lambda: checking(
        entry_acidity.get(), entry_sulphates.get(), entry_alcohol.get()
    ),
)
submit_button.grid(row=260, column=4, columnspan=2, pady=10)

# Hand control to Tk; this call blocks until the window is closed
window.mainloop()