# System 1

In [1]:
# Imports pandas library
import pandas as pd

# Read the semicolon-separated red and white wine-quality CSV files into data frames.
df_red = pd.read_csv(r'data\winequality-red.csv', sep=";")

df_white = pd.read_csv(r'data\winequality-white.csv', sep=";")

# Reference statistics of the red wines, used by the classification rule below.
# NOTE: despite the "_median" suffix these hold the arithmetic mean (.mean()) of
# the red wines' residual sugar and volatile acidity; the names are kept because
# the classification code refers to them.
red_residual_sugar_median = df_red['residual sugar'].mean()

red_volatile_acidity_median = df_red['volatile acidity'].mean()

# Label every row with its true wine type and add a placeholder column that
# will later receive the predicted type.
df_red["actual_wine"] = 'red'
df_white["actual_wine"] = 'white'

df_red["predicted_wine"] = 'na'
df_white["predicted_wine"] = 'na'

# Stack both data frames into a single one.
# ignore_index=True gives every row a unique label (0..n-1). Without it both
# frames keep their own 0-based labels, so the labels repeat and a label-based
# write such as concat_df.at[index, ...] updates a red row and a white row at
# the same time.
concat_df = pd.concat([df_red, df_white], axis=0, ignore_index=True)

# Half-widths of the acceptance bands around the red-wine reference values:
# +/-30% for residual sugar and +/-100% for volatile acidity.
sugar_band = red_residual_sugar_median * 0.3
acidity_band = red_volatile_acidity_median * 1

# Classification rule: a wine is predicted 'red' when BOTH its residual sugar
# and its volatile acidity lie inside their band (bounds inclusive); otherwise
# it is predicted 'white'.
sugar_in_band = concat_df['residual sugar'].between(
    red_residual_sugar_median - sugar_band,
    red_residual_sugar_median + sugar_band,
)
acidity_in_band = concat_df['volatile acidity'].between(
    red_volatile_acidity_median - acidity_band,
    red_volatile_acidity_median + acidity_band,
)

# Work on plain arrays from here on so that everything is positional. The
# frame's index labels may repeat (red and white rows can share a label), and a
# label-based write like concat_df.at[label, ...] would then overwrite several
# rows at once.
predicted_red = sugar_in_band.to_numpy() & acidity_in_band.to_numpy()
actual_red = (concat_df['actual_wine'] == 'red').to_numpy()
actual_white = (concat_df['actual_wine'] == 'white').to_numpy()

# Store the predicted wine type row by row (a list is assigned by position).
concat_df['predicted_wine'] = ['red' if is_red else 'white' for is_red in predicted_red]

# Confusion-matrix counts, with 'red' as the positive class.
tp = int((predicted_red & actual_red).sum())
fp = int((predicted_red & actual_white).sum())
fn = int((~predicted_red & actual_red).sum())
tn = int((~predicted_red & actual_white).sum())

def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


# Report the confusion-matrix counts ('red' is the positive class).
print(f"True Positives (tp): {tp}")
print(f"True Negatives (tn): {tn}")
print(f"False Positives (fp): {fp}")
print(f"False Negatives (fn): {fn}")

# Derived metrics. Each ratio falls back to 0.0 instead of raising
# ZeroDivisionError when its denominator is zero (e.g. no wine was predicted
# red, or there are no red wines in the data).
accuracy = _safe_ratio(tp + tn, tp + tn + fp + fn)
recall = _safe_ratio(tp, tp + fn)
precision = _safe_ratio(tp, tp + fp)
f1 = 2 * _safe_ratio(precision * recall, precision + recall)
print(f"Accuracy: {accuracy:.2f}")
print(f"Recall: {recall:.2f}")
print(f"Precision: {precision:.2f}")
print(f"F1: {f1:.2f}")

# Persist the combined data frame, including actual and predicted wine types.
concat_df.to_csv('results.csv')


True Positives (tp): 1193
True Negatives (tn): 4202
False Positives (fp): 696
False Negatives (fn): 406
Accuracy: 0.83
Recall: 0.75
Precision: 0.63
F1: 0.68
