In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load and preprocess data
df = pd.read_csv('WineQT.csv')
X = df.drop('quality', axis=1)
y = df['quality']
y = (y >= 6).astype(int)

# Feature engineering
poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_poly, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_tensor = torch.FloatTensor(X_train_scaled).to(device)
X_test_tensor = torch.FloatTensor(X_test_scaled).to(device)
y_train_tensor = torch.LongTensor(y_train.values).to(device)
y_test_tensor = torch.LongTensor(y_test.values).to(device)

class ImprovedAttentionModel(nn.Module):
    def __init__(self, input_dim, attention_dim, hidden_dim, num_classes, dropout_rate=0.5):
        super(ImprovedAttentionModel, self).__init__()
        self.attention = nn.Sequential(
            nn.Linear(input_dim, attention_dim),
            nn.ReLU(),
            nn.Linear(attention_dim, 1)
        )
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, num_classes)
        self.dropout = nn.Dropout(dropout_rate)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        self.bn2 = nn.BatchNorm1d(hidden_dim)
        
    def forward(self, x):
        attention_weights = F.softmax(self.attention(x), dim=1)
        weighted_features = x * attention_weights
        x = self.dropout(F.leaky_relu(self.bn1(self.fc1(weighted_features))))
        x = self.dropout(F.leaky_relu(self.bn2(self.fc2(x))))
        output = self.fc3(x)
        return output, weighted_features, attention_weights

def train_model(model, train_loader, criterion, optimizer, scheduler, num_epochs=200):
    for epoch in range(num_epochs):
        model.train()
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs, _, _ = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
        
        scheduler.step(loss)
        
        if (epoch + 1) % 10 == 0:
            model.eval()
            with torch.no_grad():
                test_outputs, _, _ = model(X_test_tensor)
                _, predicted = torch.max(test_outputs.data, 1)
                accuracy = accuracy_score(y_test_tensor.cpu(), predicted.cpu())
                print(f'Epoch [{epoch+1}/{num_epochs}], Test Accuracy: {accuracy:.4f}')

    return model

input_dim = X_train_scaled.shape[1]
attention_dim = 128
hidden_dim = 256
num_classes = 2
dropout_rate = 0.5
learning_rate = 0.001
batch_size = 64
num_epochs = 300

kf = KFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = []

for fold, (train_idx, val_idx) in enumerate(kf.split(X_train_scaled)):
    print(f"Fold {fold + 1}")
    
    X_train_fold = X_train_tensor[train_idx]
    y_train_fold = y_train_tensor[train_idx]
    X_val_fold = X_train_tensor[val_idx]
    y_val_fold = y_train_tensor[val_idx]
    
    model = ImprovedAttentionModel(input_dim, attention_dim, hidden_dim, num_classes, dropout_rate).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)
    scheduler = ReduceLROnPlateau(optimizer, 'min', patience=10, factor=0.5, verbose=True)
    
    train_dataset = TensorDataset(X_train_fold, y_train_fold)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    
    model = train_model(model, train_loader, criterion, optimizer, scheduler, num_epochs)
    
    model.eval()
    with torch.no_grad():
        val_outputs, _, _ = model(X_val_fold)
        _, predicted = torch.max(val_outputs.data, 1)
        accuracy = accuracy_score(y_val_fold.cpu(), predicted.cpu())
        cv_scores.append(accuracy)
        print(f"Fold {fold + 1} Accuracy: {accuracy:.4f}")

print(f"Mean CV Accuracy: {np.mean(cv_scores):.4f} (+/- {np.std(cv_scores):.4f})")

# Train the final model on the entire training set
final_model = ImprovedAttentionModel(input_dim, attention_dim, hidden_dim, num_classes, dropout_rate).to(device)
final_criterion = nn.CrossEntropyLoss()
final_optimizer = optim.Adam(final_model.parameters(), lr=learning_rate, weight_decay=1e-4)
final_scheduler = ReduceLROnPlateau(final_optimizer, 'min', patience=10, factor=0.5, verbose=True)

final_train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
final_train_loader = DataLoader(final_train_dataset, batch_size=batch_size, shuffle=True)

final_model = train_model(final_model, final_train_loader, final_criterion, final_optimizer, final_scheduler, num_epochs)

# Extract attention-weighted features
final_model.eval()
with torch.no_grad():
    _, train_features, _ = final_model(X_train_tensor)
    _, test_features, _ = final_model(X_test_tensor)

train_features = train_features.cpu().numpy()
test_features = test_features.cpu().numpy()

# Create an ensemble of multiple models
knn = KNeighborsClassifier(n_neighbors=5)
rf = RandomForestClassifier(n_estimators=100, random_state=42)
gb = GradientBoostingClassifier(n_estimators=100, random_state=42)

knn.fit(train_features, y_train)
rf.fit(train_features, y_train)
gb.fit(train_features, y_train)

knn_predictions = knn.predict(test_features)
rf_predictions = rf.predict(test_features)
gb_predictions = gb.predict(test_features)
nn_predictions = final_model(X_test_tensor)[0].argmax(dim=1).cpu().numpy()

ensemble_predictions = np.round((knn_predictions + rf_predictions + gb_predictions + nn_predictions) / 4).astype(int)
ensemble_accuracy = accuracy_score(y_test, ensemble_predictions)

print(f"KNN Accuracy: {accuracy_score(y_test, knn_predictions):.4f}")
print(f"Random Forest Accuracy: {accuracy_score(y_test, rf_predictions):.4f}")
print(f"Gradient Boosting Accuracy: {accuracy_score(y_test, gb_predictions):.4f}")
print(f"Neural Network Accuracy: {accuracy_score(y_test, nn_predictions):.4f}")
print(f"Ensemble Accuracy: {ensemble_accuracy:.4f}")

Using device: cuda
Fold 1




Epoch [10/300], Test Accuracy: 0.7500
Epoch [20/300], Test Accuracy: 0.7500
Epoch [30/300], Test Accuracy: 0.7562
Epoch [40/300], Test Accuracy: 0.7469
Epoch [50/300], Test Accuracy: 0.7500
Epoch [60/300], Test Accuracy: 0.7531
Epoch [70/300], Test Accuracy: 0.7594
Epoch [80/300], Test Accuracy: 0.7562
Epoch [90/300], Test Accuracy: 0.7594
Epoch [100/300], Test Accuracy: 0.7625
Epoch [110/300], Test Accuracy: 0.7656
Epoch [120/300], Test Accuracy: 0.7594
Epoch [130/300], Test Accuracy: 0.7656
Epoch [140/300], Test Accuracy: 0.7594
Epoch [150/300], Test Accuracy: 0.7656
Epoch [160/300], Test Accuracy: 0.7656
Epoch [170/300], Test Accuracy: 0.7625
Epoch [180/300], Test Accuracy: 0.7594
Epoch [190/300], Test Accuracy: 0.7656
Epoch [200/300], Test Accuracy: 0.7625
Epoch [210/300], Test Accuracy: 0.7562
Epoch [220/300], Test Accuracy: 0.7625
Epoch [230/300], Test Accuracy: 0.7656
Epoch [240/300], Test Accuracy: 0.7656
Epoch [250/300], Test Accuracy: 0.7625
Epoch [260/300], Test Accuracy: 0.



Epoch [10/300], Test Accuracy: 0.7500
Epoch [20/300], Test Accuracy: 0.7438
Epoch [30/300], Test Accuracy: 0.7406
Epoch [40/300], Test Accuracy: 0.7500
Epoch [50/300], Test Accuracy: 0.7562
Epoch [60/300], Test Accuracy: 0.7562
Epoch [70/300], Test Accuracy: 0.7625
Epoch [80/300], Test Accuracy: 0.7625
Epoch [90/300], Test Accuracy: 0.7656
Epoch [100/300], Test Accuracy: 0.7656
Epoch [110/300], Test Accuracy: 0.7656
Epoch [120/300], Test Accuracy: 0.7594
Epoch [130/300], Test Accuracy: 0.7594
Epoch [140/300], Test Accuracy: 0.7625
Epoch [150/300], Test Accuracy: 0.7625
Epoch [160/300], Test Accuracy: 0.7656
Epoch [170/300], Test Accuracy: 0.7656
Epoch [180/300], Test Accuracy: 0.7656
Epoch [190/300], Test Accuracy: 0.7625
Epoch [200/300], Test Accuracy: 0.7625
Epoch [210/300], Test Accuracy: 0.7625
Epoch [220/300], Test Accuracy: 0.7656
Epoch [230/300], Test Accuracy: 0.7688
Epoch [240/300], Test Accuracy: 0.7625
Epoch [250/300], Test Accuracy: 0.7656
Epoch [260/300], Test Accuracy: 0.



Epoch [10/300], Test Accuracy: 0.7406
Epoch [20/300], Test Accuracy: 0.7344
Epoch [30/300], Test Accuracy: 0.7500
Epoch [40/300], Test Accuracy: 0.7344
Epoch [50/300], Test Accuracy: 0.7375
Epoch [60/300], Test Accuracy: 0.7438
Epoch [70/300], Test Accuracy: 0.7594
Epoch [80/300], Test Accuracy: 0.7562
Epoch [90/300], Test Accuracy: 0.7594
Epoch [100/300], Test Accuracy: 0.7625
Epoch [110/300], Test Accuracy: 0.7656
Epoch [120/300], Test Accuracy: 0.7594
Epoch [130/300], Test Accuracy: 0.7625
Epoch [140/300], Test Accuracy: 0.7656
Epoch [150/300], Test Accuracy: 0.7625
Epoch [160/300], Test Accuracy: 0.7562
Epoch [170/300], Test Accuracy: 0.7625
Epoch [180/300], Test Accuracy: 0.7656
Epoch [190/300], Test Accuracy: 0.7625
Epoch [200/300], Test Accuracy: 0.7625
Epoch [210/300], Test Accuracy: 0.7625
Epoch [220/300], Test Accuracy: 0.7625
Epoch [230/300], Test Accuracy: 0.7656
Epoch [240/300], Test Accuracy: 0.7625
Epoch [250/300], Test Accuracy: 0.7656
Epoch [260/300], Test Accuracy: 0.



Epoch [10/300], Test Accuracy: 0.7500
Epoch [20/300], Test Accuracy: 0.7500
Epoch [30/300], Test Accuracy: 0.7562
Epoch [40/300], Test Accuracy: 0.7531
Epoch [50/300], Test Accuracy: 0.7625
Epoch [60/300], Test Accuracy: 0.7812
Epoch [70/300], Test Accuracy: 0.7906
Epoch [80/300], Test Accuracy: 0.7719
Epoch [90/300], Test Accuracy: 0.7844
Epoch [100/300], Test Accuracy: 0.7875
Epoch [110/300], Test Accuracy: 0.7875
Epoch [120/300], Test Accuracy: 0.7812
Epoch [130/300], Test Accuracy: 0.7844
Epoch [140/300], Test Accuracy: 0.7844
Epoch [150/300], Test Accuracy: 0.7844
Epoch [160/300], Test Accuracy: 0.7875
Epoch [170/300], Test Accuracy: 0.7844
Epoch [180/300], Test Accuracy: 0.7875
Epoch [190/300], Test Accuracy: 0.7875
Epoch [200/300], Test Accuracy: 0.7875
Epoch [210/300], Test Accuracy: 0.7875
Epoch [220/300], Test Accuracy: 0.7875
Epoch [230/300], Test Accuracy: 0.7875
Epoch [240/300], Test Accuracy: 0.7844
Epoch [250/300], Test Accuracy: 0.7812
Epoch [260/300], Test Accuracy: 0.



Epoch [10/300], Test Accuracy: 0.7594
Epoch [20/300], Test Accuracy: 0.7469
Epoch [30/300], Test Accuracy: 0.7500
Epoch [40/300], Test Accuracy: 0.7500
Epoch [50/300], Test Accuracy: 0.7562
Epoch [60/300], Test Accuracy: 0.7531
Epoch [70/300], Test Accuracy: 0.7562
Epoch [80/300], Test Accuracy: 0.7531
Epoch [90/300], Test Accuracy: 0.7562
Epoch [100/300], Test Accuracy: 0.7500
Epoch [110/300], Test Accuracy: 0.7531
Epoch [120/300], Test Accuracy: 0.7531
Epoch [130/300], Test Accuracy: 0.7594
Epoch [140/300], Test Accuracy: 0.7500
Epoch [150/300], Test Accuracy: 0.7500
Epoch [160/300], Test Accuracy: 0.7531
Epoch [170/300], Test Accuracy: 0.7531
Epoch [180/300], Test Accuracy: 0.7500
Epoch [190/300], Test Accuracy: 0.7562
Epoch [200/300], Test Accuracy: 0.7531
Epoch [210/300], Test Accuracy: 0.7531
Epoch [220/300], Test Accuracy: 0.7562
Epoch [230/300], Test Accuracy: 0.7562
Epoch [240/300], Test Accuracy: 0.7531
Epoch [250/300], Test Accuracy: 0.7500
Epoch [260/300], Test Accuracy: 0.



Epoch [10/300], Test Accuracy: 0.7438
Epoch [20/300], Test Accuracy: 0.7469
Epoch [30/300], Test Accuracy: 0.7500
Epoch [40/300], Test Accuracy: 0.7719
Epoch [50/300], Test Accuracy: 0.7562
Epoch [60/300], Test Accuracy: 0.7688
Epoch [70/300], Test Accuracy: 0.7656
Epoch [80/300], Test Accuracy: 0.7688
Epoch [90/300], Test Accuracy: 0.7688
Epoch [100/300], Test Accuracy: 0.7719
Epoch [110/300], Test Accuracy: 0.7688
Epoch [120/300], Test Accuracy: 0.7656
Epoch [130/300], Test Accuracy: 0.7625
Epoch [140/300], Test Accuracy: 0.7688
Epoch [150/300], Test Accuracy: 0.7688
Epoch [160/300], Test Accuracy: 0.7688
Epoch [170/300], Test Accuracy: 0.7688
Epoch [180/300], Test Accuracy: 0.7656
Epoch [190/300], Test Accuracy: 0.7719
Epoch [200/300], Test Accuracy: 0.7750
Epoch [210/300], Test Accuracy: 0.7656
Epoch [220/300], Test Accuracy: 0.7656
Epoch [230/300], Test Accuracy: 0.7719
Epoch [240/300], Test Accuracy: 0.7719
Epoch [250/300], Test Accuracy: 0.7688
Epoch [260/300], Test Accuracy: 0.

In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, classification_report

#Set up modern seaborn styling
sns.set_theme(style="whitegrid", font_scale=1.2)  # Modern way to set seaborn style
plt.rcParams['figure.figsize'] = [10, 6]
plt.rcParams['font.family'] = 'serif'

# Define color palette
colors = ['#8ECFC9', '#FFBE7A', '#FA7F6F', '#82B0D2', '#BEB8DC', '#E7DAD2', '#999999']
sns.set_palette(colors)  # Set seaborn color palette

# Calculate metrics
knn_acc = accuracy_score(y_test, knn_predictions)
rf_acc = accuracy_score(y_test, rf_predictions)
gb_acc = accuracy_score(y_test, gb_predictions)
nn_acc = accuracy_score(y_test, nn_predictions)
ensemble_acc = accuracy_score(y_test, ensemble_predictions)

knn_f1 = f1_score(y_test, knn_predictions)
rf_f1 = f1_score(y_test, rf_predictions)
gb_f1 = f1_score(y_test, gb_predictions)
nn_f1 = f1_score(y_test, nn_predictions)
ensemble_f1 = f1_score(y_test, ensemble_predictions)

In [11]:
# 1. Cross-validation Results Plot
plt.figure()
sns.lineplot(x=range(1, len(cv_scores) + 1), y=cv_scores, marker='o', markersize=8)
plt.axhline(y=np.mean(cv_scores), color=colors[6], linestyle='--', 
            label=f'Mean CV Score: {np.mean(cv_scores):.4f}')
plt.fill_between(range(1, len(cv_scores) + 1), 
                 np.mean(cv_scores) - np.std(cv_scores),
                 np.mean(cv_scores) + np.std(cv_scores), 
                 color=colors[6], alpha=0.2)
plt.xlabel('Cross-validation Fold')
plt.ylabel('Accuracy Score')
plt.title('Cross-validation Results')
plt.legend()
plt.tight_layout()
plt.savefig('cv_results.png', dpi=300, bbox_inches='tight')
plt.close()

In [12]:
# 2. Model Architecture Flowchart
plt.figure(figsize=(12, 8))
G = nx.DiGraph()
nodes = [
    f'Input\n({input_dim})', 
    f'Attention\n({attention_dim})', 
    'Weighted\nFeatures',
    f'Hidden 1\n({hidden_dim})', 
    f'Hidden 2\n({hidden_dim})', 
    f'Output\n({num_classes})'
]
positions = {
    f'Input\n({input_dim})': (0, 0.5),
    f'Attention\n({attention_dim})': (1, 1),
    'Weighted\nFeatures': (2, 0.5),
    f'Hidden 1\n({hidden_dim})': (3, 0.5),
    f'Hidden 2\n({hidden_dim})': (4, 0.5),
    f'Output\n({num_classes})': (5, 0.5)
}

for node in nodes:
    G.add_node(node)

edges = [
    (f'Input\n({input_dim})', f'Attention\n({attention_dim})'),
    (f'Input\n({input_dim})', 'Weighted\nFeatures'),
    (f'Attention\n({attention_dim})', 'Weighted\nFeatures'),
    ('Weighted\nFeatures', f'Hidden 1\n({hidden_dim})'),
    (f'Hidden 1\n({hidden_dim})', f'Hidden 2\n({hidden_dim})'),
    (f'Hidden 2\n({hidden_dim})', f'Output\n({num_classes})')
]
G.add_edges_from(edges)

# Clear any existing seaborn styling for the network plot
with plt.style.context('default'):
    nx.draw(G, positions, with_labels=True, node_color=colors[1],
            node_size=3000, font_size=10, font_weight='bold',
            edge_color=colors[6], arrows=True, arrowsize=20)
    plt.title('Model Architecture with Layer Dimensions')
    plt.savefig('model_architecture.png', dpi=300, bbox_inches='tight')
plt.close()

In [15]:
# 3. Model Performance Comparison with F1-Scores
# Calculate F1-scores
knn_f1 = f1_score(y_test, knn_predictions)
rf_f1 = f1_score(y_test, rf_predictions)
gb_f1 = f1_score(y_test, gb_predictions)
nn_f1 = f1_score(y_test, nn_predictions)
ensemble_f1 = f1_score(y_test, ensemble_predictions)

# Create performance comparison DataFrame
performance_df = pd.DataFrame({
    'Model': ['KNN', 'Random Forest', 'Gradient Boosting', 'Neural Network', 'Ensemble'],
    'Accuracy': [
        accuracy_score(y_test, knn_predictions),
        accuracy_score(y_test, rf_predictions),
        accuracy_score(y_test, gb_predictions),
        accuracy_score(y_test, nn_predictions),
        accuracy_score(y_test, ensemble_predictions)
    ],
    'F1-Score': [knn_f1, rf_f1, gb_f1, nn_f1, ensemble_f1]
})

# Plot performance comparison
plt.figure(figsize=(12, 6))
bar_width = 0.35
index = np.arange(len(performance_df['Model']))

plt.bar(index - bar_width/2, performance_df['Accuracy'], bar_width, 
        label='Accuracy', color=colors[0])
plt.bar(index + bar_width/2, performance_df['F1-Score'], bar_width,
        label='F1-Score', color=colors[2])

plt.xlabel('Model')
plt.ylabel('Score')
plt.title('Model Performance Comparison')
plt.xticks(index, performance_df['Model'], rotation=45)
plt.legend()
plt.tight_layout()
plt.savefig('model_comparison.png', dpi=300, bbox_inches='tight')
plt.close()

# Create performance table
performance_table = performance_df.style.format({
    'Accuracy': '{:.4f}',
    'F1-Score': '{:.4f}'
}).to_latex()


In [16]:
import graphviz
from urllib.parse import quote
import json

def create_flowchart_pdf():
    # Create a new directed graph
    dot = graphviz.Digraph('flowchart', 
                          format='pdf',
                          engine='dot',
                          graph_attr={'rankdir': 'TB', 'bgcolor': 'white'})
    
    # Define node styles
    styles = {
        'data': {'color': '#8ECFC9', 'penwidth': '3.0', 'fontcolor': '#8ECFC9', 'style': 'filled', 'fillcolor': 'white'},
        'process': {'color': '#FFBE7A', 'penwidth': '3.0', 'fontcolor': '#FFBE7A', 'style': 'filled', 'fillcolor': 'white'},
        'attention': {'color': '#FA7F6F', 'penwidth': '3.0', 'fontcolor': '#FA7F6F', 'style': 'filled', 'fillcolor': 'white'},
        'layers': {'color': '#82B0D2', 'penwidth': '3.0', 'fontcolor': '#82B0D2', 'style': 'filled', 'fillcolor': 'white'},
        'eval': {'color': '#BEB8DC', 'penwidth': '3.0', 'fontcolor': '#BEB8DC', 'style': 'filled', 'fillcolor': 'white'}
    }
    
    # Add nodes
    # Data Processing
    with dot.subgraph(name='cluster_0') as s:
        s.attr(label='Data Preprocessing', color='#8ECFC9')
        for node in ['Wine Quality Dataset', 'Feature Engineering', 'Train-Test Split', 'StandardScaler', 'Convert to Tensors']:
            s.node(node, **styles['data'])
    
    # Model Architecture
    with dot.subgraph(name='cluster_1') as s:
        s.attr(label='Attention Mechanism', color='#FA7F6F')
        s.node('Attention Layer', **styles['attention'])
        s.node('Multiply', shape='diamond', **styles['attention'])
    
    # Neural Network
    with dot.subgraph(name='cluster_2') as s:
        s.attr(label='Neural Network', color='#82B0D2')
        for node in ['Input Layer', 'BatchNorm + Dropout', 'Hidden Layer 1 ReLU', 'Hidden Layer 2 ReLU', 'Output Layer']:
            s.node(node, **styles['layers'])
    
    # Add edges
    edges = [
        ('Wine Quality Dataset', 'Feature Engineering'),
        ('Feature Engineering', 'Train-Test Split'),
        ('Train-Test Split', 'StandardScaler'),
        ('StandardScaler', 'Convert to Tensors'),
        ('Convert to Tensors', 'Input Layer'),
        ('Input Layer', 'Attention Layer'),
        ('Input Layer', 'Multiply'),
        ('Attention Layer', 'Multiply'),
        ('Multiply', 'BatchNorm + Dropout'),
        ('BatchNorm + Dropout', 'Hidden Layer 1 ReLU'),
        ('Hidden Layer 1 ReLU', 'Hidden Layer 2 ReLU'),
        ('Hidden Layer 2 ReLU', 'Output Layer')
    ]
    
    for edge in edges:
        dot.edge(*edge)
    
    # Save the PDF
    dot.render('flowchart', view=True)

if __name__ == '__main__':
    create_flowchart_pdf()

ModuleNotFoundError: No module named 'graphviz'