#### Data Ingestion

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split


# Path of the feature CSV, relative to the notebook's working directory.
FEATURES_CSV = '../features/features_200.csv'

# Load the feature table into a DataFrame.
dataset = pd.read_csv(FEATURES_CSV)

# Show the first ten rows as the cell output for a quick visual check.
dataset.head(n=10)

#### Split the Dataset

In [None]:
# --- Column split ---
# Features are every column except 'Unnamed: 0' (presumably the row index that
# was written out with the CSV -- confirm) and the label column 'font_type'.
# The label is dropped by name rather than by position so that a change in
# the CSV's column order cannot leak the label into the feature matrix.
x_data = dataset.drop(columns=['Unnamed: 0', 'font_type'])
y_data = dataset['font_type']

# --- Train / validation split ---
# Convert to NumPy arrays, then hold out 20% of the rows for validation.
# stratify=y_data keeps the class proportions the same in both splits, and
# the fixed random_state makes the split reproducible.
x_data, y_data = x_data.to_numpy(), y_data.to_numpy()
x_train, x_val, y_train, y_val = train_test_split(
    x_data,
    y_data,
    test_size=0.2,
    random_state=75,
    stratify=y_data,
)

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.metrics import accuracy_score

from BayesClassifier import BayesClassifier
from KDEstimator import KDEstimator

#### Gaussian Naive Bayes

In [None]:
# Gaussian Naive Bayes using the project's BayesClassifier in 'GNB' mode.
# Initiate
gnb_model = BayesClassifier(mode='GNB')
# Fit on the training split
gnb_model.fit(x_train, y_train)
# Predict labels for the validation split
gnb_y_pred = gnb_model.predict(x_val)
# Evaluate on the validation split; score() is presumably accuracy (it is
# reported as such below) -- confirm against BayesClassifier.
gnb_accuracy = gnb_model.score(x_val, y_val)
print("Gaussian Naive Bayes Accuracy:", gnb_accuracy)

#### Linear Discriminant Analysis

In [None]:
# Linear Discriminant Analysis using the project's BayesClassifier in 'LDA' mode.
# Initiate
lda_model = BayesClassifier(mode='LDA')
# Fit on the training split
lda_model.fit(x_train, y_train)
# Predict labels for the validation split
lda_y_pred = lda_model.predict(x_val)
# Evaluate on the validation split; score() is presumably accuracy (it is
# reported as such below) -- confirm against BayesClassifier.
lda_accuracy = lda_model.score(x_val, y_val)
print("Linear Discriminant Analysis Accuracy:", lda_accuracy)

#### Quadratic Discriminant Analysis

In [None]:
# Quadratic Discriminant Analysis using the project's BayesClassifier in 'QDA' mode.
# Initiate
qda_model = BayesClassifier(mode='QDA')
# Fit on the training split
qda_model.fit(x_train, y_train)
# Predict labels for the validation split
qda_y_pred = qda_model.predict(x_val)
# Evaluate on the validation split; score() is presumably accuracy (it is
# reported as such below) -- confirm against BayesClassifier.
qda_accuracy = qda_model.score(x_val, y_val)
print("Quadratic Discriminant Analysis Accuracy:", qda_accuracy)

#### Kernel Density Estimation

In [None]:
# Bayes classifier whose class-conditional densities come from a kernel
# density estimator instead of a parametric model.
# Build the KDEstimator with a Gauss bump.
# NOTE(review): the original TODO asked for bandwidth=0.5, but the value
# actually passed is 9 -- confirm which bandwidth is intended.
kde_config = KDEstimator(bump='Gauss', bandwidth=9)
# Create the BayesClassifier in KDE mode and hand it the estimator above.
kde_model = BayesClassifier(mode='KDE', kde_config=kde_config)
# Fit on the training split
kde_model.fit(x_train, y_train)
# Predict labels for the validation split
kde_y_pred = kde_model.predict(x_val)
# Evaluate on the validation split; score() is presumably accuracy (it is
# reported as such below) -- confirm against BayesClassifier.
kde_accuracy = kde_model.score(x_val, y_val)
print("Bayes Classifier with KDE Accuracy:", kde_accuracy)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# Random Forest classifier.
# An earlier configuration set only n_estimators=1000 and random_state=42;
# the current one additionally constrains tree depth and leaf/split sizes.
rf_params = dict(
    n_estimators=1000,
    max_depth=10,
    min_samples_split=5,
    min_samples_leaf=4,
    max_features='sqrt',
    random_state=42,
)
rf_model = RandomForestClassifier(**rf_params)

# Fit on the training split.
rf_model.fit(x_train, y_train)

# Predict on the validation split and report the fraction of correct labels.
y_pred = rf_model.predict(x_val)
accuracy = accuracy_score(y_val, y_pred)
print("Accuracy:", accuracy)





In [None]:
from sklearn.preprocessing import StandardScaler

# Support Vector Machine on standardised features.
scaler = StandardScaler()

# Fit the scaler on the training split only, so that no statistics from the
# validation split influence the transformation.
scaler.fit(x_train)

# Store the scaled data under new names instead of overwriting x_train /
# x_val. Overwriting made the notebook order-dependent: re-running this cell
# refitted the scaler on already-scaled data, and re-running any earlier cell
# silently trained on scaled inputs.
x_train_scaled = scaler.transform(x_train)
x_val_scaled = scaler.transform(x_val)

svm_model = svm.SVC(kernel='poly', C=100, gamma='auto', degree=3, coef0=1)

# Other configurations that were tried:
# svm_model = svm.SVC(kernel='sigmoid', C=10, gamma='scale')
# svm_model = svm.SVC(kernel='poly', C=10, gamma='auto', degree=3, coef0=1)
# svm_model = svm.SVC(kernel='rbf', C=10000, gamma='scale')
svm_model.fit(x_train_scaled, y_train)

# Predict on the (scaled) validation split.
y_pred = svm_model.predict(x_val_scaled)

# Report the fraction of correctly classified validation samples.
accuracy = accuracy_score(y_val, y_pred)
print("svm Accuracy:", accuracy)