In [3]:
import pandas
import os    

from sklearn.model_selection import train_test_split

In [4]:
def _read_feature_csv(csv_path):
    """Load one feature CSV and return it without its first column.

    NOTE(review): the first column is presumably the index written out when
    the CSV was saved — confirm against the script that produced the files.
    """
    table = pandas.read_csv(csv_path)
    return table.drop(columns=table.columns[0])


# Paths are resolved against the directory the notebook kernel was started in.
pwd = os.getcwd()

# One feature table per coin side.
df_hu_head = _read_feature_csv(pwd+'/feature_data/hu_head.csv')
df_hu_tail = _read_feature_csv(pwd+'/feature_data/hu_tail.csv')

# Evaluation Metrics targeted at Classification Models

<b>Precision</b>: It measures the proportion of correctly predicted positive instances out of the total predicted positive instances. It indicates how reliable the model is when predicting positive outcomes.

<b>Recall</b> (Sensitivity or True Positive Rate): It measures the proportion of correctly predicted positive instances out of the actual positive instances. It indicates how well the model identifies positive instances.

<b>F1 Score</b>: It is the harmonic mean of precision and recall. It provides a balanced measure of both precision and recall.

<b>Specificity</b> (True Negative Rate): It measures the proportion of correctly predicted negative instances out of the actual negative instances. It indicates how well the model identifies negative instances.

<b>Confusion Matrix</b>: It provides a tabular representation of the model's predictions against the actual class labels, showing the counts of true positives, true negatives, false positives, and false negatives.

<b>Classification Report</b>: It provides a summary of various evaluation metrics including precision, recall, F1 score, and support for each class label.

# Regression Models 

<b>Linear Regression</b> (Ordinary Least Squares): This is a basic linear regression model that assumes a linear relationship between the input features and the target variable. It minimizes the sum of squared residuals to fit a linear function to the data.

<b>Ridge Regression</b>: Ridge regression is a regularized version of linear regression that adds an L2 penalty term (the sum of the squared coefficients) to the loss function. It helps to reduce the impact of multicollinearity in the data and can prevent overfitting.

<b>Lasso Regression</b>: Lasso regression is another regularized linear regression model that uses L1 regularization. It can be useful for feature selection as it tends to set the coefficients of less important features to zero.

<b>ElasticNet Regression</b>: ElasticNet regression combines both L1 (Lasso) and L2 (Ridge) regularization. It can be effective when dealing with datasets that have a large number of features and potential collinearity.


# Evaluation Metrics targeted at Regression Models

<b>Mean Squared Error (MSE)</b>: It measures the average squared difference between the predicted and actual values. Lower values indicate better performance.

<b>Mean Absolute Error (MAE)</b>: It measures the average absolute difference between the predicted and actual values. It is less sensitive to outliers compared to MSE.

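<b>R-squared (R2) Score</b>: It represents the proportion of the variance in the target variable that is predictable from the features. The best possible score is 1 (a perfect fit); a score of 0 means the model does no better than always predicting the mean of the target, and the score becomes negative when the model performs worse than that baseline.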

In [8]:
import model_generator

# Wrap the head-side feature table in the project's model helper.
# NOTE(review): 0.8 is presumably the train/test split fraction — confirm
# against model_generator.coin_models.
hu_head = model_generator.coin_models(
    df_hu_head,
    0.8,
)

# Head Analysis

In [9]:
# Each classifier is built from the head data and then handed, together with
# a display label, to the classification evaluation helper. Build and
# evaluation stay interleaved, one model at a time.

head_knn_model = hu_head.KNN(5)
hh_knn = hu_head.classification_correctness(head_knn_model, "K Nearest Neighbour")

head_tree_model = hu_head.DecisionTree()
hh_DecisionTree = hu_head.classification_correctness(head_tree_model, "Decision Tree")

head_svc_model = hu_head.SVC()
hh_svc = hu_head.classification_correctness(head_svc_model, "SVC")

head_forest_model = hu_head.RandomForest(100)
hh_rf = hu_head.classification_correctness(head_forest_model, "Random Forest")

In [6]:
# Evaluate each regression-style model on the head data. The evaluation
# helper prints MSE, MAE and R2 under the label passed as second argument.
# (The spelling `regrssion_correctness` is the method name as it is called
# throughout this notebook.)
#
# NOTE(review): logistic regression is normally a classifier; scoring it with
# regression metrics presumably treats the class labels as numbers — confirm
# that this comparison is intended.
hh_reg_log = hu_head.regrssion_correctness(hu_head.logistic_regression(), "Logistic Regression")
hh_reg_lin = hu_head.regrssion_correctness(hu_head.linear_regression(), "Linear Regression")
hh_reg_ridge = hu_head.regrssion_correctness(hu_head.ridge_regression(0.5), "ridge Regression")
hh_reg_lasso = hu_head.regrssion_correctness(hu_head.lasso_regression(0.5), "lasso Regression")

# Label fixed: it previously read "Elastic Net Regression Regression"
# (duplicated word), which showed up verbatim in the printed report.
hh_reg_enr = hu_head.regrssion_correctness(
    hu_head.elastic_net_regression(alpha=0.1, l1_ratio=0.5),
    "Elastic Net Regression",
)

Evaluation metrics for: Logistic Regression
Mean Squared Error (MSE): 6.458
Mean Absolute Error (MAE): 1.708
R-squared (R2) Score: -0.824

Evaluation metrics for: Linear Regression
Mean Squared Error (MSE): 3.263
Mean Absolute Error (MAE): 1.481
R-squared (R2) Score: 0.078

Evaluation metrics for: ridge Regression
Mean Squared Error (MSE): 3.586
Mean Absolute Error (MAE): 1.666
R-squared (R2) Score: -0.013

Evaluation metrics for: lasso Regression
Mean Squared Error (MSE): 3.586
Mean Absolute Error (MAE): 1.666
R-squared (R2) Score: -0.013

Evaluation metrics for: Elastic Net Regression Regression
Mean Squared Error (MSE): 3.586
Mean Absolute Error (MAE): 1.666
R-squared (R2) Score: -0.013



# Tail Analysis

In [10]:
# Build the model helper for the tail-side feature table.
# NOTE(review): 0.8 is presumably the train/test split fraction — confirm
# against model_generator.coin_models.
hu_tail = model_generator.coin_models(df_hu_tail, 0.8)

# Build AND evaluate every classifier on the tail data. The previous version
# built the models from `hu_head` while scoring them through `hu_tail`
# (copy-paste slip), so the "tail" results mixed the two datasets.
# Results are stored under `ht_*` so the head results in `hh_*` are not
# overwritten.
ht_knn = hu_tail.classification_correctness(hu_tail.KNN(5), "K Nearest Neighbour")
ht_DecisionTree = hu_tail.classification_correctness(hu_tail.DecisionTree(), "Decision Tree")
ht_svc = hu_tail.classification_correctness(hu_tail.SVC(), "SVC")
ht_rf = hu_tail.classification_correctness(hu_tail.RandomForest(100), "Random Forest")


# How to apply the models in real life

Two distinct models are used: one for classifying the "head" side and another for classifying the "tail" side. Each image is passed through both models, and the prediction with the higher score is taken as the result.

A coin shows either its "heads" or its "tails" side in any single image, and both sides can never be observed at the same time. For this reason, using a separate model for each side offers notable advantages.

The same rationale can be extended to deep learning models such as convolutional neural networks, LSTMs, and GRUs, among others.

The software architecture employed in this project facilitates seamless integration of additional functionalities, offering ease of implementation for future extensions.