# In [None]:
# Standard library imports
import os
#Third-Party imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets

# Global plot styling and dataset loading
sns.set()  # switch every matplotlib figure below to seaborn's default theme

# Load the iris dataset shipped with scikit-learn and wrap it in a DataFrame:
# one column per feature, plus the numeric class label in "target".
data = datasets.load_iris()
df = pd.DataFrame(data=data["data"], columns=data["feature_names"]).assign(
    target=data["target"]
)

# First rows and summary statistics; as bare expressions these only produce
# output in an interactive / notebook session.
df.head()
df.describe()
# Histogram of each of the four measurements, one figure per feature.
for col in (
    "sepal length (cm)",
    "sepal width (cm)",
    "petal length (cm)",
    "petal width (cm)",
):
    df[col].hist()
    plt.suptitle(col)  # title the figure with the feature being plotted
    plt.show()

# Add a human-readable species column derived from the numeric class label.
species_by_label = {0: "setosa", 1: "versicolor", 2: "virginica"}
df["target_name"] = df["target"].map(species_by_label)

# Plot each feature against the class label, coloured by species, to see how
# well a single measurement separates the three classes.
for col in (
    "sepal length (cm)",
    "sepal width (cm)",
    "petal length (cm)",
    "petal width (cm)",
):
    sns.relplot(x=col, y="target", hue="target_name", data=df)
    _ = plt.suptitle(col, y=1.02)  # y slightly above 1 keeps the title clear of the plot

# Grid of pairwise plots over the DataFrame's columns, coloured by species.
sns.pairplot(df, hue="target_name")
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows as a final test set. The random_state is fixed so
# the split -- and every score computed from it further down -- is the same on
# each run; without it the results change every time the script is executed.
df_train, df_test = train_test_split(df, test_size=0.25, random_state=42)
df_train.shape
print(df_train)
df_train.head()

# Feature matrix (both label columns removed) and label vector as NumPy arrays.
x_train = df_train.drop(columns=["target", "target_name"]).values
y_train = df_train["target"].values
x_train
y_train



def single_feature_prediction(petal_length):
    """Classify an iris flower from its petal length alone.

    Returns the numeric class label:
    0 (setosa) for lengths below 2.5,
    1 (versicolor) for lengths from 2.5 up to, but excluding, 4.8,
    2 (virginica) for everything else.
    """
    if petal_length < 2.5:
        return 0  # setosa
    # Remaining values: versicolor below the upper threshold, virginica otherwise
    return 1 if petal_length < 4.8 else 2
# Column index 2 of the training matrix is the value fed to the hand-written
# rule (presumably petal length, matching the rule's docstring).
x_train[:,2]

# Apply the rule to every training sample and report the fraction it gets right.
petal_lengths = x_train[:, 2]
manual_y_predictions = np.array(list(map(single_feature_prediction, petal_lengths)))
manual_model_accuracy = (manual_y_predictions == y_train).mean()
print(f"Manual model accuracy:{manual_model_accuracy*100: .2f}%")



from sklearn.linear_model import LogisticRegression

model = LogisticRegression(max_iter=200)
# Carve a validation subset out of the training data:
# xt/yt are used for fitting, xv/yv for validation.
# random_state is fixed so the validation score is reproducible between runs.
xt, xv, yt, yv = train_test_split(x_train, y_train, test_size=0.25, random_state=42)
model.fit(xt, yt)
y_pred = model.predict(xv)  # predicted labels for the validation subset
model.score(xv, yv)  # accuracy on the validation subset



from sklearn.model_selection import cross_val_score,cross_val_predict

# 5-fold cross-validated accuracy of logistic regression on the training set.
model = LogisticRegression(max_iter=200).fit(xt, yt)
accuracies = cross_val_score(
    estimator=model, X=x_train, y=y_train, scoring="accuracy", cv=5
)
accuracies.mean()

from sklearn.tree import DecisionTreeClassifier

# Decision tree with default hyper-parameters, scored by 5-fold cross-validation.
print("\nDecision Tree Classifier:")
dt_model = DecisionTreeClassifier()
dt_accuracies = cross_val_score(
    estimator=dt_model, X=x_train, y=y_train, scoring="accuracy", cv=5
)
dt_mean_percent = dt_accuracies.mean() * 100
print(f"Decision Tree Accuracy: {dt_mean_percent:.2f}%")

from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours with k=3 (n_neighbors is the knob to tune),
# scored by 5-fold cross-validation.
print("\nK-Nearest Neighbors Classifier:")
knn_model = KNeighborsClassifier(n_neighbors=3)
knn_accuracies = cross_val_score(
    estimator=knn_model, X=x_train, y=y_train, scoring="accuracy", cv=5
)
knn_mean_percent = knn_accuracies.mean() * 100
print(f"KNN Accuracy (k=3): {knn_mean_percent:.2f}%")


from sklearn.svm import SVC

# Support vector machine with a linear kernel ('rbf', 'poly', ... are
# alternatives worth trying), scored by 5-fold cross-validation.
print("\nSupport Vector Machine Classifier:")
svm_model = SVC(kernel='linear')
svm_accuracies = cross_val_score(
    estimator=svm_model, X=x_train, y=y_train, scoring="accuracy", cv=5
)
svm_mean_percent = svm_accuracies.mean() * 100
print(f"SVM Accuracy (Linear Kernel): {svm_mean_percent:.2f}%")



# Cross-validated predictions for every training sample, and boolean masks
# marking which samples were classified correctly / incorrectly.
y_pred = cross_val_predict(estimator=model, X=x_train, y=y_train, cv=5)
predicted_correctly_mask = np.equal(y_pred, y_train)
not_predicted_correctly = np.logical_not(predicted_correctly_mask)
x_train[not_predicted_correctly]

# Training rows extended with the prediction, its species name and a flag
# telling whether the prediction matched the true label.
df_predictions = df_train.assign(
    correct_prediction=predicted_correctly_mask,
    prediction=y_pred,
)
prediction_names = {0: "setosa", 1: "versicolor", 2: "virginica"}
df_predictions["prediction_label"] = df_predictions["prediction"].map(prediction_names)
df_predictions.head()


# Compare predicted and actual species side by side. Each scatter plot gets its
# own axes: without an explicit `ax`, both calls draw on whichever axes happen
# to be current, so the two plots would be painted on top of each other.
fig, (ax_predicted, ax_actual) = plt.subplots(
    1, 2, figsize=(12, 5), sharex=True, sharey=True
)
sns.scatterplot(
    x="petal length (cm)",
    y="petal width (cm)",
    hue="prediction_label",
    data=df_predictions,
    ax=ax_predicted,
)
ax_predicted.set_title("Predicted species")
sns.scatterplot(
    x="petal length (cm)",
    y="petal width (cm)",
    hue="target_name",
    data=df_predictions,
    ax=ax_actual,
)
ax_actual.set_title("Actual species")
plt.show()


def plot_incorrect_predictions(df_predictions,x_axis_feature,y_axis_feature):
    """Show three scatter plots of two features on a 2x2 grid.

    The panels colour the same points by, in order: the predicted species
    ("prediction_label"), the true species ("target_name") and whether the
    prediction was right ("correct_prediction"). The fourth panel is hidden.

    Args:
        df_predictions: DataFrame containing the two feature columns and the
            three colouring columns named above.
        x_axis_feature: column name plotted on the x axis.
        y_axis_feature: column name plotted on the y axis.
    """
    figure, grid = plt.subplots(2, 2, figsize=(10, 10))
    panels = grid.flatten()
    hue_columns = ("prediction_label", "target_name", "correct_prediction")
    for panel, hue_column in zip(panels, hue_columns):
        sns.scatterplot(
            x=x_axis_feature,
            y=y_axis_feature,
            hue=hue_column,
            data=df_predictions,
            ax=panel,
        )
    # Only three panels are used; hide the empty one.
    panels[3].set_visible(False)
    plt.show()


# Visualise the cross-validated training predictions in petal space.
plot_incorrect_predictions(
    df_predictions,
    x_axis_feature="petal length (cm)",
    y_axis_feature="petal width (cm)",
)



# Try several values of LogisticRegression's C parameter and print the
# 5-fold cross-validated accuracy obtained with each.
for reg_param in (0.1, 0.3, 0.9, 1, 1.3, 1.8, 2, 5, 10, 15):
    print(reg_param)
    model = LogisticRegression(C=reg_param, max_iter=200)
    accuracies = cross_val_score(
        estimator=model, X=x_train, y=y_train, scoring="accuracy", cv=5
    )
    mean_accuracy_percent = accuracies.mean() * 100
    print(f"Accuracy :{mean_accuracy_percent:.2f}%")



# Final model: refit on the full training set with C=2 and evaluate on the
# held-out test set. The test features/labels must come from df_test; building
# them from df_train would report training accuracy as "test" accuracy.
model = LogisticRegression(max_iter=200, C=2)
x_test = df_test.drop(columns=["target", "target_name"]).values
y_test = df_test["target"].values

model.fit(x_train, y_train)
model.get_params()
y_test_pred = model.predict(x_test)
y_test_pred.shape

test_set_correctly_classified = y_test_pred == y_test
test_set_accuracy = np.mean(test_set_correctly_classified)
print(f"Test set accuracy :{test_set_accuracy*100:.2f}")

# Test rows extended with the prediction, its species name and a correctness
# flag -- the columns plot_incorrect_predictions expects.
df_predictions_test = df_test.copy()
df_predictions_test["correct_prediction"] = test_set_correctly_classified
df_predictions_test["prediction"] = y_test_pred
df_predictions_test["prediction_label"] = df_predictions_test["prediction"].map(
    {0: "setosa", 1: "versicolor", 2: "virginica"}
)
df_predictions_test.head()

# Plot the test-set predictions (not the training-set ones).
plot_incorrect_predictions(
    df_predictions_test,
    x_axis_feature="petal length (cm)",
    y_axis_feature="petal width (cm)",
)



# Confusion matrix for each model, evaluated on the held-out test set.
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Evaluate on df_test rather than on rows the models were fitted on: predicting
# the training rows themselves yields misleadingly clean matrices.
x_test = df_test.drop(columns=["target", "target_name"]).values
y_test = df_test["target"].values

# Every model is refitted on the same full training set so the four matrices
# are directly comparable.
# Logistic Regression
model = LogisticRegression(max_iter=200, C=2)
model.fit(x_train, y_train)
y_pred_logreg = model.predict(x_test)

# Decision Tree
dt_model = DecisionTreeClassifier()
dt_model.fit(x_train, y_train)
y_pred_dt = dt_model.predict(x_test)

# K-Nearest Neighbors
knn_model = KNeighborsClassifier(n_neighbors=3)
knn_model.fit(x_train, y_train)
y_pred_knn = knn_model.predict(x_test)

# Support Vector Machine
svm_model = SVC(kernel='linear')
svm_model.fit(x_train, y_train)
y_pred_svm = svm_model.predict(x_test)

# Plot confusion matrices on a 2x2 grid, one panel per model.
class_labels = [0, 1, 2]
class_names = ["setosa", "versicolor", "virginica"]
panels = (
    ("Logistic Regression", y_pred_logreg),
    ("Decision Tree", y_pred_dt),
    ("K-Nearest Neighbors", y_pred_knn),
    ("Support Vector Machine", y_pred_svm),
)
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
for ax, (title, predictions) in zip(axes.flatten(), panels):
    # Passing `labels` keeps the matrix 3x3 (matching class_names) even if a
    # class happens to be absent from the test labels or the predictions.
    matrix = confusion_matrix(y_test, predictions, labels=class_labels)
    ConfusionMatrixDisplay(matrix, display_labels=class_names).plot(ax=ax, colorbar=False)
    ax.set_title(title)

plt.tight_layout()
plt.show()
