In [None]:
import pandas as pd
from sklearn.datasets import load_iris
import seaborn as sns
import warnings
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

#machine learning imports.
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

#evaluating function for ML.
from sklearn.metrics import accuracy_score


In [None]:
# Load the iris CSV that sits next to the notebook.
iris_data = pd.read_csv('Iris.csv')
# info() prints its summary (column dtypes, non-null counts) directly, so it can run first...
iris_data.info()
# ...while head() only renders when it is the final expression of the cell.
iris_data.head()

In [None]:
# drop_duplicates() returns a new frame instead of modifying iris_data, so the result
# has to be assigned back for the de-duplication to take effect. Only fully identical
# rows are removed; columns are left untouched.
iris_data = iris_data.drop_duplicates()
iris_data.shape #rows and columns remaining after de-duplication.

In [None]:
# Count the missing values in each column.
iris_data.isnull().sum()

In [None]:
# Only the last expression of a cell is displayed, so the list of species has to be
# printed explicitly or it is computed and thrown away.
print(iris_data['Species'].unique()) #unique species.
iris_data['Species'].nunique() #number of unique species.


In [None]:
# Load scikit-learn's bundled copy of the iris dataset (the container object, not an (X, y) tuple).
flowers = load_iris(return_X_y=False)
type(flowers) #inspect what kind of object load_iris returned.

In [None]:
# Wrap the raw measurement array in a pandas DataFrame, one column per feature name.
flowers_df = pd.DataFrame(data=flowers.data, columns=flowers.feature_names)
flowers_df.head(n=5) #preview the first five rows.

In [None]:
# Show the species names. A bare expression in the middle of a cell is discarded, so it
# is printed explicitly; each name's position in this array is the integer code used in flowers.target.
print(flowers.target_names)
flowers_df['Species'] = flowers.target #label column, stored as those integer codes.

In [None]:
# Preview the first five rows again, now that the Species column has been added.
flowers_df.head(n=5)

In [None]:
# Silence FutureWarning only. A blanket filterwarnings("ignore") would also hide
# warnings that matter later in the notebook (for example from model fitting) and
# would make this category-specific filter redundant.
warnings.filterwarnings("ignore", category=FutureWarning)

# Pairwise scatter plots of every numeric column, coloured by Species, with histograms
# on the diagonal. The trailing semicolon keeps the PairGrid repr out of the cell output.
sns.pairplot(flowers_df, hue='Species', diag_kind='hist', palette='rocket');

In [None]:
#train_test_split(flowers_df) #30(data) to 70 ( for training) split

features = flowers_df[['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)','petal width (cm)', 'Species']]
target_column = flowers_df['Species'] #the target column/s is species.
target_column.head()

In [None]:
train_features, test_features, train_target, test_target = train_test_split(features, target_column, test_size=0.3, random_state=42) #30(data) to 70 ( for training) split

#70% of 150 =105 rows, 30% = 45 rows.

In [None]:
train_target.shape #shows we have 105 rows for the training of the machine.

In [None]:
test_target.shape #shows the amount of rows for the testing.

In [None]:
knn_1 = KNeighborsClassifier(n_neighbors=1) #creates a kneighbors classifier.
knn_1.fit(train_features,train_target) #training the kneighbors classifier using the target data.

In [None]:
pred1 = knn_1.predict(test_features) #making a prediction based on the data given to the model.
print(pred1)
print(" ")
acc_1 = accuracy_score(test_target, pred1)#calculating the accuracy of the prediction.
print(f"The accuracy of the prediction was {acc_1*100}") #prints out 1.0, meaning the model is 100% accurate.

In [None]:
knn_3 = KNeighborsClassifier(n_neighbors=3) #creates a kneighbors classifier with three neighbors.
knn_3.fit(train_features, train_target) #training the kneighbors classifier using the target data.

In [None]:
pred2 = knn_3.predict(test_features)
print(pred2)

acc_2 = accuracy_score(test_target, pred2) #calculating the accuracy of the prediction.
print(f"The accuracy of the prediction was {acc_2*100}") #prints out 1.0, meaning the model is 100% accurate.


In [None]:
log_model = LogisticRegression()
log_model.fit(train_features, train_target)

In [None]:
log_pred1 = log_model.predict(test_features)
print(log_pred1)

log_acc1 = accuracy_score(test_target, log_pred1)
print(f"The accuracy of the model is {log_acc1*100}")

In [None]:

input_1 = [[4,2,4,0.2, 3]]
input_2 = [[5,0.4,2,0.5, 10]]

knn_pred_1 = knn_1.predict(input_1)
log_pred_1 = log_model.predict(input_1)

knn_pred_2 = knn_1.predict(input_2)
log_pred_2 = log_model.predict(input_2)

print(f"Flower 1:\n KNN: {knn_pred_1}, Log: {log_pred_1}")
print(f"Flower 2:\n KNN: {knn_pred_2}, Log: {log_pred_2}")

In [None]:
# Sweep the number of neighbours from 1 to 11 and record the test accuracy for each value.
n_v = range(1, 12)
accuracies = []

for n in n_v:
    knn = KNeighborsClassifier(n_neighbors=n) #creates a kneighbors classifier with n neighbors.
    knn.fit(train_features, train_target) #training the classifier on the training split.

    # score() returns the fraction of correct predictions; convert it to a percentage.
    accuracy_ = knn.score(test_features, test_target) * 100
    accuracies.append(accuracy_)

    # Report this iteration's n and its accuracy rather than the whole cumulative list.
    print(f"The accuracy for n={n} is: {accuracy_}")


In [None]:
# Plot test accuracy against the number of neighbours, with labelled axes so the
# figure can be read without the surrounding code.
fig, ax = plt.subplots(figsize=(6, 4))
sns.lineplot(x=list(n_v), y=accuracies, ax=ax)
ax.set_title("KNN test accuracy by number of neighbours")
ax.set_xlabel("n_neighbors")
ax.set_ylabel("Test accuracy (%)")
plt.show()