## ML process
* Loading data,
* Preprocessing,
* Training a model,
* Evaluating the model,
* Making predictions

In [40]:
#import necessary libraries
import pandas as pd ## used for data manipulation and analysis.
from sklearn.linear_model import LogisticRegression
import numpy as np #used for numerical competitions,arrays, maths functions
from sklearn.model_selection import train_test_split #split dataset into training and testing sets
from sklearn.preprocessing import StandardScaler #standarize features from scaling to have a min from 0 and a standard deviation of 1
from sklearn.metrics import accuracy_score #calculate accuracy of model predictions

In [41]:
#Loading the Dataset and Displaying the First Few Rows
iris_data=pd.read_csv('iris.csv')
iris_data.head() #shows first 5 rows

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [42]:
#Split the data into features (X) and labels (Y)
X=iris_data.drop(columns=['Id','Species'])
Y=iris_data['Species']

In [43]:
 #Split the data into training and testing sets
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.2,random_state=42)
#random_state: data random but reproducible(same data when running again)
#test_size=0.2: 20% for testing and 80% for training

In [44]:
X.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [45]:
Y.head()

0    Iris-setosa
1    Iris-setosa
2    Iris-setosa
3    Iris-setosa
4    Iris-setosa
Name: Species, dtype: object

In [46]:
#print X_train
X_train

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
22,4.6,3.6,1.0,0.2
15,5.7,4.4,1.5,0.4
65,6.7,3.1,4.4,1.4
11,4.8,3.4,1.6,0.2
42,4.4,3.2,1.3,0.2
...,...,...,...,...
71,6.1,2.8,4.0,1.3
106,4.9,2.5,4.5,1.7
14,5.8,4.0,1.2,0.2
92,5.8,2.6,4.0,1.2


In [47]:
#print Y_train
Y_train

22         Iris-setosa
15         Iris-setosa
65     Iris-versicolor
11         Iris-setosa
42         Iris-setosa
            ...       
71     Iris-versicolor
106     Iris-virginica
14         Iris-setosa
92     Iris-versicolor
102     Iris-virginica
Name: Species, Length: 120, dtype: object

In [48]:
#Strandardize the features 
scaler=StandardScaler()
X_train_scaled=scaler.fit_transform(X_train)
X_test_scaled=scaler.fit_transform(X_test)

In [49]:
#print X_train_scaled
X_train_scaled

array([[-1.47393679,  1.22037928, -1.5639872 , -1.30948358],
       [-0.13307079,  3.02001693, -1.27728011, -1.04292204],
       [ 1.08589829,  0.09560575,  0.38562104,  0.28988568],
       [-1.23014297,  0.77046987, -1.21993869, -1.30948358],
       [-1.7177306 ,  0.32056046, -1.39196294, -1.30948358],
       [ 0.59831066, -1.25412249,  0.72966956,  0.95628954],
       [ 0.72020757,  0.32056046,  0.44296246,  0.42316645],
       [-0.74255534,  0.99542457, -1.27728011, -1.30948358],
       [-0.98634915,  1.22037928, -1.33462153, -1.30948358],
       [-0.74255534,  2.34515281, -1.27728011, -1.44276436],
       [-0.01117388, -0.80421307,  0.78701097,  0.95628954],
       [ 0.23261993,  0.77046987,  0.44296246,  0.55644722],
       [ 1.08589829,  0.09560575,  0.5576453 ,  0.42316645],
       [-0.49876152,  1.8952434 , -1.39196294, -1.04292204],
       [-0.49876152,  1.44533399, -1.27728011, -1.30948358],
       [-0.37686461, -1.47907719, -0.01576889, -0.24323741],
       [ 0.59831066, -0.

In [50]:
#print X_test_scaled
X_test_scaled

array([[ 0.14443512, -0.63451517,  0.45110832, -0.07943674],
       [-0.33701527,  2.00929805, -1.2060243 , -1.20827465],
       [ 2.07023667, -1.16327782,  1.66633891,  1.30025404],
       [ 0.02407252, -0.37013385,  0.34063282,  0.29684256],
       [ 0.98697329, -0.63451517,  0.50634608,  0.17141613],
       [-0.69810306,  0.95177276, -1.3164998 , -1.08284822],
       [-0.45737787, -0.37013385, -0.15650697,  0.04598969],
       [ 1.10733589,  0.15862879,  0.67205934,  1.30025404],
       [ 0.26479771, -2.22080311,  0.34063282,  0.29684256],
       [-0.21665267, -0.8988965 ,  0.00920629, -0.07943674],
       [ 0.6258855 ,  0.42301012,  0.67205934,  0.92397473],
       [-1.42027864, -0.10575253, -1.37173756, -1.45912752],
       [-0.57774046,  1.21615408, -1.42697531, -1.33370109],
       [-1.29991605,  0.15862879, -1.3164998 , -1.45912752],
       [-1.05919085,  2.00929805, -1.3164998 , -1.20827465],
       [ 0.38516031,  0.68739144,  0.45110832,  0.422269  ],
       [ 0.6258855 , -0.

In [51]:
#Create a ML model
model=LogisticRegression()

In [52]:
#train the model
model.fit(X_train_scaled,Y_train) #learning relationship btwn scaled features and target variable

In [60]:
#Evaluate the model on the testing set
Y_pred=model.predict(X_test_scaled)

In [61]:
#Accuracy
accuracy=accuracy_score(Y_test,Y_pred)
print("Accuracy: ",accuracy)

Accuracy:  0.9666666666666667


In [62]:
# Sample new data for prediction
new_data=np.array([[5.1,3.5,1.4,0.2],
                   [6.3,2.9,5.6,1.8],
                   [4.9,3.0,1.4,0.2]])

In [63]:
#Standardize the new data
new_data_scaled=scaler.fit_transform(new_data)

In [66]:
#Make predictions
predictions=model.predict(new_data_scaled)
print(predictions)

['Iris-setosa' 'Iris-virginica' 'Iris-setosa']
