In [None]:
## IMPORT MODULES ##

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report,confusion_matrix

In [None]:
## LOAD CSV AS DATAFRAME ##

# Read the iris CSV (path is relative to the notebook's working directory)
# and preview the first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer='../data/iris.csv')
df.head(n=5)

In [None]:
## GET SPECIFIC COLUMNS ##

# Pull the single 'petallength' column out of the frame (yields a Series)
# and display it as the cell output.
df_petal = df.loc[:, 'petallength']
df_petal

In [None]:
## FILTER ROWS BASED ON CONDITIONS ##

# Keep only the rows where petallength is greater than 3 AND sepallength is
# greater than 1; the two boolean masks are combined element-wise with `&`.
df_filtered = df.loc[df['petallength'].gt(3) & df['sepallength'].gt(1)]
df_filtered

In [None]:
## UNIQUE CLASSES ##

# Store the distinct class labels as a sorted list rather than a set.
# A set of strings has no stable iteration order from one kernel session to the
# next, so anything that consumes these labels positionally would otherwise be
# ordered arbitrarily. Sorting makes the order deterministic.
unique_classes = sorted(df['class'].unique())
print(unique_classes)

In [None]:
## PCA ##

# Separate the label column (Y) from the remaining feature columns (X).
Y = df['class']
X = df.drop(columns=['class'])

# Standardise the features with StandardScaler before running PCA.
z_scaler = StandardScaler()
scaled_data = z_scaler.fit_transform(X)

# Project the scaled features onto two principal components.
pca = PCA(n_components=2)
iris_pcs = pca.fit_transform(scaled_data)

# Put the component scores next to their class label for plotting.
# NOTE: this assignment aligns on index, so it relies on `df` carrying the
# default 0..n-1 index that matches the freshly built frame.
df_master = pd.DataFrame(iris_pcs, columns=['PC1', 'PC2'])
df_master['class'] = Y

# Scatter of the two components, coloured by class.
sns.scatterplot(x='PC1', y='PC2', hue='class', palette='tab10', data=df_master)


In [None]:
## MACHINE LEARNING ##

# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# split (and the forest below) the same on every run.
x_train,x_test,y_train,y_test = train_test_split(X,Y,test_size=0.3,random_state=42)
rf = RandomForestClassifier(n_estimators=100,random_state=42)
rf.fit(x_train,y_train)

predictions = rf.predict(x_test)
print(classification_report(y_test,predictions))

# Build ONE explicitly ordered label list and use it both for the matrix
# layout and for the tick labels. Passing `labels=` fixes the row/column order
# of the confusion matrix, so the axis annotations are guaranteed to name the
# right class (an unordered collection of labels could silently mislabel them)
# and the matrix keeps one row/column per class even if a class is absent
# from the test split.
class_labels = sorted(Y.unique())
cm = confusion_matrix(y_test,predictions,labels=class_labels)
sns.heatmap(cm, fmt='g',annot=True, cmap='Blues', xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel('Predicted',labelpad=20)
plt.ylabel('True',labelpad=20)
plt.show()