In [1]:
import pandas as pd
import numpy as np

from sklearn import datasets

In [2]:
# prompt: create a code to load iris data

# Fetch the iris dataset bundled with scikit-learn.
iris = datasets.load_iris()

# Place the measurements and the class labels side by side and wrap them in a
# DataFrame whose final column is named 'target'.
# Note: stacking the labels next to the float measurements stores them as
# floats as well (they print as 0.0 rather than 0).
df_iris = pd.DataFrame(
    np.column_stack((iris.data, iris.target)),
    columns=[*iris.feature_names, 'target'],
)

# Preview the top five rows.
print(df_iris.head())

   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   target  
0     0.0  
1     0.0  
2     0.0  
3     0.0  
4     0.0  


# **Splitting Dataset**

In [3]:
# prompt: create code for splitting dataset

from sklearn.model_selection import train_test_split

# The label vector (y) is the 'target' column; the feature matrix (X) is
# every remaining column.
y = df_iris['target']
X = df_iris.drop(columns=['target'])

# Hold out 20% of the rows for testing; the fixed seed keeps the partition
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the dimensions of each partition.
print("\nShape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of y_test:", y_test.shape)



Shape of X_train: (120, 4)
Shape of X_test: (30, 4)
Shape of y_train: (120,)
Shape of y_test: (30,)


# **Modeling**

In [4]:
# prompt: create code for training using random forest

from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

# Build a seeded forest of 100 trees (n_estimators is the tree count and can
# be tuned) and fit it on the training split; fit() returns the estimator.
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Predict labels for the held-out rows.
y_pred = model.predict(X_test)

# Score the predictions against the true test labels and report the result.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"\nAccuracy of the Random Forest model: {accuracy:.4f}")


Accuracy of the Random Forest model: 1.0000


In [5]:
# prompt: create code for training using naive bayes

from sklearn.naive_bayes import GaussianNB

# Create a Gaussian Naive Bayes classifier with its default settings.
naive_bayes_model = GaussianNB()

# Fit it on the same training split used for the Random Forest.
naive_bayes_model.fit(X=X_train, y=y_train)

# Predict labels for the held-out rows.
y_pred_nb = naive_bayes_model.predict(X=X_test)

# Score the predictions; accuracy_score comes from the import made in the
# Random Forest cell, so that cell has to run first.
accuracy_nb = accuracy_score(y_true=y_test, y_pred=y_pred_nb)
print(f"\nAccuracy of the Naive Bayes model: {accuracy_nb:.4f}")



Accuracy of the Naive Bayes model: 1.0000


# **Save Model**

In [6]:
# prompt: create a code to save the model

import joblib

# Destination files for the two serialized estimators (relative paths, so
# they are written to the current working directory).
model_filename = 'random_forest_model.pkl'
naive_bayes_model_filename = 'naive_bayes_model.pkl'

# Persist the trained Random Forest and confirm where it was written.
joblib.dump(value=model, filename=model_filename)
print(f"Random Forest model saved to {model_filename}")

# Persist the trained Naive Bayes model and confirm where it was written.
joblib.dump(value=naive_bayes_model, filename=naive_bayes_model_filename)
print(f"Naive Bayes model saved to {naive_bayes_model_filename}")

Random Forest model saved to random_forest_model.pkl
Naive Bayes model saved to naive_bayes_model.pkl
