Data Science Contest - Machine Learning with Iris Dataset
---------------------------------------------------------
Welcome to the Data Science Contest!
Your goal is to build a machine learning model using the Iris dataset.
Complete the coding tasks below to improve the accuracy of your model.

In [None]:

# Step 1: Import necessary libraries
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:

# Step 2: Load the Iris dataset
# `iris` is the bundle returned by scikit-learn; its data/target fields
# become the feature matrix X and the label vector y.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Tabular view for inspection: one column per entry in iris.feature_names,
# with the labels appended as a final 'target' column.
df = pd.DataFrame(X, columns=iris.feature_names).assign(target=y)

# Preview the top of the table (the bare last expression is the cell output)
print("Here are the first few rows of the dataset:")
df.head()


In [None]:

# Step 3: Split the dataset into training and testing sets
# Hold out 20% of the samples for testing. `stratify=y` keeps the class
# proportions of y the same in the training and test parts, so the small
# test set is not skewed toward one class. `random_state=42` makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [None]:

# Step 4: Preprocess the data (Scaling features)
# Learn the scaling statistics from the training split only, then apply that
# same fitted transform to both splits so the test data never influences it.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:

# Step 5: Train the initial model (Random Forest)
# Baseline classifier; the fixed seed keeps the fitted forest repeatable.
model = RandomForestClassifier(random_state=42)
# Left as the last expression so the notebook displays the fitted estimator.
model.fit(X=X_train_scaled, y=y_train)


In [None]:

# Step 6: Make predictions on the test set
# One predicted class label per held-out (scaled) sample.
y_pred = model.predict(X=X_test_scaled)


In [None]:

# Step 7: Evaluate the model
# Share of test labels predicted correctly, printed as a percentage.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Initial Accuracy: {accuracy * 100:.2f}%")


In [None]:

# Task 1: Improve the model accuracy
# You may swap the model, tune hyperparameters, or try other algorithms.
# Techniques such as cross-validation or feature selection are also welcome.

# Your Code for Task 1
# -----------------------------------
# Example: trying another model (edit freely).

# K-Nearest Neighbors as a starting point
from sklearn.neighbors import KNeighborsClassifier

# Fit KNN on the scaled training data. n_neighbors=5 is spelled out so it is
# easy to change (5 is also the library default). fit() returns the estimator
# itself, so construction and fitting can be chained.
knn_model = KNeighborsClassifier(n_neighbors=5).fit(X_train_scaled, y_train)

# Score the KNN model on the held-out test split
knn_pred = knn_model.predict(X_test_scaled)
knn_accuracy = accuracy_score(y_test, knn_pred)
print(f"KNN Model Accuracy: {knn_accuracy * 100:.2f}%")


In [None]:

# Step 8: Additional tasks for improving accuracy
# - Experiment with different algorithms (SVM, Decision Tree, etc.)
# - Implement cross-validation and compare results.
# - Use GridSearchCV for hyperparameter tuning.

# Your Code for Additional Tasks
# -----------------------------------
# Example: Implementing GridSearchCV for hyperparameter tuning
from sklearn.model_selection import GridSearchCV
# Imported here as well so this cell still runs if the Task 1 cell (which
# participants are free to rewrite) no longer imports KNeighborsClassifier.
from sklearn.neighbors import KNeighborsClassifier

# Candidate hyperparameters: every combination is scored with 5-fold
# cross-validation on the training split.
param_grid = {
    'n_neighbors': [3, 5, 7, 9],
    'weights': ['uniform', 'distance']
}
grid_search = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5)
grid_search.fit(X_train_scaled, y_train)

# NOTE: X_train_scaled was standardized with statistics from the whole
# training split, so each validation fold has already influenced the scaling
# and the CV score can be slightly optimistic. Putting the scaler and the
# classifier in a Pipeline would avoid this.

# Best parameters and their mean cross-validated accuracy
print(f"Best KNN Parameters: {grid_search.best_params_}")
print(f"Best KNN CV Accuracy: {grid_search.best_score_ * 100:.2f}%")

# Accuracy of the best estimator (refit on the full training split by
# GridSearchCV) on the held-out test split
best_knn_model = grid_search.best_estimator_
best_knn_pred = best_knn_model.predict(X_test_scaled)
best_knn_accuracy = accuracy_score(y_test, best_knn_pred)
print(f"Best KNN Model Accuracy: {best_knn_accuracy * 100:.2f}%")

# Final Notes:
# - The participant with the highest accuracy at the end of the contest wins.
# - Document any changes you make to the code.
