# Introduction to Machine Learning

This notebook provides an introduction to machine learning concepts and a basic end-to-end workflow using scikit-learn.

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Import our custom modules.
# A relative sys.path entry is resolved against the kernel's current working
# directory, so `src` is only importable when the notebook is started from a
# directory whose parent contains `src/`.
# NOTE(review): presumably this notebook lives one level below the repo root — confirm.
import sys
sys.path.append('..')
from src.data.data_loader import load_dataset, split_dataset
from src.utils.visualization import plot_confusion_matrix

## 1. Loading and Exploring Data

We'll start by loading a sample dataset (the Iris dataset bundled with scikit-learn) and exploring its characteristics.

In [None]:
from sklearn.datasets import load_iris

# Fetch the bundled iris data and assemble features and labels in one frame:
# one column per entry of `iris.feature_names`, followed by a `target`
# column taken from `iris.target`.
iris = load_iris()
data = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=iris.target)

# Report the frame's dimensions, then preview the leading rows
print("Dataset Shape:", data.shape)
data.head()

## 2. Data Preprocessing

Next, we'll prepare our data for training by splitting it into training and test sets.

In [None]:
# Partition `data` into train/test features and labels via the project helper.
# NOTE(review): test_size=0.2 presumably holds out 20% of the rows and
# random_state=42 presumably fixes the shuffle seed — confirm against
# split_dataset's implementation.
splits = split_dataset(
    data=data,
    target_column='target',
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = splits

## 3. Training a Simple Model

Let's train a simple classification model (a decision tree), predict on the test set, and visualize the results with a confusion matrix.

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree on the training split. `fit` returns the estimator
# itself, so construction and training are chained; the fixed random_state
# keeps the fitted tree reproducible between runs.
model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict labels for the test split
y_pred = model.predict(X_test)

# Compare true and predicted labels in a confusion matrix.
# NOTE(review): `labels` receives iris.target_names (class names) while
# y_test / y_pred presumably hold the codes from iris.target — confirm that
# plot_confusion_matrix treats `labels` as display names only.
plot_confusion_matrix(y_test, y_pred, labels=iris.target_names)