# Basic Machine Learning Tutorials

This notebook walks through simple scikit-learn examples step by step.

Run the cell below if you need to install the required libraries. In Google Colab they come pre-installed.

In [None]:
!pip install scikit-learn matplotlib

## 0. Linear regression on synthetic data

Generate a noisy 1D dataset and fit a linear regression model.

In [None]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

# Synthetic one-feature regression problem; coef=True additionally returns
# the coefficient that was used to generate the targets.
X, y, coef = make_regression(
    n_samples=100,
    n_features=1,
    noise=10.0,
    coef=True,
    random_state=42,
)

# Fit a linear model on the full dataset and compare it with the generator.
model = LinearRegression().fit(X, y)
print('True coefficient:', coef)
print('Learned coefficient:', model.coef_[0])
print('Intercept:', model.intercept_)

# Evaluate the fitted line on an evenly spaced grid spanning the observed
# feature range; predict() expects a 2-D (n_samples, n_features) array.
x_grid = np.linspace(X.min(), X.max(), 100)[:, np.newaxis]
y_pred = model.predict(x_grid)

fig, ax = plt.subplots()
ax.scatter(X, y, color='blue', label='Data')
ax.plot(x_grid, y_pred, color='red', label='Fit')
ax.set_xlabel('Feature')
ax.set_ylabel('Target')
ax.legend()
fig.tight_layout()
plt.show()

## 1. Logistic regression on Iris

Train a logistic regression classifier on the Iris dataset and report accuracy.

In [None]:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Load Iris and hold out 30% of the samples as a class-stratified test set.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# Train the classifier and score it on the held-out samples.
clf = LogisticRegression(max_iter=200).fit(X_train, y_train)
preds = clf.predict(X_test)
print('Accuracy:', accuracy_score(y_test, preds))

# The display object exposes the axes and figure it drew on.
disp = ConfusionMatrixDisplay.from_predictions(y_test, preds)
disp.ax_.set_title('Logistic Regression Confusion Matrix')
disp.figure_.tight_layout()
plt.show()

## 2. k-NN classification on digits

Use a 3-nearest-neighbor classifier on the handwritten digits dataset.

In [None]:
from sklearn.datasets import load_digits
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Load the digits data and hold out 30% as a class-stratified test set.
X, y = load_digits(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# Classify each test sample by majority vote among its 3 nearest training samples.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
preds = knn.predict(X_test)
print('Test accuracy:', accuracy_score(y_test, preds))

# The display object exposes the axes and figure it drew on.
disp = ConfusionMatrixDisplay.from_predictions(y_test, preds)
disp.ax_.set_title('k-NN Confusion Matrix')
disp.figure_.tight_layout()
plt.show()

## 3. Decision tree classifier

Fit a shallow decision tree on the Iris dataset and show the classification report.

In [None]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Load Iris and hold out 30% of the samples as a class-stratified test set.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# Depth is capped at 3 to keep the tree shallow; the seed fixes tie-breaking.
tree = DecisionTreeClassifier(max_depth=3, random_state=42).fit(X_train, y_train)
preds = tree.predict(X_test)
print(classification_report(y_test, preds))

# The display object exposes the axes and figure it drew on.
disp = ConfusionMatrixDisplay.from_predictions(y_test, preds)
disp.ax_.set_title('Decision Tree Confusion Matrix')
disp.figure_.tight_layout()
plt.show()

## 4. k-means clustering

Cluster the Iris dataset into three groups using k-means.

In [None]:
from sklearn.datasets import load_iris
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt

# Cluster on all four Iris features; the labels y are loaded but not used for fitting.
X, y = load_iris(return_X_y=True)

# n_init is set explicitly because its default differs between scikit-learn
# releases (10 in older versions, 'auto' in newer ones); pinning it keeps the
# result the same across versions and avoids the FutureWarning.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
clusters = kmeans.fit_predict(X)
print('Cluster counts:', np.bincount(clusters))
# The newline must be the escape sequence \n inside the literal; a physical
# line break inside a single-quoted string is a SyntaxError.
print('Cluster centers:\n', kmeans.cluster_centers_)

# Only the first two features (sepal length and sepal width) are plotted,
# although the clustering itself used all four.
centers = kmeans.cluster_centers_
plt.scatter(X[:, 0], X[:, 1], c=clusters, cmap='viridis', s=30)
plt.scatter(centers[:, 0], centers[:, 1], c='red', marker='x', s=100, linewidths=2, label='Centers')
plt.xlabel('Sepal length (cm)')
plt.ylabel('Sepal width (cm)')
plt.title('k-means Clustering')
plt.legend()
plt.tight_layout()
plt.show()

## 5. Principal component analysis

Reduce the dimensionality of the digits dataset to two components using PCA.

In [None]:
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Project the 64-pixel digit images onto their first two principal components.
X, y = load_digits(return_X_y=True)

# random_state is fixed like every other estimator in this notebook: depending
# on the scikit-learn version, the default solver for data of this size may be
# the randomized SVD, whose output otherwise varies between runs.
pca = PCA(n_components=2, random_state=42)
reduced = pca.fit_transform(X)
print('Explained variance ratio:', pca.explained_variance_ratio_)
print('Transformed shape:', reduced.shape)

# Colour each point by its digit label and add a colorbar so the colours
# can be mapped back to digits.
points = plt.scatter(reduced[:, 0], reduced[:, 1], c=y, cmap='tab10', s=15)
plt.colorbar(points, label='Digit')
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.title('PCA of Digits')
plt.tight_layout()
plt.show()

This concludes the brief tour of basic machine learning examples using scikit-learn. Feel free to modify the code cells and explore further!