<a href="https://colab.research.google.com/github/Amulya-Rao-mj/Machine-learning-lab-/blob/main/mllab3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
### two- way splitting
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import KBinsDiscretizer, PolynomialFeatures
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score, precision_score, recall_score, classification_report

# Parameters
n_samples = 500
n_features = 10
n_bins = 3

# Generate synthetic data with reduced noise
X = np.random.rand(n_samples, n_features)
true_coefficients = np.random.randn(n_features)
y_continuous = X @ true_coefficients + np.random.normal(0, 0.1, size=n_samples)  # reduced noise

# Add polynomial features (degree=2)
poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X)

# Discretize continuous target into classes
discretizer = KBinsDiscretizer(n_bins=n_bins, encode='ordinal', strategy='uniform')
y_classes = discretizer.fit_transform(y_continuous.reshape(-1, 1)).astype(int).ravel()

# Train-test split on polynomial features
X_train, X_test, y_train_cont, y_test_cont, y_train_cls, y_test_cls = train_test_split(
    X_poly, y_continuous, y_classes, test_size=0.2, random_state=42)

# Train linear regression model on polynomial features
model = LinearRegression()
model.fit(X_train, y_train_cont)

# Predict continuous values
y_pred_cont = model.predict(X_test)

# Convert predicted continuous values to classes
y_pred_cls = discretizer.transform(y_pred_cont.reshape(-1,1)).astype(int).ravel()

# Calculate regression metrics
mae = mean_absolute_error(y_test_cont, y_pred_cont)
mse = mean_squared_error(y_test_cont, y_pred_cont)
rmse = np.sqrt(mse)
r2 = r2_score(y_test_cont, y_pred_cont)

# Calculate classification metrics
conf_matrix = confusion_matrix(y_test_cls, y_pred_cls)
accuracy = accuracy_score(y_test_cls, y_pred_cls)
precision = precision_score(y_test_cls, y_pred_cls, average='weighted')
recall = recall_score(y_test_cls, y_pred_cls, average='weighted')
f1 = f1_score(y_test_cls, y_pred_cls, average='weighted')
class_report = classification_report(y_test_cls, y_pred_cls)

# Print results
print("=== Classification Metrics ===")
print("Confusion Matrix:")
print(conf_matrix)
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision (weighted): {precision:.4f}")
print(f"Recall (weighted): {recall:.4f}")
print(f"F1 Score (weighted): {f1:.4f}")
print("\nClassification Report:")
print(class_report)

print("=== Regression Metrics ===")
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R² Score: {r2:.4f}")


=== Classification Metrics ===
Confusion Matrix:
[[14  1  0]
 [ 3 60  3]
 [ 0  1 18]]
Accuracy: 0.9200
Precision (weighted): 0.9251
Recall (weighted): 0.9200
F1 Score (weighted): 0.9210

Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.93      0.88        15
           1       0.97      0.91      0.94        66
           2       0.86      0.95      0.90        19

    accuracy                           0.92       100
   macro avg       0.88      0.93      0.90       100
weighted avg       0.93      0.92      0.92       100

=== Regression Metrics ===
MAE: 0.0831
MSE: 0.0107
RMSE: 0.1033
R² Score: 0.9937


In [2]:
#three-way splitting
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import KBinsDiscretizer, PolynomialFeatures
from sklearn.metrics import (
    mean_absolute_error, mean_squared_error, r2_score,
    confusion_matrix, f1_score, accuracy_score,
    precision_score, recall_score, classification_report
)

# Parameters
n_samples = 500
n_features = 10
n_bins = 3  # Number of classes after binning

# Generate synthetic regression data (with some noise)
X = np.random.rand(n_samples, n_features)
true_coefficients = np.random.randn(n_features)
y_continuous = X @ true_coefficients + np.random.normal(0, 0.1, size=n_samples)  # reduced noise

# Convert continuous target to discrete classes using KBinsDiscretizer
discretizer = KBinsDiscretizer(n_bins=n_bins, encode='ordinal', strategy='uniform')
y_classes = discretizer.fit_transform(y_continuous.reshape(-1, 1)).astype(int).ravel()

# Add polynomial features to improve model capacity
poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X)

# Three-way split: train (60%), validation (20%), test (20%)

# Step 1: split train and temp (val+test)
X_train, X_temp, y_train_cont, y_temp_cont, y_train_cls, y_temp_cls = train_test_split(
    X_poly, y_continuous, y_classes, test_size=0.4, random_state=42)

# Step 2: split temp into validation and test sets equally
X_val, X_test, y_val_cont, y_test_cont, y_val_cls, y_test_cls = train_test_split(
    X_temp, y_temp_cont, y_temp_cls, test_size=0.5, random_state=42)

print(f"Train size: {X_train.shape[0]}")
print(f"Validation size: {X_val.shape[0]}")
print(f"Test size: {X_test.shape[0]}")

# Train Linear Regression model on training data
model = LinearRegression()
model.fit(X_train, y_train_cont)

# Predict continuous outputs on the test set
y_pred_cont = model.predict(X_test)

# Convert predicted continuous values to discrete classes using the same discretizer
y_pred_cls = discretizer.transform(y_pred_cont.reshape(-1, 1)).astype(int).ravel()

# Regression metrics
mae = mean_absolute_error(y_test_cont, y_pred_cont)
mse = mean_squared_error(y_test_cont, y_pred_cont)
rmse = np.sqrt(mse)
r2 = r2_score(y_test_cont, y_pred_cont)

# Classification metrics
conf_matrix = confusion_matrix(y_test_cls, y_pred_cls)
accuracy = accuracy_score(y_test_cls, y_pred_cls)
precision = precision_score(y_test_cls, y_pred_cls, average='weighted')
recall = recall_score(y_test_cls, y_pred_cls, average='weighted')
f1 = f1_score(y_test_cls, y_pred_cls, average='weighted')
class_report = classification_report(y_test_cls, y_pred_cls)

# Print classification results
print("\n=== Classification Metrics ===")
print("Confusion Matrix:")
print(conf_matrix)
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision (weighted): {precision:.4f}")
print(f"Recall (weighted): {recall:.4f}")
print(f"F1 Score (weighted): {f1:.4f}")
print("\nClassification Report:")
print(class_report)

# Print regression results
print("=== Regression Metrics ===")
print(f"MAE: {mae:.4f}")
print(f"MSE: {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R² Score: {r2:.4f}")


Train size: 300
Validation size: 100
Test size: 100

=== Classification Metrics ===
Confusion Matrix:
[[17  1  0]
 [ 1 59  1]
 [ 0  0 21]]
Accuracy: 0.9700
Precision (weighted): 0.9703
Recall (weighted): 0.9700
F1 Score (weighted): 0.9700

Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.94      0.94        18
           1       0.98      0.97      0.98        61
           2       0.95      1.00      0.98        21

    accuracy                           0.97       100
   macro avg       0.96      0.97      0.97       100
weighted avg       0.97      0.97      0.97       100

=== Regression Metrics ===
MAE: 0.0954
MSE: 0.0152
RMSE: 0.1232
R² Score: 0.9811


In [3]:
#k-fold
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import KBinsDiscretizer, PolynomialFeatures
from sklearn.metrics import (
    mean_absolute_error, mean_squared_error, r2_score,
    accuracy_score, precision_score, recall_score, f1_score
)
from sklearn.model_selection import KFold

n_samples = 500
n_features = 10
X = np.random.rand(n_samples, n_features)

true_coefficients = np.random.randn(n_features)

y_continuous = X @ true_coefficients + np.random.normal(0, 0.1, size=n_samples)

discretizer = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')
y_classes = discretizer.fit_transform(y_continuous.reshape(-1,1)).astype(int).ravel()

poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X)

kf = KFold(n_splits=5, shuffle=True, random_state=42)

mae_scores, mse_scores, r2_scores = [], [], []
accuracy_scores, precision_scores, recall_scores, f1_scores = [], [], [], []

for fold, (train_idx, val_idx) in enumerate(kf.split(X_poly), 1):
    X_train, X_val = X_poly[train_idx], X_poly[val_idx]
    y_train_cont, y_val_cont = y_continuous[train_idx], y_continuous[val_idx]
    y_train_cls, y_val_cls = y_classes[train_idx], y_classes[val_idx]

    model = LinearRegression()
    model.fit(X_train, y_train_cont)

    y_pred_cont = model.predict(X_val)

    mae = mean_absolute_error(y_val_cont, y_pred_cont)
    mse = mean_squared_error(y_val_cont, y_pred_cont)
    r2 = r2_score(y_val_cont, y_pred_cont)

    y_pred_cls = discretizer.transform(y_pred_cont.reshape(-1,1)).astype(int).ravel()

    accuracy = accuracy_score(y_val_cls, y_pred_cls)
    precision = precision_score(y_val_cls, y_pred_cls, average='weighted', zero_division=0)
    recall = recall_score(y_val_cls, y_pred_cls, average='weighted', zero_division=0)
    f1 = f1_score(y_val_cls, y_pred_cls, average='weighted', zero_division=0)

    mae_scores.append(mae)
    mse_scores.append(mse)
    r2_scores.append(r2)
    accuracy_scores.append(accuracy)
    precision_scores.append(precision)
    recall_scores.append(recall)
    f1_scores.append(f1)

    print(f"Fold {fold}: R2={r2:.4f}, MAE={mae:.4f}, Accuracy={accuracy:.4f}, F1={f1:.4f}")

print("\n=== Average Metrics Over 5 Folds ===")
print(f"R2 Score: {np.mean(r2_scores):.4f}")
print(f"MAE: {np.mean(mae_scores):.4f}")
print(f"MSE: {np.mean(mse_scores):.4f}")
print(f"Accuracy: {np.mean(accuracy_scores):.4f}")
print(f"Precision: {np.mean(precision_scores):.4f}")
print(f"Recall: {np.mean(recall_scores):.4f}")
print(f"F1 Score: {np.mean(f1_scores):.4f}")


Fold 1: R2=0.9860, MAE=0.0796, Accuracy=0.9400, F1=0.9402
Fold 2: R2=0.9874, MAE=0.0834, Accuracy=0.9400, F1=0.9391
Fold 3: R2=0.9894, MAE=0.0807, Accuracy=0.9700, F1=0.9699
Fold 4: R2=0.9868, MAE=0.0919, Accuracy=0.9600, F1=0.9602
Fold 5: R2=0.9866, MAE=0.0915, Accuracy=0.9500, F1=0.9501

=== Average Metrics Over 5 Folds ===
R2 Score: 0.9872
MAE: 0.0854
MSE: 0.0117
Accuracy: 0.9520
Precision: 0.9532
Recall: 0.9520
F1 Score: 0.9519
