<a href="https://colab.research.google.com/github/Anushriya3007/ML-Lab/blob/main/ML%20Lab%203.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# =======================================
# FINALIZED ML PIPELINE (Regression + Classification)
# =======================================

# --- Colab Upload ---
# Prompt for the CSV only when it is not already on disk, so re-running the
# notebook (Restart & Run All) does not block on the interactive upload widget,
# and so the notebook can also run outside Colab when the file is present.
# Delete the local file first if a fresh copy needs to be uploaded.
import os

try:
    from google.colab import files
except ImportError:
    files = None  # not running inside Google Colab

uploaded = {}  # stays empty when the upload step is skipped
if not os.path.exists("iris_dataset.csv"):
    if files is None:
        raise FileNotFoundError(
            "iris_dataset.csv not found; place it next to this notebook "
            "or run in Google Colab to upload it."
        )
    uploaded = files.upload()

import pandas as pd
import numpy as np
from sklearn.model_selection import KFold, train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, f1_score, accuracy_score

# -------------------------
# Step 1: Load Iris Dataset
# -------------------------
# Read the uploaded CSV and show its dimensions plus the first rows.
df = pd.read_csv("iris_dataset.csv")
print(f"Shape of dataset: {df.shape}")
print(df.head())

# Features are every column except "target"; the label is the "target" column.
X, y = df.drop(columns="target"), df["target"]

# -------------------------
# Step 2: Regression Example
# -------------------------
print("\n--- Regression with KFold (Demo Dataset) ---")

# Toy data: 12 rows where target is exactly 10 * feature.
data = pd.DataFrame({'feature': range(1, 13), 'target': range(10, 130, 10)})
X_reg, y_reg = data[['feature']], data['target']

kf = KFold(n_splits=3, shuffle=True, random_state=42)

for fold, (train_index, test_index) in enumerate(kf.split(X_reg)):
    # Rows held out by this fold versus the rows left for training/validation.
    X_test = X_reg.iloc[test_index]
    y_test = y_reg.iloc[test_index]
    X_train_val = X_reg.iloc[train_index]
    y_train_val = y_reg.iloc[train_index]

    # Halve the remaining rows into a training part and a validation part.
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_val,
        y_train_val,
        test_size=0.5,
        random_state=42,
    )

    # Fit on the training half only.
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Error on the validation half.
    y_val_pred = model.predict(X_val)
    mse_val = mean_squared_error(y_val, y_val_pred)

    # Error and goodness-of-fit on the fold's held-out rows.
    y_test_pred = model.predict(X_test)
    mse_test = mean_squared_error(y_test, y_test_pred)
    r2 = r2_score(y_test, y_test_pred)
    rmse = np.sqrt(mse_test)

    # Per-fold report (fold index is zero-based, shown one-based).
    print("Fold", fold + 1)
    for label, value in (
        ("Validation MSE:", mse_val),
        ("Test MSE:", mse_test),
        ("R²:", r2),
        ("RMSE:", rmse),
    ):
        print(label, value)
    print("-" * 30)

# -------------------------
# Step 3: Classification Example
# -------------------------
print("\n--- Classification with Logistic Regression (Demo Data) ---")

# Ten-row toy dataset with two numeric features and an alternating 0/1 label.
data_class = pd.DataFrame({
    'feature1': [5, 2, 9, 4, 7, 6, 1, 8, 3, 0],
    'feature2': list(range(10, 101, 10)),
    'target':   [0, 1] * 5,  # binary classification
})

X_cls = data_class[['feature1', 'feature2']]
y_cls = data_class['target']

# First split: hold out 30% as the test set, keeping class proportions.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X_cls,
    y_cls,
    test_size=0.3,
    random_state=42,
    stratify=y_cls,
)

# Second split: carve a validation set out of the remaining rows.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=0.33,
    random_state=42,
    stratify=y_train_val,
)

# Standardize using statistics learned from the training rows only,
# then apply the same transform to every split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_val_scaled   = scaler.transform(X_val)
X_test_scaled  = scaler.transform(X_test)

# Fit the logistic regression on the scaled training rows.
log_reg = LogisticRegression(random_state=42).fit(X_train_scaled, y_train)

# Report on the validation split.
y_val_pred = log_reg.predict(X_val_scaled)
print("Validation Results")
print("F1-score:", f1_score(y_val, y_val_pred, average='weighted'))
print("Accuracy:", accuracy_score(y_val, y_val_pred))
print("-" * 30)

# Report on the test split, including the raw labels and predictions.
y_test_pred = log_reg.predict(X_test_scaled)
print("Test Results")
print("F1-score:", f1_score(y_test, y_test_pred, average='weighted'))
print("Accuracy:", accuracy_score(y_test, y_test_pred))
print("y_test:", y_test.values)
print("y_pred:", y_test_pred)

# -------------------------
# Step 4: Classification with RandomForest
# -------------------------
print("\n--- Classification with RandomForest (Demo Data) ---")

# Fit on the unscaled training rows produced by the classification split above
# and score the weighted F1 on the matching unscaled test rows.
clf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred_class = clf.predict(X_test)
print("F1-score:", f1_score(y_test, y_pred_class, average='weighted'))

Saving iris_dataset.csv to iris_dataset.csv
Shape of dataset: (150, 5)
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   target  
0       0  
1       0  
2       0  
3       0  
4       0  

--- Regression with KFold (Demo Dataset) ---
Fold 1
Validation MSE: 2.808344822586802e-28
Test MSE: 2.808344822586802e-28
R²: 1.0
RMSE: 1.6758116906701664e-14
------------------------------
Fold 2
Validation MSE: 2.5559093329160782e-28
Test MSE: 2.5559093329160782e-28
R²: 1.0
RMSE: 1.5987211554602254e-14
------------------------------
Fold 3
Validation MSE: 3.1869980570928877e-2