In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler

# Load the dataset
df = pd.read_csv("house_price.csv")

# Select features (size and bedroom) and target (price)
X = df[['size', 'bedroom']]
y = df['price']

# Train and evaluate LinearRegression
# Create and train the Linear Regression model
lr_model = LinearRegression()
lr_model.fit(X, y)

# Make predictions
y_pred_lr = lr_model.predict(X)

# Calculate evaluation metrics
mae_lr = mean_absolute_error(y, y_pred_lr)
mse_lr = mean_squared_error(y, y_pred_lr)
rmse_lr = np.sqrt(mse_lr)
mape_lr = np.mean(np.abs((y - y_pred_lr) / y)) * 100

# Print Linear Regression results
print("==== Linear Regression ====")
print("Coefficients:", lr_model.coef_)
print("Intercept:", lr_model.intercept_)
print(f"MAE: {mae_lr:.2f}")
print(f"MSE: {mse_lr:.2f}")
print(f"RMSE: {rmse_lr:.2f}")
print(f"MAPE: {mape_lr:.2f}%")

# Train and evaluate SGDRegressor
# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Create and train the SGD Regressor model
sgd_model = SGDRegressor(max_iter=1000, tol=1e-3, random_state=42)
sgd_model.fit(X_scaled, y)

# Make predictions
y_pred_sgd = sgd_model.predict(X_scaled)

# Calculate evaluation metrics
mae_sgd = mean_absolute_error(y, y_pred_sgd)
mse_sgd = mean_squared_error(y, y_pred_sgd)
rmse_sgd = np.sqrt(mse_sgd)
mape_sgd = np.mean(np.abs((y - y_pred_sgd) / y)) * 100

# Print SGD Regressor results
print("\n==== SGD Regressor ====")
print("Coefficients:", sgd_model.coef_)
print("Intercept:", sgd_model.intercept_)
print(f"MAE: {mae_sgd:.2f}")
print(f"MSE: {mse_sgd:.2f}")
print(f"RMSE: {rmse_sgd:.2f}")
print(f"MAPE: {mape_sgd:.2f}%")


==== Linear Regression ====
Coefficients: [  139.21067402 -8738.01911233]
Intercept: 89597.90954279754
MAE: 51502.77
MSE: 4086560101.21
RMSE: 63926.21
MAPE: 15.61%

==== SGD Regressor ====
Coefficients: [108873.92893799  -5981.12043584]
Intercept: [340375.84521395]
MAE: 51563.18
MSE: 4086863630.42
RMSE: 63928.58
MAPE: 15.62%


In [12]:
import os
import cv2
import glob
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from joblib import dump

# Load and preprocess data with histogram equalization
def load_data_histogram_equalized(data_dir, size=(64, 64)):
    X, y = [], []    
    for img_path in glob.glob(os.path.join(data_dir, '*', '*')):
        label = os.path.basename(os.path.dirname(img_path))
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            continue
        img = cv2.equalizeHist(img)
        img = cv2.resize(img, size)
        img = img / 255.0
        X.append(img.flatten())
        y.append(label)

    return np.array(X), np.array(y)

# Load dataset
X, y = load_data_histogram_equalized("C:/Users/lance/repos/Summer Sem/CVI/Assignment2/Codes/Assignment/A2/Q2/train")

# Split into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train and tune kNN
best_k, best_acc_knn = 1, 0
for k in range(1, 16):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train_scaled, y_train)
    acc = knn.score(X_test_scaled, y_test)
    print(f"k={k}, Accuracy={acc:.4f}")
    if acc > best_acc_knn:
        best_k, best_acc_knn = k, acc
best_knn_model = KNeighborsClassifier(n_neighbors=best_k)
best_knn_model.fit(X_train_scaled, y_train)

# Train and tune Logistic Regression
best_C, best_acc_lr = 0.01, 0
for C in [0.01, 0.1, 1, 10, 100]:
    lr = LogisticRegression(C=C, max_iter=1000)
    lr.fit(X_train_scaled, y_train)
    acc = lr.score(X_test_scaled, y_test)
    print(f"C={C}, Accuracy={acc:.4f}")
    if acc > best_acc_lr:
        best_C, best_acc_lr = C, acc
best_lr_model = LogisticRegression(C=best_C, max_iter=1000)
best_lr_model.fit(X_train_scaled, y_train)

# Compare final models
acc_knn = best_knn_model.score(X_test_scaled, y_test)
acc_lr = best_lr_model.score(X_test_scaled, y_test)

if acc_knn > acc_lr:
    print(f"\nBest model: kNN (k={best_k}), Accuracy={acc_knn:.4f}")
    dump(best_knn_model, "best_model_q2.joblib")
    y_pred = best_knn_model.predict(X_test_scaled)
else:
    print(f"\nBest model: Logistic Regression (C={best_C}), Accuracy={acc_lr:.4f}")
    dump(best_lr_model, "best_model_q2.joblib")
    y_pred = best_lr_model.predict(X_test_scaled)

# Print classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

k=1, Accuracy=0.4800
k=2, Accuracy=0.5025
k=3, Accuracy=0.5500
k=4, Accuracy=0.5300
k=5, Accuracy=0.5325
k=6, Accuracy=0.5475
k=7, Accuracy=0.5450
k=8, Accuracy=0.5475
k=9, Accuracy=0.5675
k=10, Accuracy=0.5500
k=11, Accuracy=0.5575
k=12, Accuracy=0.5475
k=13, Accuracy=0.5650
k=14, Accuracy=0.5650
k=15, Accuracy=0.5775
C=0.01, Accuracy=0.4775
C=0.1, Accuracy=0.4625
C=1, Accuracy=0.4625
C=10, Accuracy=0.4775
C=100, Accuracy=0.4825

Best model: kNN (k=15), Accuracy=0.5775

Classification Report:
              precision    recall  f1-score   support

         Cat       0.55      0.82      0.66       199
         Dog       0.66      0.33      0.44       201

    accuracy                           0.58       400
   macro avg       0.60      0.58      0.55       400
weighted avg       0.60      0.58      0.55       400

