# In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# Path to the input CSV; it is a relative path, so it is resolved against the
# current working directory when the file is opened.
file_path = 'Data-Melbourne_F_fixed.csv'
def load_and_preprocess_data(file_path, target_column='target_column'):
    """Load a CSV dataset, impute missing values and return scaled features.

    Side effects: prints the per-column count of missing values and shows a
    correlation heatmap of the numeric columns.

    Args:
        file_path: Path of the CSV file, passed to ``pd.read_csv``.
        target_column: Name of the label column. Defaults to the original
            placeholder name so existing callers keep working.

    Returns:
        A tuple ``(X_scaled, y)`` where ``X_scaled`` is the standardised
        feature matrix produced by ``StandardScaler`` and ``y`` is the target
        column of the imputed frame.

    Raises:
        KeyError: If ``target_column`` is not a column of the CSV.

    Note:
        The scaler is fitted on every row of the file. If the caller splits
        the result into train/test sets afterwards, the test rows have already
        influenced the scaling statistics.
    """
    df = pd.read_csv(file_path)
    print("Missing values:")
    print(df.isnull().sum())

    # Fail before plotting rather than after the figure has been shown.
    if target_column not in df.columns:
        raise KeyError(
            f"target column {target_column!r} not found; "
            f"available columns: {list(df.columns)}"
        )

    # Restrict the median to numeric columns: on recent pandas versions a
    # non-numeric column (e.g. string labels) makes a plain df.median() raise.
    df = df.fillna(df.median(numeric_only=True))

    # Feature correlation (numeric/bool columns only, for the same reason).
    numeric_df = df.select_dtypes(include=["number", "bool"])
    plt.figure(figsize=(12, 8))
    sns.heatmap(numeric_df.corr(), annot=True, cmap="coolwarm")
    plt.title("Feature Correlation")
    plt.show()

    # Define features and target
    X = df.drop(columns=[target_column])
    y = df[target_column]
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    return X_scaled, y

def logistic_regression_scratch(X, y, learning_rate=0.01, epochs=1000):
    """Fit a logistic regression model with full-batch gradient descent.

    Args:
        X: 2-D array-like of shape (m, n) holding m samples with n features.
        y: Array-like with m labels. It is flattened to 1-D, so both shape
            (m,) and (m, 1) are accepted. Presumably 0/1 labels (the sigmoid
            output is compared against them directly) -- not validated here.
        learning_rate: Step size applied to each gradient update.
        epochs: Number of passes over the full dataset.

    Returns:
        A tuple ``(weights, bias)``: ``weights`` is a 1-D array of length n
        and ``bias`` is a scalar.
    """
    X = np.asarray(X, dtype=float)
    # Flatten so a column vector cannot broadcast (m,) - (m, 1) into (m, m).
    y = np.asarray(y, dtype=float).ravel()
    m, n = X.shape
    weights = np.zeros(n)
    bias = 0.0
    # exp() overflows float64 a little above 709; clipping the logits keeps
    # the sigmoid finite while leaving it within rounding error of 0 or 1.
    max_logit = 500.0

    for _ in range(epochs):
        logits = np.clip(np.dot(X, weights) + bias, -max_logit, max_logit)
        y_pred = 1.0 / (1.0 + np.exp(-logits))
        error = y_pred - y
        weights -= learning_rate * np.dot(X.T, error) / m
        bias -= learning_rate * np.sum(error) / m
    return weights, bias

def predict_logistic(X, weights, bias):
    """Predict 0/1 class labels from fitted logistic regression parameters.

    Args:
        X: Array-like of samples; combined with ``weights`` via ``np.dot``.
        weights: 1-D array of per-feature coefficients.
        bias: Scalar intercept added to every logit.

    Returns:
        A list of Python ints: 1 where the sigmoid probability is strictly
        greater than 0.5, otherwise 0.
    """
    # Clipping keeps exp() from overflowing on extreme logits. It cannot move
    # a probability across the 0.5 threshold, so the labels are unaffected.
    max_logit = 500.0
    logits = np.atleast_1d(np.asarray(np.dot(X, weights) + bias, dtype=float))
    clipped = np.clip(logits, -max_logit, max_logit)
    probabilities = 1.0 / (1.0 + np.exp(-clipped))
    return (probabilities > 0.5).astype(int).tolist()

# Evaluate model
def evaluate_model(y_test, y_pred):
    """Print accuracy, F1, confusion matrix and classification report.

    Args:
        y_test: Ground-truth labels.
        y_pred: Predicted labels to compare against ``y_test``.
    """
    # Each entry pairs the printed heading with the metric that produces the
    # value; metrics are computed and printed one at a time, in this order.
    report_sections = (
        ("Accuracy:", accuracy_score),
        ("F1-Score:", f1_score),
        ("Confusion Matrix:\n", confusion_matrix),
        ("Classification Report:\n", classification_report),
    )
    for heading, metric in report_sections:
        print(heading, metric(y_test, y_pred))

# Main function
def main(file_path='Data-Melbourne_F_fixed.csv'):
    """Train and evaluate several classifiers on the dataset.

    Loads and preprocesses the CSV, holds out 20% of the rows as a test set,
    then fits and reports metrics for: the from-scratch logistic regression,
    scikit-learn logistic regression, k-nearest neighbours, a decision tree,
    a random forest, a linear SVM, and finally an SVM tuned by grid search.

    Args:
        file_path: Path of the CSV file to load. Defaults to the dataset name
            that was previously hard-coded.
    """
    # One seed for the split and for the randomised estimators so that
    # repeated runs print the same metrics.
    seed = 42

    X, y = load_and_preprocess_data(file_path)

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed
    )

    # Logistic regression implemented from scratch
    weights, bias = logistic_regression_scratch(X_train, y_train)
    y_pred_scratch = predict_logistic(X_test, weights, bias)
    print("Logistic Regression from Scratch")
    evaluate_model(y_test, y_pred_scratch)

    # scikit-learn estimators share the same fit / predict / report steps.
    models = (
        ("Logistic Regression with sklearn", LogisticRegression()),
        ("K-Nearest Neighbors", KNeighborsClassifier(n_neighbors=5)),
        ("Decision Tree Classifier", DecisionTreeClassifier(random_state=seed)),
        (
            "Random Forest Classifier",
            RandomForestClassifier(n_estimators=100, random_state=seed),
        ),
        ("Support Vector Machine", SVC(kernel='linear')),
    )
    for title, model in models:
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)
        print(title)
        evaluate_model(y_test, predictions)

    # Hyper-parameter search for the SVM using 5-fold cross-validation on
    # the training split only.
    param_grid = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf']}
    grid_search = GridSearchCV(SVC(), param_grid, cv=5)
    grid_search.fit(X_train, y_train)
    print("Best Parameters for SVM:", grid_search.best_params_)
    y_pred_tuned = grid_search.best_estimator_.predict(X_test)
    print("Tuned SVM")
    evaluate_model(y_test, y_pred_tuned)

# Run the pipeline only when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()


# Recorded output of the cell above:
# FileNotFoundError: [Errno 2] No such file or directory: 'Data-Melbourne_F_fixed.csv'