In [2]:
print(california_housing.columns)


Index(['Median_House_Value', 'Median_Income', 'Median_Age', 'Tot_Rooms',
       'Tot_Bedrooms', 'Population', 'Households', 'Latitude', 'Longitude',
       'Distance_to_coast', 'Distance_to_LA', 'Distance_to_SanDiego',
       'Distance_to_SanJose', 'Distance_to_SanFrancisco'],
      dtype='object')


In [4]:
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

# Step 1: Load the dataset
california_housing = pd.read_csv('california_housing.csv')

# Step 2: Prepare the data
# Assuming 'median_house_value' is the target variable (binary classification task)
X = california_housing.drop(columns=['Median_House_Value'])
y = (california_housing['Median_House_Value'] > 265000).astype(int)  # Binary classification based on median threshold

# Step 3: Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Train and evaluate different classification models
models = {
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "SVM": SVC(probability=True)
}

for model_name, model in models.items():
    # Train the model
    model.fit(X_train, y_train)

    # Predict on the test set
    y_pred = model.predict(X_test)

    # Calculate evaluation metrics
    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f_score = f1_score(y_test, y_pred)

    # AUC-ROC
    if hasattr(model, "predict_proba"):  # Check if the model has predict_proba method
        y_probs = model.predict_proba(X_test)[:, 1]  # Probability of positive class
        auc_roc = roc_auc_score(y_test, y_probs)
    else:
        auc_roc = None

    # Display the evaluation metrics
    print(f"Model: {model_name}")
    print("Accuracy:", accuracy)
    print("\nConfusion Matrix:\n", conf_matrix)
    print("\nPrecision:", precision)
    print("\nRecall:", recall)
    print("\nF-Score:", f_score)
    print("\nAUC-ROC:", auc_roc)
    print("\n----------------------------\n")


Model: Decision Tree
Accuracy: 0.8771802325581395

Confusion Matrix:
 [[2853  259]
 [ 248  768]]

Precision: 0.747809152872444

Recall: 0.7559055118110236

F-Score: 0.7518355359765052

AUC-ROC: 0.8363396453656661

----------------------------

Model: Random Forest
Accuracy: 0.9241763565891473

Confusion Matrix:
 [[3010  102]
 [ 211  805]]

Precision: 0.8875413450937155

Recall: 0.7923228346456693

F-Score: 0.8372334893395735

AUC-ROC: 0.9674964387284173

----------------------------



  _warn_prf(average, modifier, msg_start, len(result))


Model: SVM
Accuracy: 0.7538759689922481

Confusion Matrix:
 [[3112    0]
 [1016    0]]

Precision: 0.0

Recall: 0.0

F-Score: 0.0

AUC-ROC: 0.8390657892739307

----------------------------



In [5]:
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR

# Step 1: Load the dataset
california_housing = pd.read_csv('california_housing.csv')

# Step 2: Prepare the data
# Assuming 'median_house_value' is the target variable
X = california_housing.drop(columns=['Median_House_Value'])
y = california_housing['Median_House_Value']

# Step 3: Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Train and evaluate different regression models
models = {
    "Linear Regression": LinearRegression(),
    "Decision Tree": DecisionTreeRegressor(),
    "Random Forest": RandomForestRegressor(),
    "SVR": SVR()
}

for model_name, model in models.items():
    # Train the model
    model.fit(X_train, y_train)

    # Predict on the test set
    y_pred = model.predict(X_test)

    # Calculate evaluation metrics
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Calculate adjusted R2
    n = len(X_test)
    p = X_test.shape[1]
    adj_r2 = 1 - (1 - r2) * ((n - 1) / (n - p - 1))

    # Display the evaluation metrics
    print(f"Model: {model_name}")
    print("Mean Absolute Error (MAE):", mae)
    print("Mean Squared Error (MSE):", mse)
    print("R2 Score:", r2)
    print("Adjusted R2:", adj_r2)
    print("\n----------------------------\n")


Model: Linear Regression
Mean Absolute Error (MAE): 50404.85523916031
Mean Squared Error (MSE): 4809839606.897536
R2 Score: 0.6329512627916902
Adjusted R2: 0.6317914101947754

----------------------------

Model: Decision Tree
Mean Absolute Error (MAE): 40439.717781007756
Mean Squared Error (MSE): 4247655637.828246
R2 Score: 0.6758526758928101
Adjusted R2: 0.6748283892585385

----------------------------

Model: Random Forest
Mean Absolute Error (MAE): 30288.574718992248
Mean Squared Error (MSE): 2269211046.500581
R2 Score: 0.82683184531086
Adjusted R2: 0.8262846440442195

----------------------------

Model: SVR
Mean Absolute Error (MAE): 87322.67310758524
Mean Squared Error (MSE): 13741618743.916553
R2 Score: -0.048651144191897755
Adjusted R2: -0.051964820631978936

----------------------------

