In [None]:
import pandas as pd
from sklearn.datasets import fetch_california_housing

# Load the California housing dataset bundle (feature matrix, target vector
# and feature names).
data = fetch_california_housing()

# Assemble a single table: one column per feature, with the regression
# target appended as the last column.
df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)

# Write the table to disk; the positional row index is left out of the file.
df.to_csv('house_prices.csv', index=False)


In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler

# Load the dataset and unpack the feature matrix and the target vector.
data = fetch_california_housing()
X, y = data.data, data.target

# Standardize every feature column (subtract the column mean, divide by the
# column standard deviation).
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

# Sanity check: after standardization the per-feature means should be
# numerically zero and the per-feature standard deviations should be one.
feature_means = X_scaled.mean(axis=0)
feature_stds = X_scaled.std(axis=0)

print("Mean of scaled features:")
print(feature_means)
print("\nStandard deviation of scaled features:")
print(feature_stds)


Mean of scaled features:
[ 6.60969987e-17  5.50808322e-18  6.60969987e-17 -1.06030602e-16
 -1.10161664e-17  3.44255201e-18 -1.07958431e-15 -8.52651283e-15]

Standard deviation of scaled features:
[1. 1. 1. 1. 1. 1. 1. 1.]


In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Fetch the California housing dataset
data = fetch_california_housing()

# Extract the features and target variable
X = data.data
y = data.target

# Standardize before PCA. PCA centers the data but does not rescale it, and
# it looks for directions of maximal variance; on the raw matrix the leading
# components would simply track the features with the largest numeric range
# instead of structure shared across features.
X_standardized = StandardScaler().fit_transform(X)

# Perform feature extraction using PCA
pca = PCA(n_components=3)  # Number of principal components to keep
X_extracted = pca.fit_transform(X_standardized)

# Print the shape of the extracted features
print("Shape of extracted features:", X_extracted.shape)


Shape of extracted features: (20640, 3)


In [None]:
from functools import partial

from sklearn.datasets import fetch_california_housing
from sklearn.feature_selection import SelectKBest, mutual_info_regression
import numpy as np

# Fetch the California housing dataset
data = fetch_california_housing()

# Extract the features and target variable
X = data.data
y = data.target

# Make every feature non-negative by shifting each column so that its minimum
# is zero. Clipping negatives to zero instead (np.maximum(X, 0)) would turn a
# column whose values are all negative -- Longitude in this dataset -- into a
# constant, throwing away all of its information before selection.
X_non_negative = X - X.min(axis=0)

# Apply logarithmic transformation to features (log1p is defined at zero)
X_log = np.log1p(X_non_negative)

# mutual_info_regression is a randomized estimator; pin random_state so the
# scores, and therefore the selected columns, are the same on every run.
score_func = partial(mutual_info_regression, random_state=42)

# Keep the 3 features with the highest estimated mutual information with y
k_best = SelectKBest(score_func, k=3)
X_extracted = k_best.fit_transform(X_log, y)

# Print the shape of the extracted features
print("Shape of extracted features:", X_extracted.shape)

Shape of extracted features: (20640, 3)


# Feature Selection and Regression on Min-Max Scaled Features

In [None]:
from functools import partial

from sklearn.datasets import fetch_california_housing
from sklearn.feature_selection import SelectKBest, mutual_info_regression
from sklearn.preprocessing import MinMaxScaler

# Fetch the California housing dataset
data = fetch_california_housing()

# Extract the features and target variable
X = data.data
y = data.target

# Apply MinMaxScaler to normalize features between 0 and 1
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# mutual_info_regression is a randomized estimator; without a fixed
# random_state the scores (and potentially the chosen columns) can change
# between runs, so the cell would not be reproducible.
score_func = partial(mutual_info_regression, random_state=42)

# Perform feature selection using SelectKBest with mutual_info_regression
k_best = SelectKBest(score_func=score_func, k=3)  # Number of features to keep
X_selected = k_best.fit_transform(X_scaled, y)

# Print the shape of the selected features
print("Shape of selected features:", X_selected.shape)


Shape of selected features: (20640, 3)


In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.preprocessing import MinMaxScaler

# Fetch the California housing dataset
data = fetch_california_housing()

# Extract the features and target variable
X = data.data
y = data.target

# Apply MinMaxScaler to normalize features between 0 and 1
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Perform feature extraction using PCA on the scaled features
pca = PCA(n_components=3)
X_extracted = pca.fit_transform(X_scaled)

# Perform feature selection using SelectKBest with f_regression.
# Select from the same scaled matrix that PCA uses so both outputs live on a
# comparable [0, 1] scale. The F-test is based on correlation with y, which
# per-feature rescaling does not change, so the same columns are chosen as
# with the raw matrix -- only the returned values are now the scaled ones.
k_best = SelectKBest(score_func=f_regression, k=3)
X_selected = k_best.fit_transform(X_scaled, y)

# Print the shape of the extracted and selected features
print("Shape of extracted features:", X_extracted.shape)
print("Shape of selected features:", X_selected.shape)


Shape of extracted features: (20640, 3)
Shape of selected features: (20640, 3)


In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score

# Fetch the California housing dataset
data = fetch_california_housing()

# Extract the features and target variable
X = data.data
y = data.target

# Split BEFORE any preprocessing so that no statistic of the test set can
# influence what the model sees during training.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training fold only, then apply that same fitted
# transform to the test fold. Fitting on the full dataset would leak the test
# set's min/max into preprocessing. Test values may fall slightly outside
# [0, 1]; that is expected and harmless here.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Initialize the linear regression model
linear_regression = LinearRegression()

# Train the model on the training data
linear_regression.fit(X_train, y_train)

# Predict on the held-out test data
y_pred = linear_regression.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Mean Squared Error:", mse)
print("R2 Score:", r2)


Mean Squared Error: 0.555891598695244
R2 Score: 0.5757877060324511
