# In [3]:
import pandas as pd
import requests
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.preprocessing import PolynomialFeatures  # Import PolynomialFeatures
from sklearn.metrics import r2_score

# Function to download a file
def download(url, filename, timeout=30):
    """Download ``url`` and save the response body to ``filename``.

    Args:
        url: Address passed to ``requests.get``.
        filename: Destination path. It is opened in binary write mode, so
            an existing file at that path is overwritten.
        timeout: Seconds ``requests`` waits on the server before giving
            up. Defaults to 30.

    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out, or the server answers with a 4xx/5xx status code.
    """
    response = requests.get(url, timeout=timeout)
    # Fail before touching the disk so an HTTP error page is never saved
    # as if it were the requested file.
    response.raise_for_status()
    with open(filename, 'wb') as out_file:
        out_file.write(response.content)

# Remote location of the housing dataset
url = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DA0101EN-SkillsNetwork/labs/FinalModule_Coursera/data/kc_house_data_NaN.csv'

# Local file the dataset is saved to
download_path = "C:\\Users\\nteny\\Desktop\\databases\\housing.csv"  # Replace with your actual path

# Fetch the CSV to disk, then read the saved copy into a DataFrame
download(url=url, filename=download_path)
df = pd.read_csv(download_path)

# Remove the 'id' and 'Unnamed: 0' columns; any that are absent are ignored
df = df.drop(columns=['id', 'Unnamed: 0'], errors='ignore')

# Report the number of missing values per column
print("Missing values in each column:")
print(df.isna().sum())

# Keep only the rows that contain no missing values
df = df.dropna()

# Predictor columns fed to the model, and the column being predicted
features = [
    "floors",
    "waterfront",
    "lat",
    "bedrooms",
    "sqft_basement",
    "view",
    "bathrooms",
    "sqft_living15",
    "sqft_above",
    "grade",
    "sqft_living",
]
X, y = df[features], df['price']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Expand both splits with degree-2 polynomial terms. The transformer is
# fitted on the training split only and then reused for the test split.
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Build a Ridge regressor with regularization parameter 0.1 and fit it
# on the expanded training data (fit returns the estimator itself)
ridge_model = Ridge(alpha=0.1).fit(X_train_poly, y_train)

# Predict on the expanded test data and score the predictions with R^2
y_pred = ridge_model.predict(X_test_poly)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# Show the resulting R^2 score
print("R^2 score:", r2)


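# Output: ModuleNotFoundError: No module named 'sklearn'
# The `sklearn` module is provided by the scikit-learn package, which is not
# installed in this environment; install it with `pip install scikit-learn`.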