In [2]:
# ================================================================
# Title: Assignment 6.2 - Data Normalization
# Author: ajsaraujo (GitHub username)
# Date: 2/1/2026
# Modified By: Cyndie Cramer
# Description: Take in a vector and normalize it
# =================================================================


def normalize_vector(data):
    """
    Rescales a vector to the [0, 1] range using min-max normalization.

    Each value x is mapped to (x - minimum) / (maximum - minimum).

    data : iterable of numbers
    returns : list of normalized values, in the same order as the input

    Edge cases:
    - An empty input returns an empty list.
    - If every value is identical (maximum == minimum) the formula would
      divide by zero, so every value is mapped to 0.0 instead.
    """
    # Materialize the input once: it is scanned three times below, which
    # would exhaust a one-shot iterable such as a generator.
    values = list(data)

    # Nothing to normalize (min()/max() would raise on an empty sequence)
    if not values:
        return []

    # Smallest and largest values define the range being rescaled
    minimum = min(values)
    maximum = max(values)
    value_range = maximum - minimum

    # A constant vector has no spread; avoid dividing by zero
    if value_range == 0:
        return [0.0 for _ in values]

    # Apply the min-max normalization formula to every value
    return [(x - minimum) / value_range for x in values]


# Example input vector (evenly spaced values from 5 to 20)
data = [5, 10, 15, 20]

# Normalize the vector with min-max scaling:
# the smallest value (5) maps to 0.0 and the largest (20) maps to 1.0
result = normalize_vector(data)

# Display the normalized list
print(result)


[0.0, 0.3333333333333333, 0.6666666666666666, 1.0]


In [5]:
# ================================================================
# Title: Assignment 6.2 - Data Standardization
# Author: saimadhu-polamuri (GitHub username)
# Date: 2/1/2026
# Modified By: Cyndie Cramer
# Description: Take in a vector and standardize it.
# =================================================================


# Import the libraries we need
import math  # needed for square root calculation

def standardize_vector(data):
    """
    Standardizes a vector using z-score normalization.

    Each value x is mapped to (x - mean) / std_dev, where std_dev is the
    sample standard deviation: sqrt(sum((x - mean)^2) / (n - 1)).

    data : iterable of numbers
    returns : list of standardized values, in the same order as the input

    Edge cases (each would otherwise divide by zero):
    - An empty input returns an empty list.
    - A single value has no sample standard deviation (n - 1 == 0),
      so it is mapped to 0.0.
    - A vector whose standard deviation is 0 (all values identical)
      is mapped to all 0.0.
    """
    # Materialize the input once so it can be scanned several times
    values = list(data)
    n = len(values)

    # Nothing to standardize
    if n == 0:
        return []

    # The sample standard deviation is undefined for a single value
    if n < 2:
        return [0.0] * n

    # Step 1: Calculate the mean (average) of the data
    mean = sum(values) / n

    # Step 2: Calculate the sample standard deviation (divides by n - 1)
    variance_sum = sum((x - mean) ** 2 for x in values)
    std_dev = math.sqrt(variance_sum / (n - 1))

    # No spread around the mean: every z-score is defined as 0.0
    if std_dev == 0:
        return [0.0] * n

    # Step 3: Calculate the z-score of every value
    return [(x - mean) / std_dev for x in values]


# Example input vector (evenly spaced values from 10 to 50)
data = [10, 20, 30, 40, 50]

# Standardize the vector into z-scores
# (uses the sample standard deviation, i.e. divides by n - 1)
standardized_result = standardize_vector(data)

# Display the result; the values are symmetric around 0.0
print(standardized_result)


[-1.2649110640673518, -0.6324555320336759, 0.0, 0.6324555320336759, 1.2649110640673518]


In [None]:
# ================================================================
# Title: Assignment 6.2 - Working with a Dataframe
# Author: saimadhu-polamuri (GitHub username)
# Date: 2/1/2026
# Modified By: Cyndie Cramer
# Description: Using a data set that has housing information on 
# various California block neighborhoods, build a model to predict 
# the median house value.
# =================================================================


# Import necessary libraries
import math          # For square root calculation in standardization
import os            # For reading an optional data-path override from the environment

import pandas as pd  # For handling CSV files and data frames


# Step 1: Load the CSV data
# The location defaults to the original local path, but it can be overridden
# by setting the CALIF_HOUSING_CSV environment variable so the notebook can be
# re-run on another machine without editing this cell.
file_path = os.environ.get(
    "CALIF_HOUSING_CSV",
    r"C:\Users\cyndi\OneDrive - Bellevue University\Data Science\calif_housing_data.csv",
)

# Read the CSV into a pandas DataFrame
data = pd.read_csv(file_path)

# (a) How many rows does this dataset have?
# shape is (rows, columns), so element 0 is the row count
num_rows = data.shape[0]
print("Number of rows:", num_rows)

# (b) What is the target vector for your model?
# The target vector is the column we want the model to predict
# In this case, it's the median house value
target_vector = data['median_house_value']

# Show first 5 values to check
print("\nTarget vector (first 5 values):")
print(target_vector.head())

# (c) Create a new feature: total_bedrooms / households
# This is an element-wise division of the two columns, giving the average
# number of bedrooms per household for each row. The new column is added to
# `data` itself; re-running the cell simply overwrites it with the same values.
data['bedrooms_per_household'] = data['total_bedrooms'] / data['households']

# Show first 5 values of the new feature
print("\nNew feature 'bedrooms_per_household' (first 5 values):")
print(data['bedrooms_per_household'].head())

# (d) Create a new data frame with three selected features
# Only keep the features we want to use for modeling:
# - housing_median_age
# - median_income
# - bedrooms_per_household
features_df = data[['housing_median_age', 'median_income', 'bedrooms_per_household']]

print("\nNew features data frame (first 5 rows):")
print(features_df.head())

# (e) Standardize the features (z-score normalization)
# Function to standardize a list/vector of numbers
def standardize_vector(vector):
    """
    Standardizes a vector using z-score normalization:
    z = (value - mean) / standard deviation

    The standard deviation is the sample standard deviation (divides by n - 1).

    vector : iterable of numbers (a list or a DataFrame column both work,
             since the values are only iterated over)
    returns : list of standardized values, in the same order as the input

    Edge cases (each would otherwise divide by zero):
    - An empty input returns an empty list.
    - A single value has no sample standard deviation (n - 1 == 0),
      so it is mapped to 0.0.
    - A vector whose standard deviation is 0 (all values identical)
      is mapped to all 0.0.

    Note: this has the same name as the function defined in the earlier
    standardization cell and replaces it once this cell is run.
    """
    # Materialize the input once so it can be scanned several times
    values = list(vector)
    n = len(values)

    # Nothing to standardize
    if n == 0:
        return []

    # The sample standard deviation is undefined for a single value
    if n < 2:
        return [0.0] * n

    # Step 1: Compute the mean of the vector
    mean = sum(values) / n

    # Step 2: Compute the sum of squared differences from the mean
    variance_sum = sum((x - mean) ** 2 for x in values)

    # Step 3: Compute the sample standard deviation (sqrt of variance)
    std_dev = math.sqrt(variance_sum / (n - 1))

    # No spread around the mean: every z-score is defined as 0.0
    if std_dev == 0:
        return [0.0] * n

    # Step 4: Compute z-scores for each value
    return [(x - mean) / std_dev for x in values]

# Step 5: Build the standardized frame one column at a time
# Every column of features_df is passed through standardize_vector and the
# results are assembled into a new frame that reuses the original row index,
# so features_df itself is left untouched.
standardized_df = pd.DataFrame(
    {col: standardize_vector(features_df[col]) for col in features_df.columns},
    index=features_df.index,
)

# Preview the standardized features
print("\nStandardized features (first 5 rows):")
print(standardized_df.head())
