## Cristian Taylor
## 7/12/2024
## DSC320 Math for Data Science 


In [1]:
import numpy as np

def normalize_vector(vector):
    """Scale a vector to unit Euclidean (L2) length.

    Parameters
    ----------
    vector : array_like
        Numeric sequence (list, tuple or ndarray).

    Returns
    -------
    numpy.ndarray
        A new array equal to ``vector / ||vector||``. Integer and boolean
        input is promoted to float. When the norm is zero (all-zero or empty
        input) no direction is defined, so a copy of the input values is
        returned instead of dividing by zero.
    """
    values = np.asarray(vector)
    # Promote ints/bools so every code path returns an inexact-dtype array;
    # float and complex inputs keep their own dtype.
    if not np.issubdtype(values.dtype, np.inexact):
        values = values.astype(float)

    # Magnitude (Euclidean norm) of the vector
    magnitude = np.linalg.norm(values)

    if magnitude == 0:
        # Return a copy, never the caller's own object, so mutating the
        # result cannot silently change the input.
        return values.copy()

    return values / magnitude

# Demo: show a small sample vector, then its unit-length version
vector = np.arange(1, 4)  # same values as np.array([1, 2, 3])
print("Original vector:", vector)

normalized_vector = normalize_vector(vector)
print("Normalized vector:", normalized_vector)

Original vector: [1 2 3]
Normalized vector: [0.26726124 0.53452248 0.80178373]


In [3]:

def standardize_vector(vector):
    """Standardize a vector to zero mean and unit standard deviation.

    Uses the population standard deviation (``np.std`` default, ``ddof=0``).

    Parameters
    ----------
    vector : array_like
        Numeric sequence (list, tuple or ndarray).

    Returns
    -------
    numpy.ndarray
        A new array ``(vector - mean) / std``. Integer and boolean input is
        promoted to float. For a constant vector (std == 0) the centred
        values (all zeros) are returned, so the result still has zero mean.
        Empty input yields an empty array.
    """
    values = np.asarray(vector)
    # Promote ints/bools so every code path returns an inexact-dtype array.
    if not np.issubdtype(values.dtype, np.inexact):
        values = values.astype(float)

    # np.mean / np.std on an empty array only emit warnings and produce NaN;
    # there is nothing to standardize, so short-circuit.
    if values.size == 0:
        return values.copy()

    mean = np.mean(values)
    std_dev = np.std(values)

    centered = values - mean

    # A zero spread cannot be rescaled; return the centred values rather
    # than the raw input so the zero-mean property still holds.
    if std_dev == 0:
        return centered

    return centered / std_dev

# Demo: show a small sample vector, then its standardized (z-score) version
vector = np.arange(1, 6)  # same values as np.array([1, 2, 3, 4, 5])
print("Original vector:", vector)

standardized_vector = standardize_vector(vector)
print("Standardized vector:", standardized_vector)


Original vector: [1 2 3 4 5]
Standardized vector: [-1.41421356 -0.70710678  0.          0.70710678  1.41421356]


In [7]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Read the California housing CSV (path is relative to the notebook's working
# directory) and display per-column summary statistics as the cell output.
cali_df = pd.read_csv(filepath_or_buffer='calif_housing_data.csv')
cali_df.describe()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
count,20640.0,20640.0,20640.0,20640.0,20433.0,20640.0,20640.0,20640.0,20640.0
mean,-119.569704,35.631861,28.639486,2635.763081,537.870553,1425.476744,499.53968,3.870671,206855.816909
std,2.003532,2.135952,12.585558,2181.615252,421.38507,1132.462122,382.329753,1.899822,115395.615874
min,-124.35,32.54,1.0,2.0,1.0,3.0,1.0,0.4999,14999.0
25%,-121.8,33.93,18.0,1447.75,296.0,787.0,280.0,2.5634,119600.0
50%,-118.49,34.26,29.0,2127.0,435.0,1166.0,409.0,3.5348,179700.0
75%,-118.01,37.71,37.0,3148.0,647.0,1725.0,605.0,4.74325,264725.0
max,-114.31,41.95,52.0,39320.0,6445.0,35682.0,6082.0,15.0001,500001.0


In [9]:


# (a) How many rows does this data set have?
num_rows = cali_df.shape[0]
print(f"The dataset has {num_rows} rows.")

# (b) What is the target vector for your model?
target_vector = cali_df['median_house_value']
print("The target vector for the model is the 'median_house_value' column.")

# (c) Create a new feature by taking the total bedrooms divided by the number of households
# NOTE: the describe() output reports 20433 non-null total_bedrooms values out
# of 20640 rows, so this ratio is NaN wherever total_bedrooms is missing.
# The column is added to cali_df itself (the frame is modified in place).
cali_df['bedrooms_per_household'] = cali_df['total_bedrooms'] / cali_df['households']
print("Created new feature 'bedrooms_per_household'.")

# (d) Create a new data frame with median age, median income, and the new feature
new_df = cali_df[['housing_median_age', 'median_income', 'bedrooms_per_household']]
print("New DataFrame with selected features created.")

# (e) Apply data standardization to the features in the new data frame
# StandardScaler disregards NaNs when fitting and keeps them in the transformed
# output, so the missing ratios from step (c) remain NaN here.
scaler = StandardScaler()
standardized_features = scaler.fit_transform(new_df)
# fit_transform returns a bare ndarray; re-attach the column names AND the
# original row index so the rows stay aligned with target_vector even if the
# source frame is ever filtered or re-indexed.
standardized_df = pd.DataFrame(
    standardized_features,
    columns=new_df.columns,
    index=new_df.index,
)
print("Data standardization applied to the features in the new DataFrame.")

# Display the first few rows of the standardized DataFrame
print(standardized_df.head())

The dataset has 20640 rows.
The target vector for the model is the 'median_house_value' column.
Created new feature 'bedrooms_per_household'.
New DataFrame with selected features created.
Data standardization applied to the features in the new DataFrame.
   housing_median_age  median_income  bedrooms_per_household
0            0.982143       2.344766               -0.153863
1           -0.607019       2.332238               -0.262936
2            1.856182       1.782699               -0.049604
3            1.856182       0.932968               -0.050417
4            1.856182      -0.012881               -0.033568
